diff --git a/doc/src/Eqs/cnp_cutoff.jpg b/doc/src/Eqs/cnp_cutoff.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fae5c6b636c0f818f8677fd7fce4f402cb03a95d Binary files /dev/null and b/doc/src/Eqs/cnp_cutoff.jpg differ diff --git a/doc/src/Eqs/cnp_cutoff.tex b/doc/src/Eqs/cnp_cutoff.tex new file mode 100644 index 0000000000000000000000000000000000000000..f74166e6087e12b931aa90f1431e214f5683a2c3 --- /dev/null +++ b/doc/src/Eqs/cnp_cutoff.tex @@ -0,0 +1,14 @@ +\documentclass[12pt,article]{article} + +\usepackage{indentfirst} +\usepackage{amsmath} + +\begin{document} + +\begin{eqnarray*} + r_{c}^{fcc} & = & \frac{1}{2} \left(\frac{\sqrt{2}}{2} + 1\right) \mathrm{a} \simeq 0.8536 \:\mathrm{a} \\ + r_{c}^{bcc} & = & \frac{1}{2}(\sqrt{2} + 1) \mathrm{a} \simeq 1.207 \:\mathrm{a} \\ + r_{c}^{hcp} & = & \frac{1}{2}\left(1+\sqrt{\frac{4+2x^{2}}{3}}\right) \mathrm{a} +\end{eqnarray*} + +\end{document} diff --git a/doc/src/Eqs/cnp_cutoff2.jpg b/doc/src/Eqs/cnp_cutoff2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..744b61e9b45fd5401a745ab968d9faa607a88b19 Binary files /dev/null and b/doc/src/Eqs/cnp_cutoff2.jpg differ diff --git a/doc/src/Eqs/cnp_cutoff2.tex b/doc/src/Eqs/cnp_cutoff2.tex new file mode 100644 index 0000000000000000000000000000000000000000..fcec31fd2404d4f76ce6df45b27e4f24cd6c6d62 --- /dev/null +++ b/doc/src/Eqs/cnp_cutoff2.tex @@ -0,0 +1,12 @@ +\documentclass[12pt,article]{article} + +\usepackage{indentfirst} +\usepackage{amsmath} + +\begin{document} + +$$ + Rc + Rs > 2*{\rm cutoff} +$$ + +\end{document} diff --git a/doc/src/Eqs/cnp_eq.jpg b/doc/src/Eqs/cnp_eq.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d4213144424331690936b0fd96014ba8ea8eaca0 Binary files /dev/null and b/doc/src/Eqs/cnp_eq.jpg differ diff --git a/doc/src/Eqs/cnp_eq.tex b/doc/src/Eqs/cnp_eq.tex new file mode 100644 index 0000000000000000000000000000000000000000..e5f157e6ba858a09c74e0be9d191c33fbededd09 --- /dev/null 
+++ b/doc/src/Eqs/cnp_eq.tex @@ -0,0 +1,9 @@ +\documentclass[12pt]{article} + +\begin{document} + +$$ + Q_{i} = \frac{1}{n_i}\sum_{j = 1}^{n_i} | \sum_{k = 1}^{n_{ij}} \vec{R}_{ik} + \vec{R}_{jk} |^2 +$$ + +\end{document} diff --git a/doc/src/Eqs/pair_lj_sf.jpg b/doc/src/Eqs/pair_lj_sf.jpg deleted file mode 100644 index a702240003cd4e1bd5043fc5e6a523998fadf8de..0000000000000000000000000000000000000000 Binary files a/doc/src/Eqs/pair_lj_sf.jpg and /dev/null differ diff --git a/doc/src/Eqs/pair_lj_sf.tex b/doc/src/Eqs/pair_lj_sf.tex deleted file mode 100644 index e78e2ca75f17c4fa0467be2b08d27748069e6782..0000000000000000000000000000000000000000 --- a/doc/src/Eqs/pair_lj_sf.tex +++ /dev/null @@ -1,11 +0,0 @@ -\documentclass[12pt]{article} - -\begin{document} - -\begin{eqnarray*} - F & = & F_{\mathrm{LJ}}(r) - F_{\mathrm{LJ}}(r_{\mathrm{c}}) \qquad r < r_{\mathrm{c}} \\ - E & = & E_{\mathrm{LJ}}(r) - E_{\mathrm{LJ}}(r_{\mathrm{c}}) + (r - r_{\mathrm{c}}) F_{\mathrm{LJ}}(r_{\mathrm{c}}) \qquad r < r_{\mathrm{c}} \\ - \mathrm{with} \qquad E_{\mathrm{LJ}}(r) & = & 4 \epsilon \left[ \left(\frac{\sigma}{r}\right)^{12} - \left(\frac{\sigma}{r}\right)^6 \right] \qquad \mathrm{and} \qquad F_{\mathrm{LJ}}(r) = - E^\prime_{\mathrm{LJ}}(r) -\end{eqnarray*} - -\end{document} diff --git a/doc/src/JPG/user_intel.png b/doc/src/JPG/user_intel.png index 0ebb2d1ae08cdd8ddd0d150f29d0da7b12e5520d..302b50124a0429d0f64df1a9979a5265051f8112 100755 Binary files a/doc/src/JPG/user_intel.png and b/doc/src/JPG/user_intel.png differ diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt index dd24f8465a0af4ebd2032db6fd91e6347dcc9eb0..e6d44733e2544534b9f29a6cdfc9b61500687ccb 100644 --- a/doc/src/Manual.txt +++ b/doc/src/Manual.txt @@ -1,7 +1,7 @@ <!-- HTML_ONLY --> <HEAD> <TITLE>LAMMPS Users Manual</TITLE> -<META NAME="docnumber" CONTENT="19 May 2017 version"> +<META NAME="docnumber" CONTENT="20 Jun 2017 version"> <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories"> 
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License."> </HEAD> @@ -21,7 +21,7 @@ <H1></H1> LAMMPS Documentation :c,h3 -19 May 2017 version :c,h4 +20 Jun 2017 version :c,h4 Version info: :h4 diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index dc7ddebe58dcbf6a8b02b17983db5946459534ef..bcffc30549db4198f907cfbd88700dd0d63449ea 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -717,7 +717,7 @@ package"_Section_start.html#start_3. "phonon"_fix_phonon.html, "pimd"_fix_pimd.html, "qbmsst"_fix_qbmsst.html, -"qeq/reax"_fix_qeq_reax.html, +"qeq/reax (ko)"_fix_qeq_reax.html, "qmmm"_fix_qmmm.html, "qtb"_fix_qtb.html, "reax/c/bonds"_fix_reax_bonds.html, @@ -831,6 +831,7 @@ package"_Section_start.html#start_3. "ackland/atom"_compute_ackland_atom.html, "basal/atom"_compute_basal_atom.html, +"cnp/atom"_compute_cnp_atom.html, "dpd"_compute_dpd.html, "dpd/atom"_compute_dpd_atom.html, "fep"_compute_fep.html, @@ -963,7 +964,7 @@ KOKKOS, o = USER-OMP, t = OPT. "lj/expand (gko)"_pair_lj_expand.html, "lj/gromacs (gko)"_pair_gromacs.html, "lj/gromacs/coul/gromacs (ko)"_pair_gromacs.html, -"lj/long/coul/long (o)"_pair_lj_long.html, +"lj/long/coul/long (io)"_pair_lj_long.html, "lj/long/dipole/long"_pair_dipole.html, "lj/long/tip4p/long"_pair_lj_long.html, "lj/smooth (o)"_pair_lj_smooth.html, @@ -1038,7 +1039,6 @@ package"_Section_start.html#start_3. "lj/sdk (gko)"_pair_sdk.html, "lj/sdk/coul/long (go)"_pair_sdk.html, "lj/sdk/coul/msm (o)"_pair_sdk.html, -"lj/sf (o)"_pair_lj_sf.html, "meam/spline (o)"_pair_meam_spline.html, "meam/sw/spline"_pair_meam_sw_spline.html, "mgpt"_pair_mgpt.html, @@ -1057,7 +1057,7 @@ package"_Section_start.html#start_3. 
"oxdna2/excv"_pair_oxdna2.html, "oxdna2/stk"_pair_oxdna2.html, "quip"_pair_quip.html, -"reax/c (k)"_pair_reaxc.html, +"reax/c (ko)"_pair_reaxc.html, "smd/hertz"_pair_smd_hertz.html, "smd/tlsph"_pair_smd_tlsph.html, "smd/triangulated/surface"_pair_smd_triangulated_surface.html, @@ -1225,7 +1225,7 @@ USER-OMP, t = OPT. "msm/cg (o)"_kspace_style.html, "pppm (go)"_kspace_style.html, "pppm/cg (o)"_kspace_style.html, -"pppm/disp"_kspace_style.html, +"pppm/disp (i)"_kspace_style.html, "pppm/disp/tip4p"_kspace_style.html, "pppm/stagger"_kspace_style.html, "pppm/tip4p (o)"_kspace_style.html :tb(c=4,ea=c) diff --git a/doc/src/Section_errors.txt b/doc/src/Section_errors.txt index 5e0574b390ff4b3da9c13a98ce7636ad349ffa1f..23942a75e55950cc36db03116aed3f045e069f19 100644 --- a/doc/src/Section_errors.txt +++ b/doc/src/Section_errors.txt @@ -8890,6 +8890,14 @@ This is a requirement to use this potential. :dd See the newton command. This is a restriction to use this potential. :dd +{Pair style vashishta/gpu requires atom IDs} :dt + +This is a requirement to use this potential. :dd + +{Pair style vashishta/gpu requires newton pair off} :dt + +See the newton command. This is a restriction to use this potential. :dd + {Pair style tersoff/gpu requires atom IDs} :dt This is a requirement to use the tersoff/gpu potential. :dd diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt index cc44c0590608ded468e9ceb1fe7b03dcad8dff0f..14b2c0baa32e9387974718fc6b6d7043589ce996 100644 --- a/doc/src/Section_packages.txt +++ b/doc/src/Section_packages.txt @@ -1502,7 +1502,7 @@ oxDNA model of Doye, Louis and Ouldridge at the University of Oxford. This includes Langevin-type rigid-body integrators with improved stability. -[Author:] Oliver Henrich (University of Edinburgh). +[Author:] Oliver Henrich (University of Strathclyde, Glasgow). [Install or un-install:] @@ -2027,8 +2027,8 @@ algorithm to formulate single-particle constraint functions g(xi,yi,zi) = 0 and their derivative (i.e. 
the normal of the manifold) n = grad(g). -[Author:] Stefan Paquay (Eindhoven University of Technology (TU/e), The -Netherlands) +[Author:] Stefan Paquay (until 2017: Eindhoven University of Technology (TU/e), The +Netherlands; since 2017: Brandeis University, Waltham, MA, USA) [Install or un-install:] diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt index d629828f122b4885ea368ee149db17a1ae6523f8..ed9e4ae833afeac0bbc59c105ce2ad67fea137d7 100644 --- a/doc/src/accelerate_intel.txt +++ b/doc/src/accelerate_intel.txt @@ -30,8 +30,8 @@ Dihedral Styles: charmm, harmonic, opls :l Fixes: nve, npt, nvt, nvt/sllod :l Improper Styles: cvff, harmonic :l Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne, -charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l -K-Space Styles: pppm :l +charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l +K-Space Styles: pppm, pppm/disp :l :ule [Speed-ups to expect:] @@ -42,62 +42,88 @@ precision mode. Performance improvements are shown compared to LAMMPS {without using other acceleration packages} as these are under active development (and subject to performance changes). The measurements were performed using the input files available in -the src/USER-INTEL/TEST directory. These are scalable in size; the -results given are with 512K particles (524K for Liquid Crystal). -Most of the simulations are standard LAMMPS benchmarks (indicated -by the filename extension in parenthesis) with modifications to the -run length and to add a warmup run (for use with offload -benchmarks). +the src/USER-INTEL/TEST directory with the provided run script. +These are scalable in size; the results given are with 512K +particles (524K for Liquid Crystal). Most of the simulations are +standard LAMMPS benchmarks (indicated by the filename extension in +parenthesis) with modifications to the run length and to add a +warmup run (for use with offload benchmarks). 
:c,image(JPG/user_intel.png) Results are speedups obtained on Intel Xeon E5-2697v4 processors (code-named Broadwell) and Intel Xeon Phi 7250 processors -(code-named Knights Landing) with "18 Jun 2016" LAMMPS built with -Intel Parallel Studio 2016 update 3. Results are with 1 MPI task +(code-named Knights Landing) with "June 2017" LAMMPS built with +Intel Parallel Studio 2017 update 2. Results are with 1 MPI task per physical core. See {src/USER-INTEL/TEST/README} for the raw simulation rates and instructions to reproduce. :line +[Accuracy and order of operations:] + +In most molecular dynamics software, parallelization parameters +(# of MPI, OpenMP, and vectorization) can change the results due +to changing the order of operations with finite-precision +calculations. The USER-INTEL package is deterministic. This means +that the results should be reproducible from run to run with the +{same} parallel configurations and when using deterministic +libraries or library settings (MPI, OpenMP, FFT). However, there +are differences in the USER-INTEL package that can change the +order of operations compared to LAMMPS without acceleration: + +Neighbor lists can be created in a different order :ulb,l +Bins used for sorting atoms can be oriented differently :l +The default stencil order for PPPM is 7. By default, LAMMPS will +calculate other PPPM parameters to fit the desired accuracy with +this order :l +The {newton} setting applies to all atoms, not just atoms shared +between MPI tasks :l +Vectorization can change the order for adding pairwise forces :l +:ule + +The precision mode (described below) used with the USER-INTEL +package can change the {accuracy} of the calculations. For the +default {mixed} precision option, calculations between pairs or +triplets of atoms are performed in single precision, intended to +be within the inherent error of MD simulations. 
All accumulation +is performed in double precision to prevent the error from growing +with the number of atoms in the simulation. {Single} precision +mode should not be used without appropriate validation. + +:line + [Quick Start for Experienced Users:] LAMMPS should be built with the USER-INTEL package installed. Simulations should be run with 1 MPI task per physical {core}, not {hardware thread}. -For Intel Xeon CPUs: - Edit src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi as necessary. :ulb,l -If using {kspace_style pppm} in the input script, add "neigh_modify binsize cutoff" and "kspace_modify diff ad" to the input script for better -performance. Cutoff should be roughly the neighbor list cutoff. By -default the binsize is half the neighbor list cutoff. :l -"-pk intel 0 omp 2 -sf intel" added to LAMMPS command-line :l +Set the environment variable KMP_BLOCKTIME=0 :l +"-pk intel 0 omp $t -sf intel" added to LAMMPS command-line :l +$t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l +For some of the simple 2-body potentials without long-range +electrostatics, performance and scalability can be better with +the "newton off" setting added to the input script :l +If using {kspace_style pppm} in the input script, add +"kspace_modify diff ad" for better performance :l :ule -For Intel Xeon Phi CPUs for simulations without {kspace_style -pppm} in the input script : +For Intel Xeon Phi CPUs: -Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l -Runs should be performed using MCDRAM. :l -"-pk intel 0 omp 2 -sf intel" {or} "-pk intel 0 omp 4 -sf intel" -should be added to the LAMMPS command-line. Choice for best -performance will depend on the simulation. :l +Runs should be performed using MCDRAM. :ulb,l :ule -For Intel Xeon Phi CPUs for simulations with {kspace_style -pppm} in the input script: - -Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l -Runs should be performed using MCDRAM. 
:l -Add "neigh_modify binsize 3" to the input script for better -performance. :l -Add "kspace_modify diff ad" to the input script for better -performance. :l -export KMP_AFFINITY=none :l -"-pk intel 0 omp 3 lrt yes -sf intel" or "-pk intel 0 omp 1 lrt yes --sf intel" added to LAMMPS command-line. Choice for best performance -will depend on the simulation. :l +For simulations using {kspace_style pppm} on Intel CPUs +supporting AVX-512: + +Add "kspace_modify diff ad" to the input script :ulb,l +The command-line option should be changed to +"-pk intel 0 omp $r lrt yes -sf intel" where $r is the number of +threads minus 1. :l +Do not use thread affinity (set KMP_AFFINITY=none) :l +The "newton off" setting may provide better scalability :l :ule For Intel Xeon Phi coprocessors (Offload): @@ -169,6 +195,10 @@ cat /proc/cpuinfo :pre [Building LAMMPS with the USER-INTEL package:] +NOTE: See the src/USER-INTEL/README file for additional flags that +might be needed for best performance on Intel server processors +code-named "Skylake". + The USER-INTEL package must be installed into the source directory: make yes-user-intel :pre @@ -322,8 +352,8 @@ follow in the input script. NOTE: The USER-INTEL package will perform better with modifications to the input script when "PPPM"_kspace_style.html is used: -"kspace_modify diff ad"_kspace_modify.html and "neigh_modify binsize -3"_neigh_modify.html should be added to the input script. +"kspace_modify diff ad"_kspace_modify.html should be added to the +input script. Long-Range Thread (LRT) mode is an option to the "package intel"_package.html command that can improve performance when using @@ -342,6 +372,10 @@ would normally perform best with "-pk intel 0 omp 4", instead use environment variable "KMP_AFFINITY=none". LRT mode is not supported when using offload. 
+NOTE: Changing the "newton"_newton.html setting to off can improve +performance and/or scalability for simple 2-body potentials such as +lj/cut or when using LRT mode on processors supporting AVX-512. + Not all styles are supported in the USER-INTEL package. You can mix the USER-INTEL package with styles from the "OPT"_accelerate_opt.html package or the "USER-OMP package"_accelerate_omp.html. Of course, @@ -467,7 +501,7 @@ supported. Brown, W.M., Carrillo, J.-M.Y., Mishra, B., Gavhane, N., Thakker, F.M., De Kraker, A.R., Yamada, M., Ang, J.A., Plimpton, S.J., "Optimizing Classical Molecular Dynamics in LAMMPS," in Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition, J. Jeffers, J. Reinders, A. Sodani, Eds. Morgan Kaufmann. :ulb,l -Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency. 2016 International Conference for High Performance Computing. In press. :l +Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. "Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency."_http://dl.acm.org/citation.cfm?id=3014915 2016 High Performance Computing, Networking, Storage and Analysis, SC16: International Conference (pp. 82-95). :l Brown, W.M., Carrillo, J.-M.Y., Gavhane, N., Thakkar, F.M., Plimpton, S.J. Optimizing Legacy Molecular Dynamics Software with Directive-Based Offload. Computer Physics Communications. 2015. 195: p. 95-101. :l :ule diff --git a/doc/src/compute_cna_atom.txt b/doc/src/compute_cna_atom.txt index 74240b515d2199c28d9b7e7be66cff64746123a1..23289b01325b4afac23c938788a8ef1e775ecc59 100644 --- a/doc/src/compute_cna_atom.txt +++ b/doc/src/compute_cna_atom.txt @@ -26,7 +26,7 @@ Define a computation that calculates the CNA (Common Neighbor Analysis) pattern for each atom in the group. 
In solid-state systems the CNA pattern is a useful measure of the local crystal structure around an atom. The CNA methodology is described in "(Faken)"_#Faken -and "(Tsuzuki)"_#Tsuzuki. +and "(Tsuzuki)"_#Tsuzuki1. Currently, there are five kinds of CNA patterns LAMMPS recognizes: @@ -93,5 +93,5 @@ above. :link(Faken) [(Faken)] Faken, Jonsson, Comput Mater Sci, 2, 279 (1994). -:link(Tsuzuki) +:link(Tsuzuki1) [(Tsuzuki)] Tsuzuki, Branicio, Rino, Comput Phys Comm, 177, 518 (2007). diff --git a/doc/src/compute_cnp_atom.txt b/doc/src/compute_cnp_atom.txt new file mode 100644 index 0000000000000000000000000000000000000000..9aa63c84de2be20dbdb5e4494427404f637e664e --- /dev/null +++ b/doc/src/compute_cnp_atom.txt @@ -0,0 +1,111 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +compute cnp/atom command :h3 + +[Syntax:] + +compute ID group-ID cnp/atom cutoff :pre + +ID, group-ID are documented in "compute"_compute.html command +cnp/atom = style name of this compute command +cutoff = cutoff distance for nearest neighbors (distance units) :ul + +[Examples:] + +compute 1 all cnp/atom 3.08 :pre + +[Description:] + +Define a computation that calculates the Common Neighborhood +Parameter (CNP) for each atom in the group. In solid-state systems +the CNP is a useful measure of the local crystal structure +around an atom and can be used to characterize whether the +atom is part of a perfect lattice, a local defect (e.g. a dislocation +or stacking fault), or at a surface. + +The value of the CNP parameter will be 0.0 for atoms not in the +specified compute group. Note that normally a CNP calculation should +only be performed on single component systems. 
+ +This parameter is computed using the following formula from +"(Tsuzuki)"_#Tsuzuki2 + +:c,image(Eqs/cnp_eq.jpg) + +where the index {j} goes over the {n}i nearest neighbors of atom +{i}, and the index {k} goes over the {n}ij common nearest neighbors +between atom {i} and atom {j}. Rik and Rjk are the vectors connecting atom +{k} to atoms {i} and {j}. The quantity in the double sum is computed +for each atom. + +The CNP calculation is sensitive to the specified cutoff value. +You should ensure that the appropriate nearest neighbors of an atom are +found within the cutoff distance for the presumed crystal structure. +E.g. 12 nearest neighbors for perfect FCC and HCP crystals, 14 nearest +neighbors for perfect BCC crystals. These formulas can be used to +obtain a good cutoff distance: + +:c,image(Eqs/cnp_cutoff.jpg) + +where a is the lattice constant for the crystal structure concerned +and in the HCP case, x = (c/a) / 1.633, where 1.633 is the ideal c/a +for HCP crystals. + +Also note that since the CNP calculation in LAMMPS uses the neighbors +of an owned atom to find the nearest neighbors of a ghost atom, the +following relation should also be satisfied: + +:c,image(Eqs/cnp_cutoff2.jpg) + +where Rc is the cutoff distance of the potential, Rs is the skin +distance as specified by the "neighbor"_neighbor.html command, and +cutoff is the argument used with the compute cnp/atom command. LAMMPS +will issue a warning if this is not the case. + +The neighbor list needed to compute this quantity is constructed each +time the calculation is performed (e.g. each time a snapshot of atoms +is dumped). Thus it can be inefficient to compute/dump this quantity +too frequently or to have multiple compute/dump commands, each with a +{cnp/atom} style. + +[Output info:] + +This compute calculates a per-atom vector, which can be accessed by +any command that uses per-atom values from a compute as input. 
See +"Section 6.15"_Section_howto.html#howto_15 for an overview of +LAMMPS output options. + +The per-atom vector values will be real positive numbers. Some typical CNP +values: + +FCC lattice = 0.0 +BCC lattice = 0.0 +HCP lattice = 4.4 :pre + +FCC (111) surface ~ 13.0 +FCC (100) surface ~ 26.5 +FCC dislocation core ~ 11 :pre + +[Restrictions:] + +This compute is part of the USER-MISC package. It is only enabled if +LAMMPS was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +[Related commands:] + +"compute cna/atom"_compute_cna_atom.html +"compute centro/atom"_compute_centro_atom.html + +[Default:] none + +:line + +:link(Tsuzuki2) +[(Tsuzuki)] Tsuzuki, Branicio, Rino, Comput Phys Comm, 177, 518 (2007). diff --git a/doc/src/computes.txt b/doc/src/computes.txt index 1d01798791ad4db5c21d3cc3c4c6953e849b4df1..5a6ca66c461cdbff262e18ac51d660640e37f297 100644 --- a/doc/src/computes.txt +++ b/doc/src/computes.txt @@ -17,6 +17,7 @@ Computes :h1 compute_chunk_atom compute_cluster_atom compute_cna_atom + compute_cnp_atom compute_com compute_com_chunk compute_contact_atom diff --git a/doc/src/dump_modify.txt b/doc/src/dump_modify.txt index b5daa6416e91912ab30eef406dec4d4078deb419..2ea1da3db3b6295531b913bc9e25665a9c4c7ba7 100644 --- a/doc/src/dump_modify.txt +++ b/doc/src/dump_modify.txt @@ -16,7 +16,8 @@ dump-ID = ID of dump to modify :ulb,l one or more keyword/value pairs may be appended :l these keywords apply to various dump styles :l keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l - {append} arg = {yes} or {no} + {append} arg = {yes} or {no} or {at} N + N = index of frame written upon first dump {buffer} arg = {yes} or {no} {element} args = E1 E2 ... EN, where N = # of atom types E1,...,EN = element name, e.g. 
C or Fe or Ga @@ -41,6 +42,7 @@ keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} o {region} arg = region-ID or "none" {scale} arg = {yes} or {no} {sfactor} arg = coordinate scaling factor (> 0.0) + {thermo} arg = {yes} or {no} {tfactor} arg = time scaling factor (> 0.0) {sort} arg = {off} or {id} or N or -N off = no sorting of per-atom lines within a snapshot @@ -139,12 +141,13 @@ and {dcd}. It also applies only to text output files, not to binary or gzipped or image/movie files. If specified as {yes}, then dump snapshots are appended to the end of an existing dump file. If specified as {no}, then a new dump file will be created which will -overwrite an existing file with the same name. This keyword can only -take effect if the dump_modify command is used after the -"dump"_dump.html command, but before the first command that causes -dump snapshots to be output, e.g. a "run"_run.html or -"minimize"_minimize.html command. Once the dump file has been opened, -this keyword has no further effect. +overwrite an existing file with the same name. If the {at} option is present +({netcdf} only), then the frame to append to can be specified. Negative values +are counted from the end of the file. This keyword can only take effect if the +dump_modify command is used after the "dump"_dump.html command, but before the +first command that causes dump snapshots to be output, e.g. a "run"_run.html or +"minimize"_minimize.html command. Once the dump file has been opened, this +keyword has no further effect. :line @@ -413,6 +416,13 @@ most effective when the typical magnitude of position data is between :line +The {thermo} keyword ({netcdf} only) triggers writing of "thermo"_thermo.html +information to the dump file alongside per-atom data. The data included in the +dump file is identical to the data specified by +"thermo_style"_thermo_style.html. + +:line + The {region} keyword only applies to the dump {custom}, {cfg}, {image}, and {movie} styles. 
If specified, only atoms in the region will be written to the dump file or included in the image/movie. Only diff --git a/doc/src/dump_netcdf.txt b/doc/src/dump_netcdf.txt index 4e82656698a61860a9b376e2eac19e11f05ec1c8..63568137a65ec5d6891db69a9d7cf33d1be1a098 100644 --- a/doc/src/dump_netcdf.txt +++ b/doc/src/dump_netcdf.txt @@ -24,7 +24,7 @@ args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule [Examples:] dump 1 all netcdf 100 traj.nc type x y z vx vy vz -dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press +dump_modify 1 append yes at -1 thermo yes dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre [Description:] @@ -44,7 +44,7 @@ rank. NetCDF files can be directly visualized via the following tools: Ovito (http://www.ovito.org/). Ovito supports the AMBER convention and -all of the above extensions. :ule,b +all extensions of this dump style. :ule,b VMD (http://www.ks.uiuc.edu/Research/vmd/). :l @@ -52,15 +52,9 @@ AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye contains a NetCDF reader that is not present in the standard distribution of AtomEye. :l,ule -In addition to per-atom data, global data can be included in the dump -file, which are the kinds of values output by the -"thermo_style"_thermo_style.html command . See "Section howto -6.15"_Section_howto.html#howto_15 for an explanation of per-atom -versus global data. The global output written into the dump file can -be from computes, fixes, or variables, by prefixing the compute/fix ID -or variable name with "c_" or "f_" or "v_" respectively, as in the -example above. These global values are specified via the "dump_modify -global"_dump_modify.html command. +In addition to per-atom data, "thermo"_thermo.html data can be included in the +dump file. The data included in the dump file is identical to the data specified +by "thermo_style"_thermo_style.html. 
:link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/) :link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/) diff --git a/doc/src/fix_adapt.txt b/doc/src/fix_adapt.txt index d7c32bef3d0d360fb52cc098684ff8dc9ba3c08b..19d1009b8a838829c0dfee1adde7526134d29053 100644 --- a/doc/src/fix_adapt.txt +++ b/doc/src/fix_adapt.txt @@ -47,7 +47,7 @@ keyword = {scale} or {reset} :l fix 1 all adapt 1 pair soft a 1 1 v_prefactor fix 1 all adapt 1 pair soft a 2* 3 v_prefactor fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut scale 3 3 v_scale2 scale yes reset yes -fix 1 all adapt 10 atom diameter v_size +fix 1 all adapt 10 atom diameter v_size :pre variable ramp_up equal "ramp(0.01,0.5)" fix stretch all adapt 1 bond harmonic r0 1 v_ramp_up :pre diff --git a/doc/src/fix_deform.txt b/doc/src/fix_deform.txt index 8c3a9fa49976103e38300676ab35113b944a4fe8..d3254eece6b1a8e8508b799196c628bb249477c5 100644 --- a/doc/src/fix_deform.txt +++ b/doc/src/fix_deform.txt @@ -565,8 +565,10 @@ more instructions on how to use the accelerated styles effectively. [Restart, fix_modify, output, run start/stop, minimize info:] -No information about this fix is written to "binary restart -files"_restart.html. None of the "fix_modify"_fix_modify.html options +This fix will restore the initial box settings from "binary restart +files"_restart.html, which allows the fix to properly continue +deformation, when using the start/stop options of the "run"_run.html +command. None of the "fix_modify"_fix_modify.html options are relevant to this fix. No global or per-atom quantities are stored by this fix for access by various "output commands"_Section_howto.html#howto_15. 
diff --git a/doc/src/fix_neb.txt b/doc/src/fix_neb.txt index 9d11b7289e4d2ae40da9f66e0e974a156e780fac..52d8a7df84da725f0c37af433966446cf2720604 100644 --- a/doc/src/fix_neb.txt +++ b/doc/src/fix_neb.txt @@ -10,68 +10,183 @@ fix neb command :h3 [Syntax:] -fix ID group-ID neb Kspring :pre - -ID, group-ID are documented in "fix"_fix.html command -neb = style name of this fix command -Kspring = inter-replica spring constant (force/distance units) :ul +fix ID group-ID neb Kspring keyword value :pre + +ID, group-ID are documented in "fix"_fix.html command :ulb,l +neb = style name of this fix command :l +Kspring = spring constant for parallel nudging force (force/distance units or force units, see parallel keyword) :l +zero or more keyword/value pairs may be appended :l +keyword = {parallel} or {perp} or {end} :l + {parallel} value = {neigh} or {ideal} + {neigh} = parallel nudging force based on distance to neighbor replicas (Kspring = force/distance units) + {ideal} = parallel nudging force based on interpolated ideal position (Kspring = force units) + {perp} value = {Kspring2} + {Kspring2} = spring constant for perpendicular nudging force (force/distance units) + {end} values = estyle Kspring3 + {estyle} = {first} or {last} or {last/efirst} or {last/efirst/middle} + {first} = apply force to first replica + {last} = apply force to last replica + {last/efirst} = apply force to last replica and set its target energy to that of first replica + {last/efirst/middle} = same as {last/efirst} plus prevent middle replicas having lower energy than first replica + {Kspring3} = spring constant for target energy term (1/distance units) :pre,ule [Examples:] -fix 1 active neb 10.0 :pre +fix 1 active neb 10.0 +fix 2 all neb 1.0 perp 1.0 end last +fix 2 all neb 1.0 perp 1.0 end first 1.0 end last 1.0 +fix 1 all neb 1.0 parallel ideal end last/efirst 1 :pre [Description:] -Add inter-replica forces to atoms in the group for a multi-replica +Add nudging forces to atoms in the group for a 
multi-replica simulation run via the "neb"_neb.html command to perform a nudged -elastic band (NEB) calculation for transition state finding. Hi-level -explanations of NEB are given with the "neb"_neb.html command and in -"Section 6.5"_Section_howto.html#howto_5 of the manual. The fix -neb command must be used with the "neb" command to define how -inter-replica forces are computed. - -Only the N atoms in the fix group experience inter-replica forces. -Atoms in the two end-point replicas do not experience these forces, -but those in intermediate replicas do. During the initial stage of -NEB, the 3N-length vector of interatomic forces Fi = -Grad(V) acting -on the atoms of each intermediate replica I is altered, as described -in the "(Henkelman1)"_#Henkelman1 paper, to become: - -Fi = -Grad(V) + (Grad(V) dot That) That + Kspring (| Ri+i - Ri | - | Ri - Ri-1 |) That :pre - -Ri are the atomic coordinates of replica I; Ri-1 and Ri+1 are the -coordinates of its neighbor replicas. That (t with a hat over it) is -the unit "tangent" vector for replica I which is a function of Ri, +elastic band (NEB) calculation for finding the transition state. +Hi-level explanations of NEB are given with the "neb"_neb.html command +and in "Section_howto 5"_Section_howto.html#howto_5 of the manual. +The fix neb command must be used with the "neb" command and defines +how inter-replica nudging forces are computed. A NEB calculation is +divided in two stages. In the first stage n replicas are relaxed +toward a MEP until convergence. In the second stage, the climbing +image scheme (see "(Henkelman2)"_#Henkelman2) is enabled, so that the +replica having the highest energy relaxes toward the saddle point +(i.e. the point of highest energy along the MEP), and a second +relaxation is performed. + +A key purpose of the nudging forces is to keep the replicas equally +spaced. During the NEB calculation, the 3N-length vector of +interatomic force Fi = -Grad(V) for each replica I is altered. 
For +all intermediate replicas (i.e. for 1 < I < N, except the climbing +replica) the force vector becomes: + +Fi = -Grad(V) + (Grad(V) dot T') T' + Fnudge_parallel + Fnudge_perp :pre + +T' is the unit "tangent" vector for replica I and is a function of Ri, Ri-1, Ri+1, and the potential energy of the 3 replicas; it points roughly in the direction of (Ri+i - Ri-1); see the -"(Henkelman1)"_#Henkelman1 paper for details. +"(Henkelman1)"_#Henkelman1 paper for details. Ri are the atomic +coordinates of replica I; Ri-1 and Ri+1 are the coordinates of its +neighbor replicas. The term (Grad(V) dot T') is used to remove the +component of the gradient parallel to the path which would tend to +distribute the replicas unevenly along the path. Fnudge_parallel is an +artificial nudging force which is applied only in the tangent +direction and which maintains the equal spacing between replicas (see +below for more information). Fnudge_perp is an optional artificial +spring which is applied in a direction perpendicular to the tangent +direction and which prevents the path from forming acute kinks (see +below for more information). + +In the second stage of the NEB calculation, the interatomic force Fi +for the climbing replica (the replica of highest energy after the +first stage) is changed to: + +Fi = -Grad(V) + 2 (Grad(V) dot T') T' :pre + +and the relaxation procedure is continued to a new converged MEP. + +:line + +The keyword {parallel} specifies how the parallel nudging force is +computed. With a value of {neigh}, the parallel nudging force is +computed as in "(Henkelman1)"_#Henkelman1 by connecting each +intermediate replica with the previous and the next image: + +Fnudge_parallel = {Kspring} * (|Ri+1 - Ri| - |Ri - Ri-1|) :pre + +Note that in this case the specified {Kspring} is in force/distance +units.
+ +With a value of {ideal}, the spring force is computed as suggested in +"(WeinenE)"_#WeinenE : + +Fnudge_parallel = -{Kspring} * (RD-RDideal) / (2 * meanDist) :pre + +where RD is the "reaction coordinate" (see the "neb"_neb.html command), and +RDideal is the ideal RD for which all the images are equally spaced. +I.e. RDideal = (I-1)*meanDist when the climbing replica is off, where +I is the replica number. The meanDist is the average distance +between replicas. Note that in this case the specified {Kspring} is +in force units. + +Note that the {ideal} form of nudging can often be more effective at +keeping the replicas equally spaced. -The first two terms in the above equation are the component of the -interatomic forces perpendicular to the tangent vector. The last term -is a spring force between replica I and its neighbors, parallel to the -tangent vector direction with the specified spring constant {Kspring}. +:line + +The keyword {perp} specifies if and how a perpendicular nudging force +is computed. It adds a spring force perpendicular to the path in +order to prevent the path from becoming too kinky. It can +significantly improve the convergence of the NEB calculation when the +resolution is poor, i.e. when few replicas are used; see +"(Maras)"_#Maras1 for details. -The effect of the first two terms is to push the atoms of each replica -toward the minimum energy path (MEP) of conformational states that -transition over the energy barrier. The MEP for an energy barrier is -defined as a sequence of 3N-dimensional states which cross the barrier -at its saddle point, each of which has a potential energy gradient -parallel to the MEP itself. +The perpendicular spring force is given by -The effect of the last term is to push each replica away from its two -neighbors in a direction along the MEP, so that the final set of -states are equidistant from each other.
+Fnudge_perp = {Kspring2} * F(Ri-1,Ri,Ri+1) (Ri+1 + Ri-1 - 2 Ri) :pre -During the second stage of NEB, the forces on the N atoms in the -replica nearest the top of the energy barrier are altered so that it -climbs to the top of the barrier and finds the saddle point. The -forces on atoms in this replica are described in the -"(Henkelman2)"_#Henkelman2 paper, and become: +where {Kspring2} is the specified value. F(Ri-1,Ri,Ri+1) is a smooth +scalar function of the angle Ri-1 Ri Ri+1. It is equal to 0.0 when +the path is straight and is equal to 1 when the angle Ri-1 Ri Ri+1 is +acute. F(Ri-1,Ri,Ri+1) is defined in "(Jonsson)"_#Jonsson. -Fi = -Grad(V) + 2 (Grad(V) dot That) That :pre +If {Kspring2} is set to 0.0 (the default) then no perpendicular spring +force is added. -The inter-replica forces for the other replicas are unchanged from the -first equation. +:line + +By default, no additional forces act on the first and last replicas +during the NEB relaxation, so these replicas simply relax toward their +respective local minima. By using the keyword {end}, additional +forces can be applied to the first and/or last replicas, to enable +them to relax toward a MEP while constraining their energy. + +The interatomic force Fi for the specified replica becomes: + +Fi = -Grad(V) + (Grad(V) dot T' + (E-ETarget)*Kspring3) T', {when} Grad(V) dot T' < 0 +Fi = -Grad(V) + (Grad(V) dot T' + (ETarget-E)*Kspring3) T', {when} Grad(V) dot T' > 0 +:pre + +where E is the current energy of the replica and ETarget is the target +energy. The "spring" constant on the difference in energies is the +specified {Kspring3} value. + +When {estyle} is specified as {first}, the force is applied to the +first replica. When {estyle} is specified as {last}, the force is +applied to the last replica. Note that the {end} keyword can be used +twice to add forces to both the first and last replicas.
+ +For both these {estyle} settings, the target energy {ETarget} is set +to the initial energy of the replica (at the start of the NEB +calculation). + +If the {estyle} is specified as {last/efirst} or {last/efirst/middle}, +force is applied to the last replica, but the target energy {ETarget} +is continuously set to the energy of the first replica, as it evolves +during the NEB relaxation. + +The difference between these two {estyle} options is as follows. When +{estyle} is specified as {last/efirst}, no change is made to the +inter-replica force applied to the intermediate replicas (neither +first nor last). If the initial path is too far from the MEP, an +intermediate replica may relax "faster" and reach a lower energy than +the last replica. In this case the intermediate replica will be +relaxing toward its own local minimum. This behavior can be prevented +by specifying {estyle} as {last/efirst/middle} which will alter the +inter-replica force applied to intermediate replicas by removing the +contribution of the gradient to the inter-replica force. This will +only be done if a particular intermediate replica has a lower energy +than the first replica. This should effectively prevent the +intermediate replicas from over-relaxing. + +After converging a NEB calculation using an {estyle} of +{last/efirst/middle}, you should check that all intermediate replicas +have a larger energy than the first replica. If this is not the case, +the path is probably not a MEP. + +Finally, note that if the last replica converges toward a local +minimum which has a larger energy than the energy of the first +replica, a NEB calculation using an {estyle} of {last/efirst} or +{last/efirst/middle} cannot reach final convergence. [Restart, fix_modify, output, run start/stop, minimize info:] @@ -96,7 +211,12 @@ for more info on packages.
"neb"_neb.html -[Default:] none +[Default:] + +The option defaults are parallel = neigh, perp = 0.0, end is not +specified (no inter-replica force on the end replicas). + +:line :link(Henkelman1) [(Henkelman1)] Henkelman and Jonsson, J Chem Phys, 113, 9978-9985 (2000). @@ -104,3 +224,15 @@ for more info on packages. :link(Henkelman2) [(Henkelman2)] Henkelman, Uberuaga, Jonsson, J Chem Phys, 113, 9901-9904 (2000). + +:link(WeinenE) +[(WeinenE)] E, Ren, Vanden-Eijnden, Phys Rev B, 66, 052301 (2002). + +:link(Jonsson) +[(Jonsson)] Jonsson, Mills and Jacobsen, in Classical and Quantum +Dynamics in Condensed Phase Simulations, edited by Berne, Ciccotti, +and Coker, World Scientific, Singapore, 1998, p 385. + +:link(Maras1) +[(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson, +Comp Phys Comm, 205, 13-21 (2016). diff --git a/doc/src/fix_qeq_reax.txt b/doc/src/fix_qeq_reax.txt index aed043f6c0e93382bf377f07df855560e7da82ab..a1a19b73686e642d06bd75667c7c2ffb0df29f92 100644 --- a/doc/src/fix_qeq_reax.txt +++ b/doc/src/fix_qeq_reax.txt @@ -8,17 +8,19 @@ fix qeq/reax command :h3 fix qeq/reax/kk command :h3 +fix qeq/reax/omp command :h3 [Syntax:] -fix ID group-ID qeq/reax Nevery cutlo cuthi tolerance params :pre +fix ID group-ID qeq/reax Nevery cutlo cuthi tolerance params args :pre ID, group-ID are documented in "fix"_fix.html command qeq/reax = style name of this fix command Nevery = perform QEq every this many steps cutlo,cuthi = lo and hi cutoff for Taper radius tolerance = precision to which charges will be equilibrated -params = reax/c or a filename :ul +params = reax/c or a filename +args = {dual} (optional) :ul [Examples:] @@ -59,6 +61,10 @@ potential file, except that eta is defined here as twice the eta value in the ReaxFF file. Note that unlike the rest of LAMMPS, the units of this fix are hard-coded to be A, eV, and electronic charge. +The optional {dual} keyword allows the optimization +of the S and T matrices to be performed in parallel.
This is only supported for +the {qeq/reax/omp} style. Otherwise they are processed separately. + [Restart, fix_modify, output, run start/stop, minimize info:] No information about this fix is written to "binary restart diff --git a/doc/src/fix_rigid.txt b/doc/src/fix_rigid.txt index 03edf61ed8fd997370513f209bf4fc417bace630..dbadd3fa634a173b15485554a7f7a79bd3a79e91 100644 --- a/doc/src/fix_rigid.txt +++ b/doc/src/fix_rigid.txt @@ -31,11 +31,12 @@ bodystyle = {single} or {molecule} or {group} :l groupID1, groupID2, ... = list of N group IDs :pre zero or more keyword/value pairs may be appended :l -keyword = {langevin} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {couple} or {tparam} or {pchain} or {dilate} or {force} or {torque} or {infile} :l +keyword = {langevin} or {reinit} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {couple} or {tparam} or {pchain} or {dilate} or {force} or {torque} or {infile} :l {langevin} values = Tstart Tstop Tperiod seed Tstart,Tstop = desired temperature at start/stop of run (temperature units) Tdamp = temperature damping parameter (time units) seed = random number seed to use for white noise (positive integer) + {reinit} = {yes} or {no} {temp} values = Tstart Tstop Tdamp Tstart,Tstop = desired temperature at start/stop of run (temperature units) Tdamp = temperature damping parameter (time units) @@ -68,10 +69,10 @@ keyword = {langevin} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {coup [Examples:] -fix 1 clump rigid single +fix 1 clump rigid single reinit yes fix 1 clump rigid/small molecule fix 1 clump rigid single force 1 off off on langevin 1.0 1.0 1.0 428984 -fix 1 polychains rigid/nvt molecule temp 1.0 1.0 5.0 +fix 1 polychains rigid/nvt molecule temp 1.0 1.0 5.0 reinit no fix 1 polychains rigid molecule force 1*5 off off off force 6*10 off off on fix 1 polychains rigid/small molecule langevin 1.0 1.0 1.0 428984 fix 2 fluid rigid group 3 clump1 clump2 clump3 torque * off off off @@ -87,7 +88,12 @@ means 
that each timestep the total force and torque on each rigid body is computed as the sum of the forces and torques on its constituent particles. The coordinates, velocities, and orientations of the atoms in each body are then updated so that the body moves and rotates as a -single entity. +single entity. This is implemented by creating internal data structures +for each rigid body and performing time integration on these data +structures. Positions, velocities, and orientations of the constituent +particles are regenerated from the rigid body data structures in every +time step. This restricts which operations and fixes can be applied to +rigid bodies. See below for a detailed discussion. Examples of large rigid bodies are a colloidal particle, or portions of a biomolecule such as a protein. @@ -148,8 +154,9 @@ differences may accumulate to produce divergent trajectories. NOTE: You should not update the atoms in rigid bodies via other time-integration fixes (e.g. "fix nve"_fix_nve.html, "fix -nvt"_fix_nh.html, "fix npt"_fix_nh.html), or you will be integrating -their motion more than once each timestep. When performing a hybrid +nvt"_fix_nh.html, "fix npt"_fix_nh.html, "fix move"_fix_move.html), +or you will have conflicting updates to positions and velocities +resulting in unphysical behavior in most cases. When performing a hybrid simulation with some atoms in rigid bodies, and some not, a separate time integration fix like "fix nve"_fix_nve.html or "fix nvt"_fix_nh.html should be used for the non-rigid particles. @@ -165,23 +172,29 @@ setting the force on them to 0.0 (via the "fix setforce"_fix_setforce.html command), and integrating them as usual (e.g. via the "fix nve"_fix_nve.html command). -NOTE: The aggregate properties of each rigid body are calculated one -time at the start of the first simulation run after these fixes are -specified. 
The properties include the position and velocity of the -center-of-mass of the body, its moments of inertia, and its angular -momentum. This is done using the properties of the constituent atoms -of the body at that point in time (or see the {infile} keyword -option). Thereafter, changing properties of individual atoms in the -body will have no effect on a rigid body's dynamics, unless they -affect the "pair_style"_pair_style.html interactions that individual -particles are part of. For example, you might think you could -displace the atoms in a body or add a large velocity to each atom in a -body to make it move in a desired direction before a 2nd run is +IMPORTANT NOTE: The aggregate properties of each rigid body are +calculated at the start of a simulation run and are maintained in +internal data structures. The properties include the position and +velocity of the center-of-mass of the body, its moments of inertia, and +its angular momentum. This is done using the properties of the +constituent atoms of the body at that point in time (or see the {infile} +keyword option). Thereafter, changing these properties of individual +atoms in the body will have no effect on a rigid body's dynamics, unless +they affect any computation of per-atom forces or torques. If the +keyword {reinit} is set to {yes} (the default), the rigid body data +structures will be recreated at the beginning of each {run} command; +if the keyword {reinit} is set to {no}, the rigid body data structures +will be built only at the very first {run} command and maintained for +as long as the rigid fix is defined. For example, you might think you +could displace the atoms in a body or add a large velocity to each atom +in a body to make it move in a desired direction before a 2nd run is performed, using the "set"_set.html or "displace_atoms"_displace_atoms.html or "velocity"_velocity.html -command.
But these commands will not affect the internal attributes -of the body, and the position and velocity of individual atoms in the -body will be reset when time integration starts. +commands. But these commands will not affect the internal attributes +of the body unless {reinit} is set to {yes}. With {reinit} set to {no} +(or using the {infile} option, which implies {reinit} {no}) the position +and velocity of individual atoms in the body will be reset when time +integration starts again. :line @@ -401,6 +414,14 @@ couple none :pre The keyword/value option pairs are used in the following ways. +The {reinit} keyword determines, whether the rigid body properties +are reinitialized between run commands. With the option {yes} (the +default) this is done, with the option {no} this is not done. Turning +off the reinitialization can be helpful to protect rigid bodies against +unphysical manipulations between runs or when properties cannot be +easily recomputed (e.g. when read from a file). When using the {infile} +keyword, the {reinit} option is automatically set to {no}. + The {langevin} and {temp} and {tparam} keywords perform thermostatting of the rigid bodies, altering both their translational and rotational degrees of freedom. What is meant by "temperature" of a collection of @@ -778,7 +799,7 @@ exclude, "fix shake"_fix_shake.html The option defaults are force * on on on and torque * on on on, meaning all rigid bodies are acted on by center-of-mass force and -torque. Also Tchain = Pchain = 10, Titer = 1, Torder = 3. +torque. Also Tchain = Pchain = 10, Titer = 1, Torder = 3, reinit = yes. 
:line diff --git a/doc/src/kspace_modify.txt b/doc/src/kspace_modify.txt index b488df9627800e47d27d62849d0e0e18c0348e7d..66091f4973fa23eec67605fd2e61055e03a05971 100644 --- a/doc/src/kspace_modify.txt +++ b/doc/src/kspace_modify.txt @@ -308,7 +308,8 @@ The option defaults are mesh = mesh/disp = 0 0 0, order = order/disp = gewald = gewald/disp = 0.0, slab = 1.0, compute = yes, cutoff/adjust = yes (MSM), pressure/scalar = yes (MSM), fftbench = yes (PPPM), diff = ik (PPPM), mix/disp = pair, force/disp/real = -1.0, force/disp/kspace = -1.0, -split = 0, tol = 1.0e-6, and disp/auto = no. +split = 0, tol = 1.0e-6, and disp/auto = no. For pppm/intel, order = +order/disp = 7. :line diff --git a/doc/src/kspace_style.txt b/doc/src/kspace_style.txt index 371540bd68b6e5c95c2473c66dc39c8aae9826da..4f27c9aa78e95cb2819b65d44be17acfb53e4a6f 100644 --- a/doc/src/kspace_style.txt +++ b/doc/src/kspace_style.txt @@ -33,12 +33,16 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} accuracy = desired relative error in forces {pppm/gpu} value = accuracy accuracy = desired relative error in forces + {pppm/intel} value = accuracy + accuracy = desired relative error in forces {pppm/kk} value = accuracy accuracy = desired relative error in forces {pppm/omp} value = accuracy accuracy = desired relative error in forces {pppm/cg/omp} value = accuracy accuracy = desired relative error in forces + {pppm/disp/intel} value = accuracy + accuracy = desired relative error in forces {pppm/tip4p/omp} value = accuracy accuracy = desired relative error in forces {pppm/stagger} value = accuracy diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 1769f2982572c04ec79432f380c10fc9b77b3624..d901ef3f771db54baff445be71a474908b731470 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -301,6 +301,7 @@ compute_centro_atom.html compute_chunk_atom.html compute_cluster_atom.html compute_cna_atom.html +compute_cnp_atom.html compute_com.html compute_com_chunk.html 
compute_contact_atom.html @@ -446,7 +447,6 @@ pair_lj96.html pair_lj_cubic.html pair_lj_expand.html pair_lj_long.html -pair_lj_sf.html pair_lj_smooth.html pair_lj_smooth_linear.html pair_lj_soft.html diff --git a/doc/src/manifolds.txt b/doc/src/manifolds.txt index eb3bd6d486aef0fda13b215010cec47d3b75a590..c9bb1ce57f850242b8940365a4a4cebfff4fa162 100644 --- a/doc/src/manifolds.txt +++ b/doc/src/manifolds.txt @@ -24,8 +24,9 @@ to the relevant fixes. {manifold} @ {parameters} @ {equation} @ {description} cylinder @ R @ x^2 + y^2 - R^2 = 0 @ Cylinder along z-axis, axis going through (0,0,0) cylinder_dent @ R l a @ x^2 + y^2 - r(z)^2 = 0, r(x) = R if | z | > l, r(z) = R - a*(1 + cos(z/l))/2 otherwise @ A cylinder with a dent around z = 0 -dumbbell @ a A B c @ -( x^2 + y^2 ) * (a^2 - z^2/c^2) * ( 1 + (A*sin(B*z^2))^4) = 0 @ A dumbbell @ +dumbbell @ a A B c @ -( x^2 + y^2 ) + (a^2 - z^2/c^2) * ( 1 + (A*sin(B*z^2))^4) = 0 @ A dumbbell ellipsoid @ a b c @ (x/a)^2 + (y/b)^2 + (z/c)^2 = 0 @ An ellipsoid +gaussian_bump @ A l rc1 rc2 @ if( x < rc1) -z + A * exp( -x^2 / (2 l^2) ); else if( x < rc2 ) -z + a + b*x + c*x^2 + d*x^3; else z @ A Gaussian bump at x = y = 0, smoothly tapered to a flat plane z = 0. plane @ a b c x0 y0 z0 @ a*(x-x0) + b*(y-y0) + c*(z-z0) = 0 @ A plane with normal (a,b,c) going through point (x0,y0,z0) plane_wiggle @ a w @ z - a*sin(w*x) = 0 @ A plane with a sinusoidal modulation on z along x. 
sphere @ R @ x^2 + y^2 + z^2 - R^2 = 0 @ A sphere of radius R diff --git a/doc/src/neb.txt b/doc/src/neb.txt index 3a62a77a6e372c48ddf50fa0f97940660741b814..a4afc2fe6d529e4efa4507e141ec5ef769775acc 100644 --- a/doc/src/neb.txt +++ b/doc/src/neb.txt @@ -10,28 +10,31 @@ neb command :h3 [Syntax:] -neb etol ftol N1 N2 Nevery file-style arg :pre +neb etol ftol N1 N2 Nevery file-style arg keyword :pre etol = stopping tolerance for energy (energy units) :ulb,l ftol = stopping tolerance for force (force units) :l N1 = max # of iterations (timesteps) to run initial NEB :l N2 = max # of iterations (timesteps) to run barrier-climbing NEB :l Nevery = print replica energies and reaction coordinates every this many timesteps :l -file-style= {final} or {each} or {none} :l +file-style = {final} or {each} or {none} :l {final} arg = filename filename = file with initial coords for final replica - coords for intermediate replicas are linearly interpolated between first and last replica + coords for intermediate replicas are linearly interpolated + between first and last replica {each} arg = filename - filename = unique filename for each replica (except first) with its initial coords - {none} arg = no argument - all replicas assumed to already have their initial coords :pre + filename = unique filename for each replica (except first) + with its initial coords + {none} arg = no argument all replicas assumed to already have + their initial coords :pre +keyword = {verbose} :ule [Examples:] neb 0.1 0.0 1000 500 50 final coords.final neb 0.0 0.001 1000 500 50 each coords.initial.$i -neb 0.0 0.001 1000 500 50 none :pre +neb 0.0 0.001 1000 500 50 none verbose :pre [Description:] @@ -43,8 +46,8 @@ NEB is a method for finding both the atomic configurations and height of the energy barrier associated with a transition state, e.g. for an atom to perform a diffusive hop from one energy basin to another in a coordinated fashion with its neighbors. 
The implementation in LAMMPS -follows the discussion in these 3 papers: "(HenkelmanA)"_#HenkelmanA, -"(HenkelmanB)"_#HenkelmanB, and "(Nakano)"_#Nakano3. +follows the discussion in these 4 papers: "(HenkelmanA)"_#HenkelmanA, +"(HenkelmanB)"_#HenkelmanB, "(Nakano)"_#Nakano3 and "(Maras)"_#Maras2. Each replica runs on a partition of one or more processors. Processor partitions are defined at run-time using the -partition command-line @@ -70,18 +73,17 @@ I.e. the simulation domain, the number of atoms, the interaction potentials, and the starting configuration when the neb command is issued should be the same for every replica. -In a NEB calculation each atom in a replica is connected to the same -atom in adjacent replicas by springs, which induce inter-replica -forces. These forces are imposed by the "fix neb"_fix_neb.html -command, which must be used in conjunction with the neb command. The -group used to define the fix neb command defines the NEB atoms which -are the only ones that inter-replica springs are applied to. If the -group does not include all atoms, then non-NEB atoms have no -inter-replica springs and the forces they feel and their motion is -computed in the usual way due only to other atoms within their -replica. Conceptually, the non-NEB atoms provide a background force -field for the NEB atoms. They can be allowed to move during the NEB -minimization procedure (which will typically induce different +In a NEB calculation each replica is connected to other replicas by +inter-replica nudging forces. These forces are imposed by the "fix +neb"_fix_neb.html command, which must be used in conjunction with the +neb command. The group used to define the fix neb command defines the +NEB atoms which are the only ones that inter-replica springs are +applied to. 
If the group does not include all atoms, then non-NEB +atoms have no inter-replica springs and the forces they feel and their +motion is computed in the usual way due only to other atoms within +their replica. Conceptually, the non-NEB atoms provide a background +force field for the NEB atoms. They can be allowed to move during the +NEB minimization procedure (which will typically induce different coordinates for non-NEB atoms in different replicas), or held fixed using other LAMMPS commands such as "fix setforce"_fix_setforce.html. Note that the "partition"_partition.html command can be used to invoke @@ -93,33 +95,18 @@ specified in different manners via the {file-style} setting, as discussed below. Only atoms whose initial coordinates should differ from the current configuration need be specified. -Conceptually, the initial configuration for the first replica should -be a state with all the atoms (NEB and non-NEB) having coordinates on -one side of the energy barrier. A perfect energy minimum is not -required, since atoms in the first replica experience no spring forces -from the 2nd replica. Thus the damped dynamics minimization will -drive the first replica to an energy minimum if it is not already -there. However, you will typically get better convergence if the -initial state is already at a minimum. For example, for a system with -a free surface, the surface should be fully relaxed before attempting -a NEB calculation. - -Likewise, the initial configuration of the final replica should be a -state with all the atoms (NEB and non-NEB) on the other side of the -energy barrier. Again, a perfect energy minimum is not required, -since the atoms in the last replica also experience no spring forces -from the next-to-last replica, and thus the damped dynamics -minimization will drive it to an energy minimum. +Conceptually, the initial configurations of the first and last +replicas should be states on either side of an energy barrier.
As explained below, the initial configurations of intermediate replicas can be atomic coordinates interpolated in a linear fashion -between the first and last replicas. This is often adequate state for +between the first and last replicas. This is often adequate for simple transitions. For more complex transitions, it may lead to slow convergence or even bad results if the minimum energy path (MEP, see below) of states over the barrier cannot be correctly converged to -from such an initial configuration. In this case, you will want to -generate initial states for the intermediate replicas that are -geometrically closer to the MEP and read them in. +from such an initial path. In this case, you will want to generate +initial states for the intermediate replicas that are geometrically +closer to the MEP and read them in. :line @@ -135,10 +122,11 @@ is assigned to be a fraction of the distance. E.g. if there are 10 replicas, the 2nd replica will assign a position that is 10% of the distance along a line between the starting and final point, and the 9th replica will assign a position that is 90% of the distance along -the line. Note that this procedure to produce consistent coordinates -across all the replicas, the current coordinates need to be the same -in all replicas. LAMMPS does not check for this, but invalid initial -configurations will likely result if it is not the case. +the line. Note that for this procedure to produce consistent +coordinates across all the replicas, the current coordinates need to +be the same in all replicas. LAMMPS does not check for this, but +invalid initial configurations will likely result if it is not the +case. NOTE: The "distance" between the starting and final point is calculated in a minimum-image sense for a periodic simulation box. @@ -150,8 +138,8 @@ interpolation is outside the periodic box, the atom will be wrapped back into the box when the NEB calculation begins. 
For a {file-style} setting of {each}, a filename is specified which is -assumed to be unique to each replica. This can be done by -using a variable in the filename, e.g. +assumed to be unique to each replica. This can be done by using a +variable in the filename, e.g. variable i equal part neb 0.0 0.001 1000 500 50 each coords.initial.$i :pre @@ -198,11 +186,10 @@ The minimizer tolerances for energy and force are set by {etol} and A non-zero {etol} means that the NEB calculation will terminate if the energy criterion is met by every replica. The energies being compared to {etol} do not include any contribution from the inter-replica -forces, since these are non-conservative. A non-zero {ftol} means -that the NEB calculation will terminate if the force criterion is met -by every replica. The forces being compared to {ftol} include the -inter-replica forces between an atom and its images in adjacent -replicas. +nudging forces, since these are non-conservative. A non-zero {ftol} +means that the NEB calculation will terminate if the force criterion +is met by every replica. The forces being compared to {ftol} include +the inter-replica nudging forces. The maximum number of iterations in each stage is set by {N1} and {N2}. These are effectively timestep counts since each iteration of @@ -220,27 +207,27 @@ finding a good energy barrier. {N1} and {N2} must both be multiples of {Nevery}. In the first stage of NEB, the set of replicas should converge toward -the minimum energy path (MEP) of conformational states that transition -over the barrier. The MEP for a barrier is defined as a sequence of -3N-dimensional states that cross the barrier at its saddle point, each -of which has a potential energy gradient parallel to the MEP itself. -The replica states will also be roughly equally spaced along the MEP -due to the inter-replica spring force added by the "fix -neb"_fix_neb.html command. 
- -In the second stage of NEB, the replica with the highest energy -is selected and the inter-replica forces on it are converted to a -force that drives its atom coordinates to the top or saddle point of -the barrier, via the barrier-climbing calculation described in +a minimum energy path (MEP) of conformational states that transition +over a barrier. The MEP for a transition is defined as a sequence of +3N-dimensional states, each of which has a potential energy gradient +parallel to the MEP itself. The configuration of highest energy along +a MEP corresponds to a saddle point. The replica states will also be +roughly equally spaced along the MEP due to the inter-replica nudging +force added by the "fix neb"_fix_neb.html command. + +In the second stage of NEB, the replica with the highest energy is +selected and the inter-replica forces on it are converted to a force +that drives its atom coordinates to the top or saddle point of the +barrier, via the barrier-climbing calculation described in "(HenkelmanB)"_#HenkelmanB. As before, the other replicas rearrange themselves along the MEP so as to be roughly equally spaced. When both stages are complete, if the NEB calculation was successful, -one of the replicas should be an atomic configuration at the top or -saddle point of the barrier, the potential energies for the set of -replicas should represent the energy profile of the barrier along the -MEP, and the configurations of the replicas should be a sequence of -configurations along the MEP. +the configurations of the replicas should be along (close to) the MEP +and the replica with the highest energy should be an atomic +configuration at (close to) the saddle point of the transition. The +potential energies for the set of replicas represent the energy +profile of the transition along the MEP. :line @@ -284,9 +271,9 @@ ID2 x2 y2 z2 ... IDN xN yN zN :pre -The fields are the atom ID, followed by the x,y,z coordinates. -The lines can be listed in any order.
Additional trailing information -on the line is OK, such as a comment. +The fields are the atom ID, followed by the x,y,z coordinates. The +lines can be listed in any order. Additional trailing information on +the line is OK, such as a comment. Note that for a typical NEB calculation you do not need to specify initial coordinates for very many atoms to produce differing starting @@ -310,38 +297,54 @@ this case), the print-out to the screen and master log.lammps file contains a line of output, printed once every {Nevery} timesteps. It contains the timestep, the maximum force per replica, the maximum force per atom (in any replica), potential gradients in the initial, -final, and climbing replicas, the forward and backward energy barriers, -the total reaction coordinate (RDT), and the normalized reaction -coordinate and potential energy of each replica. - -The "maximum force per replica" is -the two-norm of the 3N-length force vector for the atoms in each -replica, maximized across replicas, which is what the {ftol} setting -is checking against. In this case, N is all the atoms in each -replica. The "maximum force per atom" is the maximum force component -of any atom in any replica. The potential gradients are the two-norm -of the 3N-length force vector solely due to the interaction potential i.e. -without adding in inter-replica forces. Note that inter-replica forces -are zero in the initial and final replicas, and only affect -the direction in the climbing replica. For this reason, the "maximum -force per replica" is often equal to the potential gradient in the -climbing replica. In the first stage of NEB, there is no climbing -replica, and so the potential gradient in the highest energy replica -is reported, since this replica will become the climbing replica -in the second stage of NEB. 
- -The "reaction coordinate" (RD) for each -replica is the two-norm of the 3N-length vector of distances between -its atoms and the preceding replica's atoms, added to the RD of the -preceding replica. The RD of the first replica RD1 = 0.0; -the RD of the final replica RDN = RDT, the total reaction coordinate. -The normalized RDs are divided by RDT, -so that they form a monotonically increasing sequence -from zero to one. When computing RD, N only includes the atoms -being operated on by the fix neb command. - -The forward (reverse) energy barrier is the potential energy of the highest -replica minus the energy of the first (last) replica. +final, and climbing replicas, the forward and backward energy +barriers, the total reaction coordinate (RDT), and the normalized +reaction coordinate and potential energy of each replica. + +The "maximum force per replica" is the two-norm of the 3N-length force +vector for the atoms in each replica, maximized across replicas, which +is what the {ftol} setting is checking against. In this case, N is +all the atoms in each replica. The "maximum force per atom" is the +maximum force component of any atom in any replica. The potential +gradients are the two-norm of the 3N-length force vector solely due to +the interaction potential i.e. without adding in inter-replica +forces. + +The "reaction coordinate" (RD) for each replica is the two-norm of the +3N-length vector of distances between its atoms and the preceding +replica's atoms, added to the RD of the preceding replica. The RD of +the first replica RD1 = 0.0; the RD of the final replica RDN = RDT, +the total reaction coordinate. The normalized RDs are divided by RDT, +so that they form a monotonically increasing sequence from zero to +one. When computing RD, N only includes the atoms being operated on by +the fix neb command. + +The forward (reverse) energy barrier is the potential energy of the +highest replica minus the energy of the first (last) replica. 
+ +Supplementary information for all replicas can be printed out to the +screen and master log.lammps file by adding the verbose keyword. This +information includes the following. The "path angle" (pathangle) for +replica i is the angle between the 3N-length vectors (Ri-1 - +Ri) and (Ri+1 - Ri) (where Ri are the atomic coordinates of replica +i). A "path angle" of 180 indicates that replicas i-1, i and i+1 are +aligned. "angletangrad" is the angle between the 3N-length tangent +vector and the 3N-length force vector at image i. The tangent vector +is calculated as in "(HenkelmanA)"_#HenkelmanA for all intermediate +replicas and at R2 - R1 and RM - RM-1 for the first and last replica, +respectively. "anglegrad" is the angle between the 3N-length energy +gradient vector of replica i and that of replica i+1. It is not +defined for the final replica and reads nan. gradV is the norm of the +energy gradient of image i. ReplicaForce is the two-norm of the +3N-length force vector (including nudging forces) for replica i. +MaxAtomForce is the maximum force component of any atom in replica i. + +When a NEB calculation does not converge properly, this supplementary +information can help in understanding what is going wrong. For instance, +when the path angle becomes acute, the definition of tangent used in +the NEB calculation is questionable and the NEB calculation may diverge +"(Maras)"_#Maras2. + When running on multiple partitions, LAMMPS produces additional log files for each partition, e.g. log.lammps.0, log.lammps.1, etc. For a @@ -396,12 +399,16 @@ This command can only be used if LAMMPS was built with the REPLICA package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info on packages.
+:line + [Related commands:] -"prd"_prd.html, "temper"_temper.html, "fix -langevin"_fix_langevin.html, "fix viscous"_fix_viscous.html +"prd"_prd.html, "temper"_temper.html, "fix langevin"_fix_langevin.html, +"fix viscous"_fix_viscous.html -[Default:] none +[Default:] + +none :line @@ -414,3 +421,7 @@ langevin"_fix_langevin.html, "fix viscous"_fix_viscous.html :link(Nakano3) [(Nakano)] Nakano, Comp Phys Comm, 178, 280-289 (2008). + +:link(Maras2) +[(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson, +Comp Phys Comm, 205, 13-21 (2016) diff --git a/doc/src/pair_lj_long.txt b/doc/src/pair_lj_long.txt index d559871f9d47204da4e45347b631ea6583df6982..da9f37b9c324207c0ea62027c7ef8198b04943ce 100644 --- a/doc/src/pair_lj_long.txt +++ b/doc/src/pair_lj_long.txt @@ -7,6 +7,7 @@ :line pair_style lj/long/coul/long command :h3 +pair_style lj/long/coul/long/intel command :h3 pair_style lj/long/coul/long/omp command :h3 pair_style lj/long/coul/long/opt command :h3 pair_style lj/long/tip4p/long command :h3 diff --git a/doc/src/pair_lj_sf.txt b/doc/src/pair_lj_sf.txt deleted file mode 100644 index 65e28b6f51a9d084ffe6e3773a945ca029b553ea..0000000000000000000000000000000000000000 --- a/doc/src/pair_lj_sf.txt +++ /dev/null @@ -1,114 +0,0 @@ -"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c - -:link(lws,http://lammps.sandia.gov) -:link(ld,Manual.html) -:link(lc,Section_commands.html#comm) - -:line - -pair_style lj/sf command :h3 -pair_style lj/sf/omp command :h3 - -[Syntax:] - -pair_style lj/sf cutoff :pre - -cutoff = global cutoff for Lennard-Jones interactions (distance units) :ul - -[Examples:] - -pair_style lj/sf 2.5 -pair_coeff * * 1.0 1.0 -pair_coeff 1 1 1.0 1.0 3.0 :pre - -[Description:] - -Style {lj/sf} computes a truncated and force-shifted LJ interaction -(Shifted Force Lennard-Jones), so that both the potential and the -force go continuously to zero at the cutoff "(Toxvaerd)"_#Toxvaerd: - -:c,image(Eqs/pair_lj_sf.jpg) - -The following 
coefficients must be defined for each pair of atoms -types via the "pair_coeff"_pair_coeff.html command as in the examples -above, or in the data file or restart files read by the -"read_data"_read_data.html or "read_restart"_read_restart.html -commands, or by mixing as described below: - -epsilon (energy units) -sigma (distance units) -cutoff (distance units) :ul - -The last coefficient is optional. If not specified, the global -LJ cutoff specified in the pair_style command is used. - -:line - -Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are -functionally the same as the corresponding style without the suffix. -They have been optimized to run faster, depending on your available -hardware, as discussed in "Section 5"_Section_accelerate.html -of the manual. The accelerated styles take the same arguments and -should produce the same results, except for round-off and precision -issues. - -These accelerated styles are part of the GPU, USER-INTEL, KOKKOS, -USER-OMP and OPT packages, respectively. They are only enabled if -LAMMPS was built with those packages. See the "Making -LAMMPS"_Section_start.html#start_3 section for more info. - -You can specify the accelerated styles explicitly in your input script -by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can -use the "suffix"_suffix.html command in your input script. - -See "Section 5"_Section_accelerate.html of the manual for -more instructions on how to use the accelerated styles effectively. - -:line - -[Mixing, shift, table, tail correction, restart, rRESPA info]: - -For atom type pairs I,J and I != J, the epsilon and sigma -coefficients and cutoff distance for this pair style can be mixed. -Rin is a cutoff value and is mixed like the cutoff. The -default mix value is {geometric}. See the "pair_modify" command for -details. 
- -The "pair_modify"_pair_modify.html shift option is not relevant for -this pair style, since the pair interaction goes to 0.0 at the cutoff. - -The "pair_modify"_pair_modify.html table option is not relevant -for this pair style. - -This pair style does not support the "pair_modify"_pair_modify.html -tail option for adding long-range tail corrections to energy and -pressure, since the energy of the pair interaction is smoothed to 0.0 -at the cutoff. - -This pair style writes its information to "binary restart -files"_restart.html, so pair_style and pair_coeff commands do not need -to be specified in an input script that reads a restart file. - -This pair style can only be used via the {pair} keyword of the -"run_style respa"_run_style.html command. It does not support the -{inner}, {middle}, {outer} keywords. - -:line - -[Restrictions:] - -This pair style is part of the USER-MISC package. It is only enabled -if LAMMPS was built with that package. See the "Making -LAMMPS"_Section_start.html#start_3 section for more info. - -[Related commands:] - -"pair_coeff"_pair_coeff.html - -[Default:] none - -:line - -:link(Toxvaerd) -[(Toxvaerd)] Toxvaerd, Dyre, J Chem Phys, 134, 081102 (2011). 
diff --git a/doc/src/pair_lj_smooth_linear.txt b/doc/src/pair_lj_smooth_linear.txt index 8b9b9aa5faaa66f1aaad1fbad1fd99bdc4c7207b..a48c441f54381cd3bd6630bdb7c3012e77a33d9b 100644 --- a/doc/src/pair_lj_smooth_linear.txt +++ b/doc/src/pair_lj_smooth_linear.txt @@ -11,26 +11,26 @@ pair_style lj/smooth/linear/omp command :h3 [Syntax:] -pair_style lj/smooth/linear Rc :pre +pair_style lj/smooth/linear cutoff :pre -Rc = cutoff for lj/smooth/linear interactions (distance units) :ul +cutoff = global cutoff for Lennard-Jones interactions (distance units) :ul [Examples:] -pair_style lj/smooth/linear 5.456108274435118 -pair_coeff * * 0.7242785984051078 2.598146797350056 -pair_coeff 1 1 20.0 1.3 9.0 :pre +pair_style lj/smooth/linear 2.5 +pair_coeff * * 1.0 1.0 +pair_coeff 1 1 0.3 3.0 9.0 :pre [Description:] -Style {lj/smooth/linear} computes a LJ interaction that combines the -standard 12/6 Lennard-Jones function and subtracts a linear term that -includes the cutoff distance Rc, as in this formula: +Style {lj/smooth/linear} computes a truncated and force-shifted LJ +interaction (aka Shifted Force Lennard-Jones) that combines the +standard 12/6 Lennard-Jones function and subtracts a linear term based +on the cutoff distance, so that both, the potential and the force, go +continuously to zero at the cutoff Rc "(Toxvaerd)"_#Toxvaerd: :c,image(Eqs/pair_lj_smooth_linear.jpg) -At the cutoff Rc, the energy and force (its 1st derivative) will be 0.0. - The following coefficients must be defined for each pair of atoms types via the "pair_coeff"_pair_coeff.html command as in the examples above, or in the data file or restart files read by the @@ -41,8 +41,8 @@ epsilon (energy units) sigma (distance units) cutoff (distance units) :ul -The last coefficient is optional. If not specified, the global value -for Rc is used. +The last coefficient is optional. If not specified, the global +LJ cutoff specified in the pair_style command is used. 
:line @@ -76,10 +76,11 @@ and cutoff distance can be mixed. The default mix value is geometric. See the "pair_modify" command for details. This pair style does not support the "pair_modify"_pair_modify.html -shift option for the energy of the pair interaction. +shift option for the energy of the pair interaction, since it goes +to 0.0 at the cutoff by construction. -The "pair_modify"_pair_modify.html table option is not relevant for -this pair style. +The "pair_modify"_pair_modify.html table option is not relevant +for this pair style. This pair style does not support the "pair_modify"_pair_modify.html tail option for adding long-range tail corrections to energy and @@ -103,3 +104,8 @@ This pair style can only be used via the {pair} keyword of the "pair_coeff"_pair_coeff.html, "pair lj/smooth"_pair_lj_smooth.html [Default:] none + +:line + +:link(Toxvaerd) +[(Toxvaerd)] Toxvaerd, Dyre, J Chem Phys, 134, 081102 (2011). diff --git a/doc/src/pair_reaxc.txt b/doc/src/pair_reaxc.txt index 76a8e6fd5c4a2b27e6352f9c6abb53d838bcfc6b..cfa88673d7a4b4b129775748013b89dced4fbc50 100644 --- a/doc/src/pair_reaxc.txt +++ b/doc/src/pair_reaxc.txt @@ -8,6 +8,7 @@ pair_style reax/c command :h3 pair_style reax/c/kk command :h3 +pair_style reax/c/omp command :h3 [Syntax:] diff --git a/doc/src/pair_vashishta.txt b/doc/src/pair_vashishta.txt index 902e6012f860a0a5dd613d935a2eacfa154b5346..9c275a61d39bbc4756e15bd8124a49c89e9f54fe 100644 --- a/doc/src/pair_vashishta.txt +++ b/doc/src/pair_vashishta.txt @@ -7,6 +7,7 @@ :line pair_style vashishta command :h3 +pair_style vashishta/gpu command :h3 pair_style vashishta/omp command :h3 pair_style vashishta/kk command :h3 pair_style vashishta/table command :h3 diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index 538e2a7268c2a3c8bcf3a082a2b8a4022ef9cef8..2c1b20f4d3c847f92bd997bb77626d9826d54933 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -49,7 +49,6 @@ Pair Styles :h1 pair_lj_cubic pair_lj_expand pair_lj_long - pair_lj_sf 
pair_lj_smooth pair_lj_smooth_linear pair_lj_soft diff --git a/doc/src/set.txt b/doc/src/set.txt index 6b59bf1332c2868bd63e256c76bd6949f76ddfcf..14460c9741ea933e5ba8905a002c1e29754d89e6 100644 --- a/doc/src/set.txt +++ b/doc/src/set.txt @@ -80,6 +80,7 @@ keyword = {type} or {type/fraction} or {mol} or {x} or {y} or {z} or \ value can be an atom-style variable (see below) {image} nx ny nz nx,ny,nz = which periodic image of the simulation box the atom is in + any of nx,ny,nz can be an atom-style variable (see below) {bond} value = bond type for all bonds between selected atoms {angle} value = angle type for all angles between selected atoms {dihedral} value = dihedral type for all dihedrals between selected atoms @@ -363,9 +364,8 @@ A value of -1 means subtract 1 box length to get the true value. LAMMPS updates these flags as atoms cross periodic boundaries during the simulation. The flags can be output with atom snapshots via the "dump"_dump.html command. If a value of NULL is specified for any of -nx,ny,nz, then the current image value for that dimension is -unchanged. For non-periodic dimensions only a value of 0 can be -specified. This keyword does not allow use of atom-style variables. +nx,ny,nz, then the current image value for that dimension is unchanged. +For non-periodic dimensions only a value of 0 can be specified. This command can be useful after a system has been equilibrated and atoms have diffused one or more box lengths in various directions. This command can then reset the image values for atoms so that they diff --git a/doc/src/special_bonds.txt b/doc/src/special_bonds.txt index 6924b321a044652d5a469348cfa803e92e243f09..6a661015bda73c2837da23095afe3c567f37d330 100644 --- a/doc/src/special_bonds.txt +++ b/doc/src/special_bonds.txt @@ -65,7 +65,13 @@ sense to define permanent bonds between atoms that interact via these potentials, though such bonds may exist elsewhere in your system, e.g. when using the "pair_style hybrid"_pair_hybrid.html command. 
Thus LAMMPS ignores special_bonds settings when manybody potentials -are calculated. +are calculated. Please note that the existence of explicit bonds +for atoms that are described by a manybody potential will alter the +neighbor list and thus can render the computation of those interactions +invalid, since those pairs are not only used to determine direct +pairwise interactions but also neighbors of neighbors and more. +The recommended course of action is to remove such bonds, or - if +that is not possible - use a special_bonds setting of 1.0 1.0 1.0. NOTE: Unlike some commands in LAMMPS, you cannot use this command multiple times in an incremental fashion: e.g. to first set the LJ diff --git a/examples/USER/cgdna/util/generate.py b/examples/USER/cgdna/util/generate.py index d5a74e9bf7f85fb7d0f9742f4c1b7895c2ce1aca..eb97f482ccd167c3334a7fe8bfd7f3313231696d 100644 --- a/examples/USER/cgdna/util/generate.py +++ b/examples/USER/cgdna/util/generate.py @@ -14,7 +14,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Oliver Henrich (EPCC, University of Edinburgh) + Contributing author: Oliver Henrich (University of Strathclyde, Glasgow) ------------------------------------------------------------------------- */ """ diff --git a/examples/USER/misc/cnp/Cu_Mishin1.eam b/examples/USER/misc/cnp/Cu_Mishin1.eam new file mode 100644 index 0000000000000000000000000000000000000000..8ea788b16d995cb3682ea620157c565f16e57edf --- /dev/null +++ b/examples/USER/misc/cnp/Cu_Mishin1.eam @@ -0,0 +1,30009 @@ +#-> LAMMPS Potential File in DYNAMO 86 setfl Format <-# +# Mishin Cu EAM1 PRB(2001)63:224106 +# Implemented by G.
Ziegenhain (2007) gerolf@ziegenhain.com +1 Cu +10001 0.00016401626143851118 10001 0.00089991000899910004 5.50678999999999962967 +1 63.54999999999999715783 3.61500000000000021316 FCC +0.00000499999999981071 +-0.00174266214468410396 +-0.00348892099090747365 +-0.00523377758263166015 +-0.00697723296321983710 +-0.00871928817543166090 +-0.01045994426141616529 +-0.01219920226272552810 +-0.01393706322030219269 +-0.01567352817448997016 +-0.01740859816502871027 +-0.01914227423105074877 +-0.02087455741109200957 +-0.02260544874308356711 +-0.02433494926435164629 +-0.02606306001162250752 +-0.02778978202102377892 +-0.02951511632807468644 +-0.03123906396769937643 +-0.03296162597421536944 +-0.03468280338134377416 +-0.03640259722220395844 +-0.03812100852931310513 +-0.03983803833458932075 +-0.04155368766935163549 +-0.04326795756431689455 +-0.04498084904960464314 +-0.04669236315473579424 +-0.04840250090862818766 +-0.05011126333960413959 +-0.05181865147538822214 +-0.05352466634310193427 +-0.05522930896927480404 +-0.05693258037983239817 +-0.05863448160010564791 +-0.06033501365483040502 +-0.06203417756813944806 +-0.06373197436357269652 +-0.06542840506407321399 +-0.06712347069198454363 +-0.06881717226905648133 +-0.07050951081644374341 +-0.07220048735470019352 +-0.07389010290379038892 +-0.07557835848307803417 +-0.07726525511133397472 +-0.07895079380673530878 +-0.08063497558686139044 +-0.08231780146869960291 +-0.08399927246864269392 +-0.08567938960248699942 +-0.08735815388543688442 +-0.08903556633210474303 +-0.09071162795650611343 +-0.09238633977206633929 +-0.09405970279161346426 +-0.09573171802738844605 +-0.09740238649103627466 +-0.09907170919360996919 +-0.10073968714557102189 +-0.10240632135679028636 +-0.10407161283654442485 +-0.10573556259352034914 +-0.10739817163581522053 +-0.10905944097092978851 +-0.11071937160578260162 +-0.11237796454669402024 +-0.11403522079939731881 +-0.11569114136903824175 +-0.11734572726016789801 +-0.11899897947674986654 +-0.12065089902216152851 
+-0.12230148689918429739 +-0.12395074411001827386 +-0.12559867165626981134 +-0.12724527053895817730 +-0.12889054175851422102 +-0.13053448631478437036 +-0.13217710520701952959 +-0.13381839943389151060 +-0.13545836999347882212 +-0.13709701788327555150 +-0.13873434410018958829 +-0.14037034964053995978 +-0.14200503550005993958 +-0.14363840267389926808 +-0.14527045215661704702 +-0.14690118494219106537 +-0.14853060202401202616 +-0.15015870439488265831 +-0.15178549304702659839 +-0.15341096897207551208 +-0.15503513316108197273 +-0.15665798660451368818 +-0.15827953029225039216 +-0.15989976521359050565 +-0.16151869235725069274 +-0.16313631271136053158 +-0.16475262726346739939 +-0.16636763700053824877 +-0.16798134290895339049 +-0.16959374597451315481 +-0.17120484718243256239 +-0.17281464751734887386 +-0.17442314796331537252 +-0.17603034950380180845 +-0.17763625312169928350 +-0.17924085979931758672 +-0.18084417051838252988 +-0.18244618626004083239 +-0.18404690800486056546 +-0.18564633673282804338 +-0.18724447342334737954 +-0.18884131905524670358 +-0.19043687460677061196 +-0.19203114105558904967 +-0.19362411937878576396 +-0.19521581055287207107 +-0.19680621555377975085 +-0.19839533535685704990 +-0.19998317093687756341 +-0.20156972326803979101 +-0.20315499332395736687 +-0.20473898207767149415 +-0.20632169050164517188 +-0.20790311956776186264 +-0.20948327024733215396 +-0.21106214351108310012 +-0.21263974032917376533 +-0.21421606167118145692 +-0.21579110850610705441 +-0.21736488180237900636 +-0.21893738252784844533 +-0.22050861164979007611 +-0.22207857013490395204 +-0.22364725894931680727 +-0.22521467905857761593 +-0.22678083142766558566 +-0.22834571702097905543 +-0.22990933680234793002 +-0.23147169173502746276 +-0.23303278278169470283 +-0.23459261090445959752 +-0.23615117706485522220 +-0.23770848222384177717 +-0.23926452734180747584 +-0.24081931337856987696 +-0.24237284129336922334 +-0.24392511204488043219 +-0.24547612659119977252 +-0.24702588588985685547 +-0.24857439089780886121 
+-0.25012164257143965074 +-0.25166764186656376268 +-0.25321238973842685738 +-0.25475588714169949967 +-0.25629813503048559653 +-0.25783913435831928851 +-0.25937888607816184106 +-0.26091739114240741770 +-0.26245465050287997144 +-0.26399066511083368880 +-0.26552543591695609848 +-0.26705896387136363046 +-0.26859124992360339235 +-0.27012229502265938663 +-0.27165210011694096437 +-0.27318066615429437149 +-0.27470799408199608749 +-0.27623408484675460173 +-0.27775893939471263394 +-0.27928255867144802238 +-0.28080494362196617431 +-0.28232609519071205639 +-0.28384601432155998069 +-0.28536470195781937775 +-0.28688215904223701713 +-0.28839838651698812555 +-0.28991338532368748915 +-0.29142715640338368033 +-0.29293970069655772548 +-0.29445101914312798996 +-0.29596111268245062220 +-0.29746998225331244825 +-0.29897762879394029767 +-0.30048405324199434219 +-0.30198925653457253659 +-0.30349323960821239510 +-0.30499600339888122136 +-0.30649754884198898708 +-0.30799787687238255884 +-0.30949698842434347767 +-0.31099488443159151174 +-0.31249156582728909726 +-0.31398703354402890398 +-0.31548128851384849014 +-0.31697433166822186479 +-0.31846616393806082002 +-0.31995678625371759551 +-0.32144619954498221404 +-0.32293440474108559002 +-0.32442140277069952958 +-0.32590719456193006920 +-0.32739178104233035427 +-0.32887516313889042507 +-0.33035734177804076950 +-0.33183831788565321119 +-0.33331809238704224185 +-0.33479666620695880397 +-0.33627404026960228123 +-0.33775021549860539949 +-0.33922519281705065808 +-0.34069897314745967165 +-0.34217155741179272610 +-0.34364294653145810443 +-0.34511314142730631360 +-0.34658214301962564363 +-0.34804995222815282574 +-0.34951656997206770328 +-0.35098199716999012310 +-0.35244623473998837326 +-0.35390928359957030125 +-0.35537114466569219573 +-0.35683181885475256934 +-0.35829130708259526727 +-0.35974961026450769097 +-0.36120672931522657123 +-0.36266266514892819828 +-0.36411741867923907989 +-0.36557099081923016826 +-0.36702338248141774812 +-0.36847459457776565728 
+-0.36992462801968262198 +-0.37137348371802536562 +-0.37282116258309905277 +-0.37426766552465062787 +-0.37571299345187991747 +-0.37715714727343430113 +-0.37860012789740338235 +-0.38004193623133053492 +-0.38148257318220668566 +-0.38292203965646720576 +-0.38436033656000123671 +-0.38579746479814236437 +-0.38723342527567661264 +-0.38866821889683933477 +-0.39010184656531077252 +-0.39153430918422804652 +-0.39296560765617449817 +-0.39439574288318146600 +-0.39582471576673539104 +-0.39725252720777071147 +-0.39867917810667208300 +-0.40010466936327881982 +-0.40152900187687645683 +-0.40295217654620607561 +-0.40437419426945986345 +-0.40579505594427933701 +-0.40721476246776200369 +-0.40863331473645647662 +-0.41005071364636069831 +-0.41146696009293082241 +-0.41288205497107366426 +-0.41429599917514670082 +-0.41570879359896562022 +-0.41712043913579721632 +-0.41853093667836116509 +-0.41994028711883579774 +-0.42134849134884655442 +-0.42275555025948019505 +-0.42416146474127680577 +-0.42556623568422757842 +-0.42696986397778324829 +-0.42837235051084965320 +-0.42977369617178595718 +-0.43117390184840864720 +-0.43257296842799108916 +-0.43397089679725908695 +-0.43536768784240109653 +-0.43676334244905978821 +-0.43815786150233115848 +-0.43955124588677385589 +-0.44094349648640029926 +-0.44233461418468200677 +-0.44372459986454959591 +-0.44511345440838834264 +-0.44650117869804484272 +-0.44788777361482345896 +-0.44927324003948587716 +-0.45065757885225421475 +-0.45204079093281057666 +-0.45342287716029394673 +-0.45480383841330462857 +-0.45618367556990069289 +-0.45756238950760419470 +-0.45893998110339406793 +-0.46031645123370923400 +-0.46169180077445215460 +-0.46306603060098527891 +-0.46443914158813104365 +-0.46581113461017276123 +-0.46718201054085817248 +-0.46855177025339234120 +-0.46992041462044742417 +-0.47128794451415378930 +-0.47265436080610534475 +-0.47401966436735998300 +-0.47538385606843602815 +-0.47674693677931756497 +-0.47810890736945044210 +-0.47946976870774227208 +-0.48082952166256776039 
+-0.48218816710176337637 +-0.48354570589263046188 +-0.48490213890193478719 +-0.48625746699590566280 +-0.48761169104023815990 +-0.48896481190009355444 +-0.49031683044009444217 +-0.49166774752433273221 +-0.49301756401636431804 +-0.49436628077921129787 +-0.49571389867536153062 +-0.49706041856676952406 +-0.49840584131485510255 +-0.49975016778050784794 +-0.50109339882407955002 +-0.50243553530539442065 +-0.50377657808374021187 +-0.50511652801787265687 +-0.50645538596601724635 +-0.50779315278586700799 +-0.50912982933458073020 +-0.51046541646878829113 +-0.51179991504458843821 +-0.51313332591754612366 +-0.51446564994269738946 +-0.51579688797454759097 +-0.51712704086707050877 +-0.51845610947371145727 +-0.51978409464738239976 +-0.52111099724046949788 +-0.52243681810482778261 +-0.52376155809178026601 +-0.52508521805212549083 +-0.52640779883612909273 +-0.52772930129352957351 +-0.52904972627353608061 +-0.53036907462483195985 +-0.53168734719556765000 +-0.53300454483337089684 +-0.53432066838533831543 +-0.53563571869803894288 +-0.53694969661751823509 +-0.53826260298929007320 +-0.53957443865834386898 +-0.54088520446914323259 +-0.54219490126562197574 +-0.54350352989119077307 +-0.54481109118873494168 +-0.54611758600061044433 +-0.54742301516864966260 +-0.54872737953416228507 +-0.55003067993792686963 +-0.55133291722020327796 +-0.55263409222072201743 +-0.55393420577869179056 +-0.55523325873279683051 +-0.55653125192119468068 +-0.55782818618152241186 +-0.55912406235089351370 +-0.56041888126589345376 +-0.56171264376258900342 +-0.56300535067652468513 +-0.56429700284271699928 +-0.56558760109566508234 +-0.56687714626934182505 +-0.56816563919720142195 +-0.56945308071217493051 +-0.57073947164666982701 +-0.57202481283257444744 +-0.57330910510125532298 +-0.57459234928355584771 +-0.57587454620980160769 +-0.57715569670979682826 +-0.57843580161282304175 +-0.57971486174764530475 +-0.58099287794250331629 +-0.58226985102512207604 +-0.58354578182270655518 +-0.58482067116193858780 +-0.58609451986898353226 
+-0.58736732876948938298 +-0.58863909868858099728 +-0.58990983045086831105 +-0.59117952488044189785 +-0.59244818280087430118 +-0.59371580503521959038 +-0.59498239240601469291 +-0.59624794573527784003 +-0.59751246584451433996 +-0.59877595355470614180 +-0.60003840968632227160 +-0.60129983505931594578 +-0.60256023049311990825 +-0.60381959680665509005 +-0.60507793481832550242 +-0.60633524534601579425 +-0.60759152920709968981 +-0.60884678721843243920 +-0.61010102019635503723 +-0.61135422895669599974 +-0.61260641431476559049 +-0.61385757708536048405 +-0.61510771808276465400 +-0.61635683812074715249 +-0.61760493801256166613 +-0.61885201857095162303 +-0.62009808060814330943 +-0.62134312493585119874 +-0.62258715236527772952 +-0.62383016370711130705 +-0.62507215977152896791 +-0.62631314136819438154 +-0.62755310930625851640 +-0.62879206439436230447 +-0.63003000744063264449 +-0.63126693925268773100 +-0.63250286063763105915 +-0.63373777240205786399 +-0.63497167535205112365 +-0.63620457029318266962 +-0.63743645803051407484 +-0.63866733936859865217 +-0.63989721511147834576 +-0.64112608606268373101 +-0.64235395302523756733 +-0.64358081680165168947 +-0.64480667819393122642 +-0.64603153800357060454 +-0.64725539703155376969 +-0.64847825607836062645 +-0.64970011594395837839 +-0.65092097742780707925 +-0.65214084132886074308 +-0.65335970844556445769 +-0.65457757957585460673 +-0.65579445551716286644 +-0.65701033706641043253 +-0.65822522502001490352 +-0.65943912017388583990 +-0.66065202332342476410 +-0.66186393526352982342 +-0.66307485678859134914 +-0.66428478869249341088 +-0.66549373176861470469 +-0.66670168680983232790 +-0.66790865460851045476 +-0.66911463595651565761 +-0.67031963164520491638 +-0.67152364246543294612 +-0.67272666920755086473 +-0.67392871266140152997 +-0.67512977361632775519 +-0.67632985286116875656 +-0.67752895118425726650 +-0.67872706937342464073 +-0.67992420821599930392 +-0.68112036849880541745 +-0.68231555100816598802 +-0.68350975652989998110 +-0.68470298584932565156 
+-0.68589523975125876731 +-0.68708651902001127709 +-0.68827682443939619539 +-0.68946615679272427180 +-0.69065451686280354693 +-0.69184190543194290512 +-0.69302832328195052014 +-0.69421377119413141266 +-0.69539824994929366753 +-0.69658176032774266062 +-0.69776430310928261314 +-0.69894587907322391906 +-0.70012648899837093275 +-0.70130613366303062861 +-0.70248481384501326730 +-0.70366253032162640046 +-0.70483928386968108804 +-0.70601507526549078797 +-0.70718990528486713742 +-0.70836377470312816840 +-0.70953668429508920390 +-0.71070863483507262792 +-0.71187962709689966978 +-0.71304966185389617728 +-0.71421873987889084034 +-0.71538686194421430287 +-0.71655402882170116108 +-0.71772024128269085175 +-0.71888550009802543173 +-0.72004980603804979999 +-0.72121315987261480629 +-0.72237556237107569679 +-0.72353701430229033775 +-0.72469751643462454460 +-0.72585706953594675284 +-0.72701567437363001645 +-0.72817333171455689289 +-0.72933004232510945108 +-0.73048580697118081773 +-0.73164062641816896004 +-0.73279450143097513148 +-0.73394743277401186532 +-0.73509942121119387082 +-0.73625046750594536071 +-0.73740057242119738667 +-0.73854973671938717317 +-0.73969796116246100404 +-0.74084524651187178002 +-0.74199159352857990690 +-0.74313700297305507192 +-0.74428147560527668780 +-0.74542501218472767555 +-0.74656761347040490051 +-0.74770928022081095676 +-0.74885001319395994024 +-0.74998981314737345194 +-0.75112868083808481678 +-0.75226661702263375453 +-0.75340362245707326316 +-0.75453969789696473391 +-0.75567484409737972761 +-0.75680906181290308332 +-0.75794235179762670107 +-0.75907471480515664730 +-0.76020615158860804783 +-0.76133666290060886261 +-0.76246624949329699916 +-0.76359491211832519753 +-0.76472265152685525713 +-0.76584946846956292177 +-0.76697536369663454892 +-0.76810033795777221677 +-0.76922439200218883926 +-0.77034752657861038649 +-0.77146974243527677295 +-0.77259104031994030315 +-0.77371142097986944641 +-0.77483088516184306371 +-0.77594943361215862332 +-0.77706706707662220879 
+-0.77818378630056028733 +-0.77929959202880905167 +-0.78041448500572241365 +-0.78152846597517156013 +-0.78264153568053695942 +-0.78375369486471968550 +-0.78486494427013497877 +-0.78597528463871402238 +-0.78708471671190349817 +-0.78819324123066869525 +-0.78930085893548884712 +-0.79040757056636090638 +-0.79151337686279932271 +-0.79261827856383559876 +-0.79372227640801895632 +-0.79482537113341500401 +-0.79592756347760862390 +-0.79702885417770130694 +-0.79812924397031381751 +-0.79922873359158530526 +-0.80032732377717308303 +-0.80142501526225307096 +-0.80252180878152268306 +-0.80361770506919327772 +-0.80471270485900170399 +-0.80580680888420075370 +-0.80690001787756360230 +-0.80799233257138536324 +-0.80908375369747931316 +-0.81017428198717977850 +-0.81126391817134169138 +-0.81235266298034392030 +-0.81344051714407883402 +-0.81452748139196939903 +-0.81561355645295208205 +-0.81669874305549083893 +-0.81778304192756934299 +-0.81886645379669209532 +-0.81994897938988731134 +-0.82103061943370736486 +-0.82211137465422345905 +-0.82319124577703295387 +-0.82427023352725514727 +-0.82534833862953282946 +-0.82642556180803228294 +-0.82750190378644350453 +-0.82857736528798020537 +-0.82965194703538136523 +-0.83072564975090834594 +-0.83179847415634977637 +-0.83287042097301666743 +-0.83394149092174574278 +-0.83501168472289899469 +-0.83608100309636546044 +-0.83714944676155589320 +-0.83821701643741097776 +-0.83928371284239378092 +-0.84034953669449663494 +-0.84141448871123669662 +-0.84247856960965838979 +-0.84354178010633074081 +-0.84460412091735403983 +-0.84566559275835184728 +-0.84672619634447676695 +-0.84778593239040955787 +-0.84884480161035757995 +-0.84990280471805745854 +-0.85095994242677175379 +-0.85201621544929539986 +-0.85307162449794771142 +-0.85412617028458059920 +-0.85517985352057146464 +-0.85623267491682941710 +-0.85728463518379238728 +-0.85833573503142845951 +-0.85938597516923365127 +-0.86043535630623679822 +-0.86148387915099400303 +-0.86253154441159596288 +-0.86357835279565886566 
+-0.86462430501033260555 +-0.86566940176230011694 +-0.86671364375777071309 +-0.86775703170249007812 +-0.86879956630173271748 +-0.86984124826030440047 +-0.87088207828254660114 +-0.87192205707232894873 +-0.87296118533305588905 +-0.87399946376766446399 +-0.87503689307862275726 +-0.87607347396793455729 +-0.87710920713713691477 +-0.87814409328729681192 +-0.87917813311902004436 +-0.88021132733244056290 +-0.88124367662723068761 +-0.88227518170259733310 +-0.88330584325727823369 +-0.88433566198954860482 +-0.88536463859721981073 +-0.88639277377763292520 +-0.88742006822767161012 +-0.88844652264374968098 +-0.88947213772181732416 +-0.89049691415736376143 +-0.89152085264541103271 +-0.89254395388051888105 +-0.89356621855678364241 +-0.89458764736783757954 +-0.89560824100685065829 +-0.89662800016653032564 +-0.89764692553911995532 +-0.89866501781640195645 +-0.89968227768969533109 +-0.90069870584985767259 +-0.90171430298728494357 +-0.90272906979190992161 +-0.90374300695320575194 +-0.90475611516018417113 +-0.90576839510139395273 +-0.90677984746492468204 +-0.90779047293840609001 +-0.90880027220900538865 +-0.90980924596342926947 +-0.91081739488792790027 +-0.91182471966828693155 +-0.91283122098983682235 +-0.91383689953744418055 +-0.91484175599552020053 +-0.91584579104801511207 +-0.91684900537841951262 +-0.91785139966976747594 +-0.91885297460463344343 +-0.91985373086513311236 +-0.92085366913292521218 +-0.92185279008921039434 +-0.92285109441473034408 +-0.92384858278977177726 +-0.92484525589416155533 +-0.92584111440727046016 +-0.92683615900801430421 +-0.92783039037484882350 +-0.92882380918577545081 +-0.92981641611833953931 +-0.93080821184962925230 +-0.93179919705627756166 +-0.93278937241446224782 +-0.93377873859990412342 +-0.93476729628787058601 +-0.93575504615317117718 +-0.93674198887016491000 +-0.93772812511275271952 +-0.93871345555438256980 +-0.93969798086804590120 +-0.94068170172628473580 +-0.94166461880118190741 +-0.94264673276436994342 +-0.94362804428702773407 +-0.94460855403987920020 
+-0.94558826269319662394 +-0.94656717091679909437 +-0.94754527938005250753 +-0.94852258875187089870 +-0.94949909970071733056 +-0.95047481289459923026 +-0.95144972900107571689 +-0.95242384868725227243 +-0.95339717261978340623 +-0.95436970146487265509 +-0.95534143588827213911 +-0.95631237655528322783 +-0.95728252413075676230 +-0.95825187927909261099 +-0.95922044266424077996 +-0.96018821494970230113 +-0.96115519679852501334 +-0.96212138887331155601 +-0.96308679183621048736 +-0.96405140634892450002 +-0.96501523307270731245 +-0.96597827266835922799 +-0.96694052579623845922 +-0.96790199311624980361 +-0.96886267528785152692 +-0.96982257297005358687 +-0.97078168682141896539 +-0.97174001750006078204 +-0.97269756566364762307 +-0.97365433196939710214 +-0.97461031707408296576 +-0.97556552163403087441 +-0.97651994630511951279 +-0.97747359174278058980 +-0.97842645860200216923 +-0.97937854753732289659 +-0.98032985920283599590 +-0.98128039425219149017 +-0.98223015333859153841 +-0.98317913711479443251 +-0.98412734623311037829 +-0.98507478134540926717 +-0.98602144310311290454 +-0.98696733215719900656 +-0.98791244915820186634 +-0.98885679475621213186 +-0.98980036960087414144 +-0.99074317434139014260 +-0.99168520962652007000 +-0.99262647610457777070 +-0.99356697442343566706 +-0.99450670523052253635 +-0.99544566917282528706 +-0.99638386689688651643 +-0.99732129904880939542 +-0.99825796627425189556 +-0.99919386921843167393 +-1.00012900852612385272 +-1.00106338484166190739 +-1.00199699880894010917 +-1.00292985107140819601 +-1.00386194227207781182 +-1.00479327305351673338 +-1.00572384405785530959 +-1.00665365592678179851 +-1.00758270930154525402 +-1.00851100482295241711 +-1.00943854313137393319 +-1.01036532486673724662 +-1.01129135066853215186 +-1.01221662117580946116 +-1.01314113702718056054 +-1.01406489886081785379 +-1.01498790731445454050 +-1.01591016302538661442 +-1.01683166663047175327 +-1.01775241876612843051 +-1.01867242006833813583 +-1.01959167117264537517 +-1.02051017271415478405 
+-1.02142792532753645673 +-1.02234492964702261553 +-1.02326118630640672258 +-1.02417669593904858694 +-1.02509145917787036772 +-1.02600547665535568598 +-1.02691874900355628597 +-1.02783127685408559593 +-1.02874306083812050439 +-1.02965410158640446880 +-1.03056439972924418491 +-1.03147395589651091896 +-1.03238277071764361636 +-1.03329084482164379466 +-1.03419817883707976236 +-1.03510477339208462055 +-1.03601062911435803926 +-1.03691574663116536925 +-1.03782012656933986250 +-1.03872376955527800924 +-1.03962667621494664338 +-1.04052884717387539304 +-1.04143028305716378590 +-1.04233098448947902881 +-1.04323095209505400938 +-1.04413018649768996049 +-1.04502868832075446193 +-1.04592645818718676942 +-1.04682349671949070924 +-1.04771980453974045133 +-1.04861538226957784481 +-1.04951023053021419429 +-1.05040434994242914968 +-1.05129774112657203844 +-1.05219040470256186559 +-1.05308234128988620348 +-1.05397355150760385634 +-1.05486403597434152957 +-1.05575379530829738250 +-1.05664283012724036226 +-1.05753114104850975963 +-1.05841872868901476501 +-1.05930559366523602272 +-1.06019173659322496484 +-1.06107715808860492146 +-1.06196185876657134273 +-1.06284583924188891224 +-1.06372910012889687614 +-1.06461164204150571244 +-1.06549346559319757510 +-1.06637457139702651610 +-1.06725496006562270424 +-1.06813463221118487567 +-1.06901358844548788340 +-1.06989182937987781230 +-1.07076935562527508772 +-1.07164616779217447551 +-1.07252226649064374975 +-1.07339765233032502501 +-1.07427232592043364612 +-1.07514628786976085273 +-1.07601953878667178088 +-1.07689207927910657325 +-1.07776390995457971300 +-1.07863503142018291037 +-1.07950544428257977358 +-1.08037514914801291432 +-1.08124414662229817452 +-1.08211243731083084363 +-1.08298002181857677684 +-1.08384690075008416343 +-1.08471307470947375684 +-1.08557854430044420369 +-1.08644331012627159971 +-1.08730737278980904570 +-1.08817073289348642540 +-1.08903339103931062759 +-1.08989534782886798858 +-1.09075660386332140561 +-1.09161715974341255730 
+-1.09247701606946057140 +-1.09333617344136424521 +-1.09419463245859915901 +-1.09505239372022211697 +-1.09590945782486692828 +-1.09676582537074795987 +-1.09762149695565902618 +-1.09847647317697227898 +-1.09933075463164042773 +-1.10018434191619696172 +-1.10103723562675526182 +-1.10188943635900749030 +-1.10274094470822903169 +-1.10359176126927360784 +-1.10444188663657771876 +-1.10529132140415775609 +-1.10614006616561266760 +-1.10698812151412129268 +-1.10783548804244635910 +-1.10868216634293026424 +-1.10952815700749973793 +-1.11037346062766251187 +-1.11121807779450820775 +-1.11206200909871166793 +-1.11290525513052673823 +-1.11374781647979470556 +-1.11458969373593630436 +-1.11543088748795837795 +-1.11627139832445143597 +-1.11711122683358587970 +-1.11795037360312177199 +-1.11878883922039973342 +-1.11962662427234649343 +-1.12046372934547266986 +-1.12130015502587365717 +-1.12213590189922896023 +-1.12297097055080530303 +-1.12380536156545374205 +-1.12463907552761077646 +-1.12547211302129945842 +-1.12630447463012650644 +-1.12713616093728741241 +-1.12796717252556311095 +-1.12879750997732131168 +-1.12962717387451494488 +-1.13045616479868638038 +-1.13128448333096254252 +-1.13211213005205890703 +-1.13293910554227950094 +-1.13376541038151290586 +-1.13459104514923869722 +-1.13541601042452344750 +-1.13624030678602117028 +-1.13706393481197509665 +-1.13788689508021745311 +-1.13870918816816790731 +-1.13953081465283689866 +-1.14035177511082186363 +-1.14117207011831189867 +-1.14199170025108442950 +-1.14281066608450676547 +-1.14362896819353587752 +-1.14444660715272039653 +-1.14526358353619661656 +-1.14607989791769449006 +-1.14689555087053163263 +-1.14771054296761909619 +-1.14852487478145715016 +-1.14933854688413883416 +-1.15015155984734662731 +-1.15096391424235733325 +-1.15177561064003852742 +-1.15258664961084811296 +-1.15339703172483876159 +-1.15420675755165325072 +-1.15501582766053001450 +-1.15582424262029714868 +-1.15663200299937707349 +-1.15743910936578608961 +-1.15824556228713149153 
+-1.15905136233061734075 +-1.15985651006303935873 +-1.16066100605078759145 +-1.16146485085984685348 +-1.16226804505579539573 +-1.16307058920380512745 +-1.16387248386864472494 +-1.16467372961467807713 +-1.16547432700586073295 +-1.16627427660574722879 +-1.16707357897748509323 +-1.16787223468381795577 +-1.16867024428708687900 +-1.16946760834922702799 +-1.17026432743177055684 +-1.17106040209584638667 +-1.17185583290217820718 +-1.17265062041108847346 +-1.17344476518249707375 +-1.17423826777591822079 +-1.17503112875046622499 +-1.17582334866485105351 +-1.17661492807738099486 +-1.17740586754596265884 +-1.17819616762810186472 +-1.17898582888089897835 +-1.17977485186105668369 +-1.18056323712487398758 +-1.18135098522824932843 +-1.18213809672668168638 +-1.18292457217526636448 +-1.18371041212870076187 +-1.18449561714127926670 +-1.18528018776689858527 +-1.18606412455905418923 +-1.18684742807084187000 +-1.18763009885495751661 +-1.18841213746369733784 +-1.18919354444895919443 +-1.18997432036224060070 +-1.19075446575464205523 +-1.19153398117686237789 +-1.19231286717920559326 +-1.19309112431157360312 +-1.19386875312347262579 +-1.19464575416401008745 +-1.19542212798189662060 +-1.19619787512544295538 +-1.19697299614256480460 +-1.19774749158077886690 +-1.19852136198720615745 +-1.19929460790857111974 +-1.20006722989119873901 +-1.20083922848102120362 +-1.20161060422357190980 +-1.20238135766398879234 +-1.20315148934701410255 +-1.20392099981699551847 +-1.20468988961788237013 +-1.20545815929323074656 +-1.20622580938620060920 +-1.20699284043955823442 +-1.20775925299567354898 +-1.20852504759652301658 +-1.20929022478368719540 +-1.21005478509835473488 +-1.21081872908131815691 +-1.21158205727297629828 +-1.21234477021333697522 +-1.21310686844200987800 +-1.21386835249821567473 +-1.21462922292077935005 +-1.21538948024813420190 +-1.21614912501832006519 +-1.21690815776898464406 +-1.21766657903738328983 +-1.21842438936037922304 +-1.21918158927444308937 +-1.21993817931565429191 +-1.22069416001969965890 
+-1.22144953192187655233 +-1.22220429555708909319 +-1.22295845145985193625 +-1.22371200016428627322 +-1.22446494220412560594 +-1.22521727811271197162 +-1.22596900842299616485 +-1.22672013366753929198 +-1.22747065437851277103 +-1.22822057108769788769 +-1.22896988432648712752 +-1.22971859462588239964 +-1.23046670251649792327 +-1.23121420852855623096 +-1.23196111319189505195 +-1.23270741703595865246 +-1.23345312058980760561 +-1.23419822438211079785 +-1.23494272894115031391 +-1.23568663479482010459 +-1.23642994247062554258 +-1.23717265249568697527 +-1.23791476539673528379 +-1.23865628170011365938 +-1.23939720193177960184 +-1.24013752661730425331 +-1.24087725628187039995 +-1.24161639145027624664 +-1.24235493264693186433 +-1.24309288039586318675 +-1.24383023522070823574 +-1.24456699764472089598 +-1.24530316819076958268 +-1.24603874738133590938 +-1.24677373573851735244 +-1.24750813378402680698 +-1.24824194203919103252 +-1.24897516102495487189 +-1.24970779126187525598 +-1.25043983327012653284 +-1.25117128756950091173 +-1.25190215467940291205 +-1.25263243511885535852 +-1.25336212940649982528 +-1.25409123806058997452 +-1.25481976159899999423 +-1.25554770053921904704 +-1.25627505539835482296 +-1.25700182669313220707 +-1.25772801493989350163 +-1.25845362065459775991 +-1.25917864435282389479 +-1.25990308654976823632 +-1.26062694776024497578 +-1.26135022849868771999 +-1.26207292927914682679 +-1.26279505061529517818 +-1.26351659302042085287 +-1.26423755700743356556 +-1.26495794308886200241 +-1.26567775177685470922 +-1.26639698358317742688 +-1.26711563901922108499 +-1.26783371859599225395 +-1.26855122282411958423 +-1.26926815221385291821 +-1.26998450727506106972 +-1.27070028851723448859 +-1.27141549644948659292 +-1.27213013158054888407 +-1.27284419441877694190 +-1.27355768547214687203 +-1.27427060524825597199 +-1.27498295425432583983 +-1.27569473299719637893 +-1.27640594198333401366 +-1.27711658171882547208 +-1.27782665270938156077 +-1.27853615546033383410 +-1.27924509047663903516 
+-1.27995345826287576507 +-1.28066125932324736958 +-1.28136849416158038473 +-1.28207516328132475891 +-1.28278126718555540720 +-1.28348680637696999085 +-1.28419178135789113782 +-1.28489619263026710883 +-1.28560004069567046514 +-1.28630332605529718037 +-1.28700604920997041525 +-1.28770821066013652079 +-1.28840981090586970126 +-1.28911085044686934964 +-1.28981132978245760512 +-1.29051124941158756876 +-1.29121060983283397761 +-1.29190941154440097627 +-1.29260765504411789806 +-1.29330534082944259566 +-1.29400246939745655617 +-1.29469904124487156238 +-1.29539505686802502993 +-1.29609051676288222765 +-1.29678542142503672174 +-1.29747977134970904345 +-1.29817356703174713317 +-1.29886680896562944909 +-1.29955949764545986014 +-1.30025163356497408529 +-1.30094321721753436449 +-1.30163424909613190117 +-1.30232472969338797242 +-1.30301465950155215268 +-1.30370403901250475620 +-1.30439286871775483867 +-1.30508114910844086332 +-1.30576888067533314342 +-1.30645606390882962344 +-1.30714269929896031996 +-1.30782878733538643345 +-1.30851432850739790581 +-1.30919932330391675102 +-1.30988377221349683310 +-1.31056767572432164570 +-1.31125103432420742067 +-1.31193384850060068558 +-1.31261611874058115035 +-1.31329784553086170718 +-1.31397902935778310152 +-1.31465967070732214772 +-1.31533977006508751018 +-1.31601932791632103559 +-1.31669834474589508844 +-1.31737682103831810210 +-1.31805475727772902772 +-1.31873215394790221922 +-1.31940901153224632303 +-1.32008533051380050338 +-1.32076111137524243588 +-1.32143635459887875960 +-1.32211106066665484704 +-1.32278523006014880892 +-1.32345886326057393667 +-1.32413196074877759223 +-1.32480452300524320641 +-1.32547655051008783644 +-1.32614804374306638479 +-1.32681900318356782442 +-1.32748942931061675310 +-1.32815932260287450362 +-1.32882868353863781152 +-1.32949751259584081353 +-1.33016581025205349320 +-1.33083357698448168094 +-1.33150081326996927444 +-1.33216751958499757258 +-1.33283369640568349901 +-1.33349934420778293287 +-1.33416446346668804424 
+-1.33482905465743018070 +-1.33549311825467631465 +-1.33615665473273548258 +-1.33681966456555012535 +-1.33748214822670519197 +-1.33814410618942258857 +-1.33880553892656228854 +-1.33946644691062544119 +-1.34012683061375059701 +-1.34078669050771592808 +-1.34144602706393967217 +-1.34210484075348013278 +-1.34276313204703412474 +-1.34342090141494052702 +-1.34407814932717650791 +-1.34473487625336041162 +-1.34539108266275242443 +-1.34604676902425079987 +-1.34670193580639807607 +-1.34735658347737574658 +-1.34801071250500648091 +-1.34866432335675634491 +-1.34931741649973124808 +-1.34996999240067983017 +-1.35062205152599279501 +-1.35127359434170246644 +-1.35192462131348412058 +-1.35257513290665620787 +-1.35322512958617857670 +-1.35387461181665536003 +-1.35452358006233186671 +-1.35517203478709880038 +-1.35581997645448959489 +-1.35646740552768130250 +-1.35711432246949437186 +-1.35776072774239442431 +-1.35840662180848981144 +-1.35905200512953427960 +-1.35969687816692541560 +-1.36034124138170575691 +-1.36098509523456456805 +-1.36162844018583184535 +-1.36227127669548675470 +-1.36291360522315319059 +-1.36355542622810022024 +-1.36419674016924075133 +-1.36483754750513730514 +-1.36547784869399579932 +-1.36611764419366932266 +-1.36675693446165880118 +-1.36739571995510900138 +-1.36803400113081385925 +-1.36867177844521270558 +-1.36930905235439404066 +-1.36994582331409198162 +-1.37058209177968892689 +-1.37121785820621577834 +-1.37185312304834949870 +-1.37248788676041533208 +-1.37312214979638946843 +-1.37375591260989393660 +-1.37438917565419949085 +-1.37502193938222738723 +-1.37565420424654538678 +-1.37628597069937308461 +-1.37691723919257769104 +-1.37754801017767603000 +-1.37817828410583476106 +-1.37880806142787104562 +-1.37943734259425054844 +-1.38006612805509165653 +-1.38069441826016126029 +-1.38132221365887608577 +-1.38194951470030513718 +-1.38257632183316792052 +-1.38320263550583577583 +-1.38382845616632899066 +-1.38445378426232235114 +-1.38507862024113936883 +-1.38570296454975805389 
+-1.38632681763480558601 +-1.38695017994256319938 +-1.38757305191896485042 +-1.38819543400959499735 +-1.38881732665969170881 +-1.38943873031414799613 +-1.39005964541750559604 +-1.39068007241396229823 +-1.39130001174736994685 +-1.39191946386123177604 +-1.39253842919870640671 +-1.39315690820260451588 +-1.39377490131539305551 +-1.39439240897919169981 +-1.39500943163577506567 +-1.39562596972657271266 +-1.39624202369266825485 +-1.39685759397480024901 +-1.39747268101336308277 +-1.39808728524840630847 +-1.39870140711963486524 +-1.39931504706640907898 +-1.39992820552774355214 +-1.40054088294231293688 +-1.40115307974844416350 +-1.40176479638412310180 +-1.40237603328699012017 +-1.40298679089434408240 +-1.40359706964313901700 +-1.40420686996998744789 +-1.40481619231115839597 +-1.40542503710257804528 +-1.40603340477983174139 +-1.40664129577816021666 +-1.40724871053246269881 +-1.40785564947729913143 +-1.40846211304688395671 +-1.40906810167509233267 +-1.40967361579545746864 +-1.41027865584117151343 +-1.41088322224508511127 +-1.41148731543970917812 +-1.41209093585721245923 +-1.41269408392942374952 +-1.41329676008783233776 +-1.41389896476358645216 +-1.41450069838749392659 +-1.41510196139002397686 +-1.41570275420130498034 +-1.41630307725112780659 +-1.41690293096894137648 +-1.41750231578385732512 +-1.41810123212464778142 +-1.41869968041974625628 +-1.41929766109724719847 +-1.41989517458490732693 +-1.42049222131014474257 +-1.42108880170003892829 +-1.42168491618133296939 +-1.42228056518043066703 +-1.42287574912339898070 +-1.42347046843596780619 +-1.42406472354352864329 +-1.42465851487113726037 +-1.42525184284351214004 +-1.42584470788503425709 +-1.42643711041974863285 +-1.42702905087136544537 +-1.42762052966325603265 +-1.42821154721845822166 +-1.42880210395967122139 +-1.42939220030926139593 +-1.42998183668925826773 +-1.43057101352135562777 +-1.43115973122691375607 +-1.43174799022695609096 +-1.43233579094217233774 +-1.43292313379291691433 +-1.43351001919921072769 +-1.43409644758074072968 
+-1.43468241935685791866 +-1.43526793494658000405 +-1.43585299476859251655 +-1.43643759924124569949 +-1.43702174878255695134 +-1.43760544381021082572 +-1.43818868474155747705 +-1.43877147199361621333 +-1.43935380598307194333 +-1.43993568712627739714 +-1.44051711583925357019 +-1.44109809253768883508 +-1.44167861763693894162 +-1.44225869155202901517 +-1.44283831469765155830 +-1.44341748748816778303 +-1.44399621033760805489 +-1.44457448365966989456 +-1.44515230786772219673 +-1.44572968337480212142 +-1.44630661059361464993 +-1.44688308993653680368 +-1.44745912181561386944 +-1.44803470664256028755 +-1.44860984482876253843 +-1.44918453678527470174 +-1.44975878292282445159 +-1.45033258365180683924 +-1.45090593938229073245 +-1.45147885052401237616 +-1.45205131748638183176 +-1.45262334067847875829 +-1.45319492050905596514 +-1.45376605738653719158 +-1.45433675171901599654 +-1.45490700391425997751 +-1.45547681437970855001 +-1.45604618352247250357 +-1.45661511174933555601 +-1.45718359946675568573 +-1.45775164708086002463 +-1.45831925499745196362 +-1.45888642362200515734 +-1.45945315335966974146 +-1.46001944461526722563 +-1.46058529779329271392 +-1.46115071329791579302 +-1.46171569153297942201 +-1.46228023290200193074 +-1.46284433780817391124 +-1.46340800665436332473 +-1.46397123984310884026 +-1.46453403777662782836 +-1.46509640085681014376 +-1.46565832948522167811 +-1.46621982406310413793 +-1.46678088499137415646 +-1.46734151267062307156 +-1.46790170750112025644 +-1.46846146988280845669 +-1.46902080021530934140 +-1.46957969889791995044 +-1.47013816632961180630 +-1.47069620290903579907 +-1.47125380903451929981 +-1.47181098510406460633 +-1.47236773151535427218 +-1.47292404866574599964 +-1.47347993695227508226 +-1.47403539677165618116 +-1.47459042852028021642 +-1.47514503259421614345 +-1.47569920938921317344 +-1.47625295930069633243 +-1.47680628272377090227 +-1.47735918005321953395 +-1.47791165168350469017 +-1.47846369800876886735 +-1.47901531942283126497 +-1.47956651631919289258 
+-1.48011728909103235097 +-1.48066763813120938487 +-1.48121756383226399478 +-1.48176706658641466063 +-1.48231614678556056219 +-1.48286480482128313341 +-1.48341304108484051127 +-1.48396085596717641764 +-1.48450824985891149943 +-1.48505522315034954595 +-1.48560177623147593451 +-1.48614790949195607617 +-1.48669362332113630387 +-1.48723891810804875746 +-1.48778379424140183573 +-1.48832825210959107665 +-1.48887229210069071961 +-1.48941591460245970069 +-1.48995912000233854400 +-1.49050190868745113804 +-1.49104428104460295934 +-1.49158623746028418111 +-1.49212777832066834094 +-1.49266890401161056445 +-1.49320961491865089599 +-1.49374991142701429858 +-1.49428979392160687922 +-1.49482926278702166201 +-1.49536831840753348111 +-1.49590696116710364372 +-1.49644519144937770960 +-1.49698300963768504701 +-1.49752041611504038698 +-1.49805741126414471154 +-1.49859399546738192299 +-1.49913016910682395100 +-1.49966593256422742186 +-1.50020128622103410265 +-1.50073623045837223344 +-1.50127076565705630529 +-1.50180489219758639408 +-1.50233861046015038099 +-1.50287192082462262022 +-1.50340482367056216262 +-1.50393731937721764069 +-1.50446940832352371586 +-1.50500109088810152258 +-1.50553236744926133284 +-1.50606323838500055778 +-1.50659370407300419181 +-1.50712376489064325824 +-1.50765342121498124861 +-1.50818267342276612908 +-1.50871152189043700176 +-1.50923996699411944178 +-1.50976800910962882796 +-1.51029564861246901053 +-1.51082288587783430955 +-1.51134972128060773855 +-1.51187615519536056041 +-1.51240218799635517399 +-1.51292782005754289365 +-1.51345305175256616970 +-1.51397788345475636795 +-1.51450231553713554611 +-1.51502634837241600962 +-1.51554998233300297628 +-1.51607321779098791481 +-1.51659605511815698264 +-1.51711849468598680701 +-1.51764053686564448498 +-1.51816218202798913772 +-1.51868343054357102240 +-1.51920428278263219823 +-1.51972473911510919109 +-1.52024479991062722029 +-1.52076446553850486154 +-1.52128373636775338085 +-1.52180261276707740059 +-1.52232109510487356729 
+-1.52283918374923166184 +-1.52335687906793459945 +-1.52387418142845865177 +-1.52439109119797322478 +-1.52490760874334174702 +-1.52542373443112166953 +-1.52593946862756402183 +-1.52645481169861296777 +-1.52696976400991024647 +-1.52748432592678784481 +-1.52799849781427532491 +-1.52851228003709582737 +-1.52902567295966784755 +-1.52953867694610501360 +-1.53005129236021542027 +-1.53056351956550340532 +-1.53107535892517043763 +-1.53158681080211067638 +-1.53209787555891518984 +-1.53260855355787350973 +-1.53311884516096830211 +-1.53362875072988069647 +-1.53413827062598717710 +-1.53464740521036224763 +-1.53515615484377576649 +-1.53566451988669627760 +-1.53617250069928923395 +-1.53668009764141633156 +-1.53718731107263861801 +-1.53769414135221316187 +-1.53820058883909727143 +-1.53870665389194294370 +-1.53921233686910441385 +-1.53971763812863082777 +-1.54022255802827201521 +-1.54072709692547693550 +-1.54123125517739056889 +-1.54173503314085991178 +-1.54223843117243042400 +-1.54274144962834691697 +-1.54324408886455244350 +-1.54374634923669251663 +-1.54424823110011000260 +-1.54474973480984889562 +-1.54525086072065365173 +-1.54575160918696941081 +-1.54625198056293977622 +-1.54675197520241169968 +-1.54725159345893148455 +-1.54775083568574700621 +-1.54824970223580749007 +-1.54874819346176284540 +-1.54924630971596566376 +-1.54974405135046877646 +-1.55024141871702791917 +-1.55073841216710039959 +-1.55123503205184687381 +-1.55173127872212868184 +-1.55222715252850940182 +-1.55272265382125795874 +-1.55321778295034418349 +-1.55371254026544036719 +-1.55420692611592370369 +-1.55470094085087362501 +-1.55519458481907202341 +-1.55568785836900769226 +-1.55618076184886988678 +-1.55667329560655387510 +-1.55716545998965871789 +-1.55765725534548682418 +-1.55814868202104572781 +-1.55863974036304786530 +-1.55913043071791057592 +-1.55962075343175587960 +-1.56011070885041047696 +-1.56060029731940685949 +-1.56108951918398286551 +-1.56157837478908234630 +-1.56206686447935449991 +-1.56255498859915475940 
+-1.56304274749254390464 +-1.56353014150328939458 +-1.56401717097486581132 +-1.56450383625045397196 +-1.56499013767294092858 +-1.56547607558492107849 +-1.56596165032869527600 +-1.56644686224627283089 +-1.56693171167936995403 +-1.56741619896940997947 +-1.56790032445752380852 +-1.56838408848455101996 +-1.56886749139103898187 +-1.56935053351724329573 +-1.56983321520312779640 +-1.57031553678836521826 +-1.57079749861233586294 +-1.57127910101412981980 +-1.57176034433254674383 +-1.57224122890609452341 +-1.57272175507299105668 +-1.57320192317116269720 +-1.57368173353824736260 +-1.57416118651158987163 +-1.57464028242824882753 +-1.57511902162498884650 +-1.57559740443828899537 +-1.57607543120433568617 +-1.57655310225902689503 +-1.57703041793797238412 +-1.57750737857649170337 +-1.57798398450961574468 +-1.57846023607208674200 +-1.57893613359835982557 +-1.57941167742259858109 +-1.57988686787868126693 +-1.58036170530019681735 +-1.58083619002044595270 +-1.58131032237244273375 +-1.58178410268891322943 +-1.58225753130229462862 +-1.58273060854473990311 +-1.58320333474811136831 +-1.58367571024398756663 +-1.58414773536365771633 +-1.58461941043812637453 +-1.58509073579811055055 +-1.58556171177404126027 +-1.58603233869606352613 +-1.58650261689403593302 +-1.58697254669753107237 +-1.58744212843583754058 +-1.58791136243795594218 +-1.58838024903260310872 +-1.58884878854821098848 +-1.58931698131292575837 +-1.58978482765460871207 +-1.59025232790083670409 +-1.59071948237890170574 +-1.59118629141581191533 +-1.59165275533829198018 +-1.59211887447277922192 +-1.59258464914543074187 +-1.59305007968211809199 +-1.59351516640843060557 +-1.59397990964967273264 +-1.59444430973086603842 +-1.59490836697674898126 +-1.59537208171177757876 +-1.59583545426012474167 +-1.59629848494568049588 +-1.59676117409205353681 +-1.59722352202256767661 +-1.59768552906026783944 +-1.59814719552791517643 +-1.59860852174798884207 +-1.59906950804268710442 +-1.59953015473392667900 +-1.59999046214334184057 +-1.60045043059228708771 
+-1.60091006040183470027 +-1.60136935189277762603 +-1.60182830538562681610 +-1.60228692120061300130 +-1.60274519965768580398 +-1.60320314107651595847 +-1.60366074577649442290 +-1.60411801407673015873 +-1.60457494629605479375 +-1.60503154275301751497 +-1.60548780376589039776 +-1.60594372965266618536 +-1.60639932073105740074 +-1.60685457731849812291 +-1.60730949973214309878 +-1.60776408828886996361 +-1.60821834330527568824 +-1.60867226509768057596 +-1.60912585398212715226 +-1.60957911027437794438 +-1.61003203428991836788 +-1.61048462634395717075 +-1.61093688675142487909 +-1.61138881582697468531 +-1.61184041388498222602 +-1.61229168123954713643 +-1.61274261820449016369 +-1.61319322509335760785 +-1.61364350221941799113 +-1.61409344989566339024 +-1.61454306843480965838 +-1.61499235814929664734 +-1.61544131935128798538 +-1.61588995235267285366 +-1.61633825746506332166 +-1.61678623499979545741 +-1.61723388526793177000 +-1.61768120858025854503 +-1.61812820524728806504 +-1.61857487557925594501 +-1.61902121988612535120 +-1.61946723847758144998 +-1.61991293166303984563 +-1.62035829975163814254 +-1.62080334305224083025 +-1.62124806187344017161 +-1.62169245652355220599 +-1.62213652731062096812 +-1.62258027454241604559 +-1.62302369852643391113 +-1.62346679956990014304 +-1.62390957797976343002 +-1.62435203406270356474 +-1.62479416812512433843 +-1.62523598047315931403 +-1.62567747141266849553 +-1.62611864124924143660 +-1.62655949028819257762 +-1.62700001883456812912 +-1.62744022719313963243 +-1.62788011566840884470 +-1.62831968456460507433 +-1.62875893418568828963 +-1.62919786483534423382 +-1.62963647681699042025 +-1.63007477043377213555 +-1.63051274598856488218 +-1.63095040378397326819 +-1.63138774412233233946 +-1.63182476730570491519 +-1.63226147363588669492 +-1.63269786341440159561 +-1.63313393694250463817 +-1.63356969452118128139 +-1.63400513645114697781 +-1.63444026303284850599 +-1.63487507456646374848 +-1.63530957135190191387 +-1.63574375368880287063 +-1.63617762187653714712 
+-1.63661117621420926227 +-1.63704441700065306264 +-1.63747734453443483105 +-1.63790995911385439676 +-1.63834226103694335919 +-1.63877425060146264535 +-1.63920592810490939328 +-1.63963729384451140092 +-1.64006834811723134493 +-1.64049909121976278392 +-1.64092952344853260094 +-1.64135964509970233571 +-1.64178945646916485401 +-1.64221895785254923261 +-1.64264814954521609636 +-1.64307703184226094884 +-1.64350560503851417238 +-1.64393386942853769739 +-1.64436182530663099755 +-1.64478947296682598278 +-1.64521681270289077403 +-1.64564384480832659463 +-1.64607056957637043482 +-1.64649698729999505176 +-1.64692309827190785931 +-1.64734890278455203827 +-1.64777440113010631428 +-1.64819959360048473584 +-1.64862448048733756245 +-1.64904906208205126461 +-1.64947333867574852384 +-1.64989731055928845471 +-1.65032097802326704894 +-1.65074434135801606516 +-1.65116740085360436119 +-1.65159015679983833813 +-1.65201260948626149627 +-1.65243475920215399100 +-1.65285660623653440915 +-1.65327815087815821471 +-1.65369939341551885903 +-1.65412033413684711469 +-1.65454097333011329596 +-1.65496131128302526037 +-1.65538134828302929691 +-1.65580108461730879377 +-1.65622052057278801307 +-1.65663965643612876022 +-1.65705849249373260434 +-1.65747702903173999012 +-1.65789526633603045980 +-1.65831320469222309733 +-1.65873084438567652832 +-1.65914818570149069643 +-1.65956522892450242246 +-1.65998197433929117750 +-1.66039842223017530820 +-1.66081457288121403515 +-1.66123042657620811902 +-1.66164598359869697397 +-1.66206124423196199835 +-1.66247620875902635262 +-1.66289087746265251688 +-1.66330525062534584357 +-1.66371932852935255909 +-1.66413311145666109603 +-1.66454659968899987277 +-1.66495979350784173434 +-1.66537269319439884541 +-1.66578529902962846343 +-1.66619761129422894186 +-1.66660963026863973013 +-1.66702135623304537049 +-1.66743278946737261137 +-1.66784393025128974131 +-1.66825477886420925344 +-1.66866533558528784553 +-1.66907560069342464359 +-1.66948557446726164599 +-1.66989525718518638797 
+-1.67030464912532816690 +-1.67071375056556337135 +-1.67112256178350904179 +-1.67153108305652930987 +-1.67193931466173162370 +-1.67234725687596874621 +-1.67275490997583720088 +-1.67316227423767949212 +-1.67356934993758299512 +-1.67397613735137973379 +-1.67438263675464882319 +-1.67478884842271225075 +-1.67519477263064087147 +-1.67560040965324907880 +-1.67600575976509880149 +-1.67641082324049639496 +-1.67681560035349663806 +-1.67722009137789962452 +-1.67762429658725231718 +-1.67802821625484854806 +-1.67843185065372813014 +-1.67883520005667974395 +-1.67923826473623849509 +-1.67964104496468680239 +-1.68004354101405439792 +-1.68044575315611899313 +-1.68084768166240627885 +-1.68124932680419014730 +-1.68165068885249224806 +-1.68205176807808287620 +-1.68245256475147964004 +-1.68285307914295101384 +-1.68325331152251234101 +-1.68365326215992827663 +-1.68405293132471300943 +-1.68445231928613003980 +-1.68485142631319195772 +-1.68525025267465955459 +-1.68564879863904604207 +-1.68604706447461194507 +-1.68644505044937043081 +-1.68684275683108131361 +-1.68724018388725749418 +-1.68763733188516096284 +-1.68803420109180457587 +-1.68843079177395316570 +-1.68882710419811998825 +-1.68922313863057205197 +-1.68961889533732434465 +-1.69001437458414649484 +-1.69040957663655699861 +-1.69080450175982877070 +-1.69119915021898292729 +-1.69159352227879589137 +-1.69198761820379384169 +-1.69238143825825582134 +-1.69277498270621462595 +-1.69316825181145413914 +-1.69356124583751110890 +-1.69395396504767470347 +-1.69434640970498850976 +-1.69473858007224786881 +-1.69513047641200231830 +-1.69552209898655381615 +-1.69591344805795918305 +-1.69630452388802810404 +-1.69669532673832423875 +-1.69708585687016588750 +-1.69747611454462465908 +-1.69786610002252658091 +-1.69825581356445298731 +-1.69864525543073940916 +-1.69903442588147579606 +-1.69942332517650762647 +-1.69981195357543501956 +-1.70020031133761317932 +-1.70058839872215372679 +-1.70097621598792203557 +-1.70136376339354078446 +-1.70175104119738818120 
+-1.70213804965759796239 +-1.70252478903205961558 +-1.70291125957841993355 +-1.70329746155408123798 +-1.70368339521620404398 +-1.70406906082170284122 +-1.70445445862725120101 +-1.70483958888927866759 +-1.70522445186397320072 +-1.70560904780727895513 +-1.70599337697489672472 +-1.70637743962228727312 +-1.70676123600466689290 +-1.70714476637701162431 +-1.70752803099405414677 +-1.70791103011028577718 +-1.70829376397995735815 +-1.70867623285707637137 +-1.70905843699540938019 +-1.70944037664848291769 +-1.70982205206958126631 +-1.71020346351174845623 +-1.71058461122778782126 +-1.71096549547026155480 +-1.71134611649149137591 +-1.71172647454355897345 +-1.71210656987830711628 +-1.71248640274733587852 +-1.71286597340200819062 +-1.71324528209344495444 +-1.71362432907252859593 +-1.71400311458990306512 +-1.71438163889597161571 +-1.71475990224089902547 +-1.71513790487460959788 +-1.71551564704679182505 +-1.71589312900689372476 +-1.71627035100412372870 +-1.71664731328745423511 +-1.71702401610561694589 +-1.71740045970710841772 +-1.71777664434018384476 +-1.71815257025286349801 +-1.71852823769292850642 +-1.71890364690792263325 +-1.71927879814515360835 +-1.71965369165168979748 +-1.72002832767436331096 +-1.72040270645977044772 +-1.72077682825427014102 +-1.72115069330398418046 +-1.72152430185479743407 +-1.72189765415236051282 +-1.72227075044208532972 +-1.72264359096914954073 +-1.72301617597849521246 +-1.72338850571482637974 +-1.72376058042261348646 +-1.72413240034609183127 +-1.72450396572925912508 +-1.72487527681588082018 +-1.72524633384948522519 +-1.72561713707336616963 +-1.72598768673058411416 +-1.72635798306396281987 +-1.72672802631609378921 +-1.72709781672933271324 +-1.72746735454580191416 +-1.72783664000738967914 +-1.72820567335574915013 +-1.72857445483230276473 +-1.72894298467823648302 +-1.72931126313450467258 +-1.72967929044182722187 +-1.73004706684069131661 +-1.73041459257135166183 +-1.73078186787383025980 +-1.73114889298791529981 +-1.73151566815316448888 +-1.73188219360889972265 
+-1.73224846959421485693 +-1.73261449634796860231 +-1.73298027410878940913 +-1.73334580311507324701 +-1.73371108360498382694 +-1.73407611581645482168 +-1.73444089998718742329 +-1.73480543635465234154 +-1.73516972515608847161 +-1.73553376662850489254 +-1.73589756100867820265 +-1.73626110853315607230 +-1.73662440943825502337 +-1.73698746396006131754 +-1.73735027233443117822 +-1.73771283479698990249 +-1.73807515158313341530 +-1.73843722292802937979 +-1.73879904906661364450 +-1.73916063023359379613 +-1.73952196666344782727 +-1.73988305859042391432 +-1.74024390624854241594 +-1.74060450987159454073 +-1.74096486969314145909 +-1.74132498594651785595 +-1.74168485886482837799 +-1.74204448868094941005 +-1.74240387562753062944 +-1.74276301993699167525 +-1.74312192184152636720 +-1.74348058157309915295 +-1.74383899936344755055 +-1.74419717544408214849 +-1.74455511004628416316 +-1.74491280340111210023 +-1.74527025573939176262 +-1.74562746729172668658 +-1.74598443828849125836 +-1.74634116895983382278 +-1.74669765953567757144 +-1.74705391024571698999 +-1.74740992131942318721 +-1.74776569298603789981 +-1.74812122547458081989 +-1.74847651901384337769 +-1.74883157383239229432 +-1.74918639015856847152 +-1.74954096822048832394 +-1.74989530824604200276 +-1.75024941046289561619 +-1.75060327509849011918 +-1.75095690238004220163 +-1.75131029253454295613 +-1.75166344578876009841 +-1.75201636236923663503 +-1.75236904250229130753 +-1.75272148641401903646 +-1.75307369433029136552 +-1.75342566647675579539 +-1.75377740307883533966 +-1.75412890436173052322 +-1.75448017055042027046 +-1.75483120186965679821 +-1.75518199854397227710 +-1.75553256079767461273 +-1.75588288885484922197 +-1.75623298293935925507 +-1.75658284327484603971 +-1.75693247008472686055 +-1.75728186359219940016 +-1.75763102402023663196 +-1.75797995159159192724 +-1.75832864652879594658 +-1.75867710905415775002 +-1.75902533938976524119 +-1.75937333775748494524 +-1.75972110437896267499 +-1.76006863947562264272 +-1.76041594326866857045 
+-1.76076301597908391194 +-1.76110985782763029839 +-1.76145646903484998091 +-1.76180284982106538649 +-1.76214900040637778567 +-1.76249492101066929095 +-1.76284061185360041435 +-1.76318607315461450824 +-1.76353130513293399062 +-1.76387630800756189942 +-1.76422108199728233657 +-1.76456562732066046806 +-1.76490994419604141363 +-1.76525403284155202321 +-1.76559789347510109891 +-1.76594152631437939505 +-1.76628493157685650949 +-1.76662810947978599074 +-1.76697106024020289539 +-1.76731378407492445426 +-1.76765628120054940631 +-1.76799855183345977494 +-1.76834059618981820350 +-1.76868241448557217410 +-1.76902400693645067697 +-1.76936537375796509863 +-1.76970651516541077619 +-1.77004743137386566509 +-1.77038812259819211548 +-1.77072858905303354149 +-1.77106883095281952833 +-1.77140884851176116932 +-1.77174864194385506266 +-1.77208821146288153514 +-1.77242755728240397595 +-1.77276667961577061305 +-1.77310557867611562344 +-1.77344425467635469218 +-1.77378270782919078563 +-1.77412093834711059870 +-1.77445894644238566507 +-1.77479673232707302333 +-1.77513429621301543904 +-1.77547163831184007243 +-1.77580875883496025480 +-1.77614565799357526643 +-1.77648233599866833821 +-1.77681879306101153659 +-1.77715502939116154479 +-1.77749104519946077296 +-1.77782684069603891253 +-1.77816241609081138186 +-1.77849777159348154676 +-1.77883290741353805586 +-1.77916782376025728318 +-1.77950252084270332809 +-1.77983699886972712711 +-1.78017125804996467764 +-1.78050529859184303305 +-1.78083912070357541779 +-1.78117272459316211553 +-1.78150611046839202345 +-1.78183927853684243026 +-1.78217222900587857204 +-1.78250496208265341025 +-1.78283747797410940805 +-1.78316977688697653193 +-1.78350185902777558233 +-1.78383372460281308669 +-1.78416537381818685049 +-1.78449680687978351479 +-1.78482802399327944443 +-1.78515902536413939572 +-1.78548981119761851488 +-1.78582038169876033962 +-1.78615073707240079592 +-1.78648087752316397925 +-1.78681080325546415288 +-1.78714051447350685820 +-1.78747001138128691622 
+-1.78779929418259131424 +-1.78812836308099543103 +-1.78845721827986792185 +-1.78878585998236694365 +-1.78911428839144215353 +-1.78944250370983470866 +-1.78977050614007748841 +-1.79009829588449309590 +-1.79042587314519807684 +-1.79075323812410047708 +-1.79108039102289851030 +-1.79140733204308411075 +-1.79173406138594160097 +-1.79206057925254635954 +-1.79238688584376815172 +-1.79271298136026691061 +-1.79303886600249740013 +-1.79336453997070743860 +-1.79369000346493612241 +-1.79401525668501782285 +-1.79434029983057863333 +-1.79466513310103881196 +-1.79498975669561167123 +-1.79531417081330624264 +-1.79563837565292283571 +-1.79596237141305747897 +-1.79628615829210014354 +-1.79660973648823452109 +-1.79693310619943891204 +-1.79725626762348689169 +-1.79757922095794664408 +-1.79790196640018073992 +-1.79822450414734613666 +-1.79854683439639684295 +-1.79886895734407925573 +-1.79919087318693926569 +-1.79951258212131448566 +-1.79983408434334091197 +-1.80015538004894803947 +-1.80047646943386308038 +-1.80079735269360852179 +-1.80111803002350479019 +-1.80143850161866536652 +-1.80175876767400278133 +-1.80207882838422617233 +-1.80239868394384106232 +-1.80271833454714935918 +-1.80303778038824980001 +-1.80335702166104017152 +-1.80367605855921420144 +-1.80399489127626244667 +-1.80431352000547517989 +-1.80463194493993861478 +-1.80495016627253734853 +-1.80526818419595413978 +-1.80558599890267035271 +-1.80590361058496484681 +-1.80622101943491508713 +-1.80653822564439892062 +-1.80685522940508902501 +-1.80717203090846023628 +-1.80748863034578555187 +-1.80780502790813635272 +-1.80812122378638506781 +-1.80843721817120051121 +-1.80875301125305432137 +-1.80906860322221541004 +-1.80938399426875395903 +-1.80969918458253942184 +-1.81001417435324074567 +-1.81032896377032881396 +-1.81064355302307333773 +-1.81095794230054396579 +-1.81127213179161405954 +-1.81158612168495536388 +-1.81189991216903867333 +-1.81221350343214071543 +-1.81252689566233549101 +-1.81284008904749871505 +-1.81315308377531003714 
+-1.81346588003324837857 +-1.81377847800859370864 +-1.81409087788843037536 +-1.81440307985964310866 +-1.81471508410891857466 +-1.81502689082274781818 +-1.81533850018742093368 +-1.81564991238903328252 +-1.81596112761348105202 +-1.81627214604646525231 +-1.81658296787348705337 +-1.81689359327985400228 +-1.81720402245067380598 +-1.81751425557085921625 +-1.81782429282512647539 +-1.81813413439799487215 +-1.81844378047378718577 +-1.81875323123663079627 +-1.81906248687045657420 +-1.81937154755900065695 +-1.81968041348580111816 +-1.81998908483420263060 +-1.82029756178735402372 +-1.82060584452820872770 +-1.82091393323952321914 +-1.82122182810386212815 +-1.82152952930359268713 +-1.82183703702088828358 +-1.82214435143772779391 +-1.82245147273589624959 +-1.82275840109698150648 +-1.82306513670238001801 +-1.82337167973329372650 +-1.82367803037072984118 +-1.82398418879550194838 +-1.82429015518823001152 +-1.82459592972934059318 +-1.82490151259906685510 +-1.82520690397744833611 +-1.82551210404433073009 +-1.82581711297936966076 +-1.82612193096202313214 +-1.82642655817156152054 +-1.82673099478705780463 +-1.82703524098739711334 +-1.82733929695126828818 +-1.82764316285716965638 +-1.82794683888340792066 +-1.82825032520809704906 +-1.82855362200915894100 +-1.82885672946432542574 +-1.82915964775113426555 +-1.82946237704693381865 +-1.82976491752888059672 +-1.83006726937393948695 +-1.83036943275888486227 +-1.83067140786030058131 +-1.83097319485457910027 +-1.83127479391792169494 +-1.83157620522634090321 +-1.83187742895565719436 +-1.83217846528150185570 +-1.83247931437931632637 +-1.83277997642435042103 +-1.83308045159166632665 +-1.83338074005613416162 +-1.83368084199243708277 +-1.83398075757506773265 +-1.83428048697832779546 +-1.83458003037633288201 +-1.83487938794300675660 +-1.83517855985208666603 +-1.83547754627712000897 +-1.83577634739146500209 +-1.83607496336829090211 +-1.83637339438058178054 +-1.83667164060112986235 +-1.83696970220254174322 +-1.83726757935723439275 +-1.83756527223743781896 
+-1.83786278101519462425 +-1.83816010586235933921 +-1.83845724695059775655 +-1.83875420445139181602 +-1.83905097853603316516 +-1.83934756937562782220 +-1.83964397714109439974 +-1.83994020200316410474 +-1.84023624413238429121 +-1.84053210369911224298 +-1.84082778087352161300 +-1.84112327582559753836 +-1.84141858872514130319 +-1.84171371974176634190 +-1.84200866904490179188 +-1.84230343680379005100 +-1.84259802318748766581 +-1.84289242836486666377 +-1.84318665250461410920 +-1.84348069577523032692 +-1.84377455834503201082 +-1.84406824038214955941 +-1.84436174205452974029 +-1.84465506352993480199 +-1.84494820497594202990 +-1.84524116655994374625 +-1.84553394844914886441 +-1.84582655081058155666 +-1.84611897381108258642 +-1.84641121761730842010 +-1.84670328239573189322 +-1.84699516831264221040 +-1.84728687553414561151 +-1.84757840422616448350 +-1.84786975455443736038 +-1.84816092668452136571 +-1.84845192078178888195 +-1.84874273701143154724 +-1.84903337553845537045 +-1.84932383652768672633 +-1.84961412014376791468 +-1.84990422655116026895 +-1.85019415591414082556 +-1.85048390839680609865 +-1.85077348416307119194 +-1.85106288337666757826 +-1.85135210620114709634 +-1.85164115279987950835 +-1.85193002333605205578 +-1.85221871797267123583 +-1.85250723687256368954 +-1.85279558019837375937 +-1.85308374811256548753 +-1.85337174077742172784 +-1.85365955835504481186 +-1.85394720100735721502 +-1.85423466889610066843 +-1.85452196218283593687 +-1.85480908102894570533 +-1.85509602559563058222 +-1.85538279604391243005 +-1.85566939253463325521 +-1.85595581522845542999 +-1.85624206428586235873 +-1.85652813986715781169 +-1.85681404213246592505 +-1.85709977124173231111 +-1.85738532735472272606 +-1.85767071063102662265 +-1.85795592123005248730 +-1.85824095931103050461 +-1.85852582503301411165 +-1.85881051855487577917 +-1.85909504003531234062 +-1.85937938963284232763 +-1.85966356750580374957 +-1.85994757381236142102 +-1.86023140871049807998 +-1.86051507235802104923 +-1.86079856491256090401 
+-1.86108188653157013981 +-1.86136503737232406053 +-1.86164801759192188868 +-1.86193082734728410088 +-1.86221346679515642464 +-1.86249593609210695178 +-1.86277823539452791479 +-1.86306036485863502072 +-1.86334232464046745115 +-1.86362411489588852831 +-1.86390573578058660331 +-1.86418718745007239157 +-1.86446847005968141531 +-1.86474958376457622400 +-1.86503052871973928895 +-1.86531130507998166301 +-1.86559191299993765156 +-1.86587235263406681085 +-1.86615262413665394803 +-1.86643272766180867706 +-1.86671266336346564074 +-1.86699243139538628711 +-1.86727203191115687098 +-1.86755146506418867602 +-1.86783073100772067932 +-1.86810982989481577654 +-1.86838876187836411269 +-1.86866752711108152774 +-1.86894612574551199913 +-1.86922455793402342294 +-1.86950282382881161070 +-1.86978092358189940114 +-1.87005885734513643825 +-1.87033662527019872712 +-1.87061422750858930009 +-1.87089166421164021514 +-1.87116893553050833710 +-1.87144604161618000049 +-1.87172298261946856712 +-1.87199975869101553627 +-1.87227636998128987855 +-1.87255281664058759183 +-1.87282909881903547600 +-1.87310521666658669204 +-1.87338117033302298253 +-1.87365695996795400546 +-1.87393258572082133107 +-1.87420804774089022615 +-1.87448334617725897999 +-1.87475848117885335320 +-1.87503345289442879817 +-1.87530826147256979297 +-1.87558290706168939721 +-1.87585738981003191661 +-1.87613170986567046050 +-1.87640586737650805205 +-1.87667986249027674006 +-1.87695369535454026355 +-1.87722736611669183127 +-1.87750087492395478783 +-1.87777422192338283580 +-1.87804740726186070177 +-1.87832043108610391435 +-1.87859329354265747192 +-1.87886599477789983936 +-1.87913853493803784112 +-1.87941091416911199019 +-1.87968313261699271344 +-1.87995519042738057358 +-1.88022708774581115421 +-1.88049882471764884251 +-1.88077040148809104814 +-1.88104181820216664889 +-1.88131307500473821115 +-1.88158417204049754901 +-1.88185510945397194149 +-1.88212588738951924761 +-1.88239650599132968267 +-1.88266696540342759469 +-1.88293726576966968800 
+-1.88320740723374524528 +-1.88347738993917634964 +-1.88374721402931966097 +-1.88401687964736352932 +-1.88428638693633110357 +-1.88455573603907788893 +-1.88482492709829552169 +-1.88509396025650555195 +-1.88536283565606721524 +-1.88563155343917276952 +-1.88590011374784727316 +-1.88616851672395169359 +-1.88643676250918046478 +-1.88670485124506392971 +-1.88697278307296589794 +-1.88724055813408586602 +-1.88750817656945768519 +-1.88777563851995089372 +-1.88804294412627005073 +-1.88831009352895429210 +-1.88857708686837999501 +-1.88884392428475700321 +-1.88911060591813262377 +-1.88937713190838918464 +-1.88964350239524514485 +-1.88990971751825553859 +-1.89017577741681064296 +-1.89044168223013819841 +-1.89070743209730118828 +-1.89097302715719983723 +-1.89123846754857138919 +-1.89150375340999077345 +-1.89176888487986816223 +-1.89203386209645008087 +-1.89229868519782384872 +-1.89256335432191025170 +-1.89282786960647131380 +-1.89309223118910185946 +-1.89335643920724017164 +-1.89362049379815644556 +-1.89388439509896455704 +-1.89414814324661229250 +-1.89441173837788756629 +-1.89467518062941620016 +-1.89493847013766170129 +-1.89520160703892792675 +-1.89546459146935575291 +-1.89572742356492618399 +-1.89599010346145835371 +-1.89625263129461130163 +-1.89651500719988130861 +-1.89677723131260700384 +-1.89703930376796425783 +-1.89730122470096884690 +-1.89756299424647689733 +-1.89782461253918333099 +-1.89808607971362519606 +-1.89834739590417722610 +-1.89860856124505561482 +-1.89886957587031557360 +-1.89913043991385466214 +-1.89939115350941012395 +-1.89965171679055933041 +-1.89991212989072133510 +-1.90017239294315576359 +-1.90043250608096347953 +-1.90069246943708569653 +-1.90095228314430619854 +-1.90121194733524978560 +-1.90147146214238249584 +-1.90173082769801182756 +-1.90199004413428784943 +-1.90224911158320209026 +-1.90250803017658820515 +-1.90276680004612197550 +-1.90302542132332175306 +-1.90328389413954801590 +-1.90354221862600292425 +-1.90380039491373342919 +-1.90405842313362771989 
+-1.90431630341641744408 +-1.90457403589267615374 +-1.90483162069282241369 +-1.90508905794711624893 +-1.90534634778566269731 +-1.90560349033840892297 +-1.90586048573514710291 +-1.90611733410551154044 +-1.90637403557898221784 +-1.90663059028488146573 +-1.90688699835237596147 +-1.90714325991047917164 +-1.90739937508804557886 +-1.90765534401377534479 +-1.90791116681621386597 +-1.90816684362375044159 +-1.90842237456461960576 +-1.90867775976690179363 +-1.90893299935852089888 +-1.90918809346724693832 +-1.90944304222069449750 +-1.90969784574632539531 +-1.90995250417144446509 +-1.91020701762320532779 +-1.91046138622860439682 +-1.91071561011448620704 +-1.91096968940753964006 +-1.91122362423430192102 +-1.91147741472115506589 +-1.91173106099432832394 +-1.91198456317989573527 +-1.91223792140377990556 +-1.91249113579175000766 +-1.91274420646942133750 +-1.91299713356225753458 +-1.91324991719556791736 +-1.91350255749450992582 +-1.91375505458408756709 +-1.91400740858915430209 +-1.91425961963440882663 +-1.91451168784439884618 +-1.91476361334352151999 +-1.91501539625601813199 +-1.91526703670598164031 +-1.91551853481735223639 +-1.91576989071391712294 +-1.91602110451931340052 +-1.91627217635702717935 +-1.91652310635039202502 +-1.91677389462259051278 +-1.91702454129665600391 +-1.91727504649546798277 +-1.91752541034175827406 +-1.91777563295810482558 +-1.91802571446693770341 +-1.91827565499053620535 +-1.91852545465102730660 +-1.91877511357039098883 +-1.91902463187045291271 +-1.91927400967289307765 +-1.91952324709923982660 +-1.91977234427087095625 +-1.92002130130901571547 +-1.92027011833475413916 +-1.92051879546901682616 +-1.92076733283258405116 +-1.92101573054608820712 +-1.92126398873001225098 +-1.92151210750469014776 +-1.92176008699030753668 +-1.92200792730690062093 +-1.92225562857435816611 +-1.92250319091241994585 +-1.92275061444067740801 +-1.92299789927857389671 +-1.92324504554540487433 +-1.92349205336031880975 +-1.92373892284231362559 +-1.92398565411024335958 +-1.92423224728281150320 
+-1.92447870247857455439 +-1.92472501981594334985 +-1.92497119941317995639 +-1.92521724138839989138 +-1.92546314585957234478 +-1.92570891294451862485 +-1.92595454276091326840 +-1.92620003542628426274 +-1.92644539105801571033 +-1.92669060977334138940 +-1.92693569168935119329 +-1.92718063692298779976 +-1.92742544559104955759 +-1.92767011781018693384 +-1.92791465369690517839 +-1.92815905336756543420 +-1.92840331693838096250 +-1.92864744452542113962 +-1.92889143624460945858 +-1.92913529221172486139 +-1.92937901254240062876 +-1.92962259735212504630 +-1.92986604675624207061 +-1.93010936086995088523 +-1.93035253980830567855 +-1.93059558368621564384 +-1.93083849261844830991 +-1.93108126671962332388 +-1.93132390610421866839 +-1.93156641088656666483 +-1.93180878118085863626 +-1.93205101710113846813 +-1.93229311876130926962 +-1.93253508627512893270 +-1.93277691975621301879 +-1.93301861931803231620 +-1.93326018507391705903 +-1.93350161713705159805 +-1.93374291562047928572 +-1.93398408063709892346 +-1.93422511229966831436 +-1.93446601072080293093 +-1.93470677601297347259 +-1.93494740828851052861 +-1.93518790765960146949 +-1.93542827423829111311 +-1.93566850813648350105 +-1.93590860946594012226 +-1.93614857833828080125 +-1.93638841486498347599 +-1.93662811915738464208 +-1.93686769132668046289 +-1.93710713148392366101 +-1.93734643974002751499 +-1.93758561620576386098 +-1.93782466099176331475 +-1.93806357420851504969 +-1.93830235596636946127 +-1.93854100637553483644 +-1.93877952554607957403 +-1.93901791358793085251 +-1.93925617061087640636 +-1.93949429672456385987 +-1.93973229203850050517 +-1.93997015666205441242 +-1.94020789070445287550 +-1.94044549427478374426 +-1.94068296748199675683 +-1.94092031043489909869 +-1.94115752324216273017 +-1.94139460601231705894 +-1.94163155885375360299 +-1.94186838187472554651 +-1.94210507518334685173 +-1.94234163888759159278 +-1.94257807309529728634 +-1.94281437791416089489 +-1.94305055345174304549 +-1.94328659981546403301 +-1.94352251711260715084 
+-1.94375830545031802465 +-1.94399396493560372434 +-1.94422949567533431825 +-1.94446489777623954254 +-1.94470017134491479638 +-1.94493531648781758925 +-1.94517033331126643070 +-1.94540522192144305080 +-1.94563998242439306630 +-1.94587461492602420421 +-1.94610911953210807823 +-1.94634349634827841236 +-1.94657774548003370541 +-1.94681186703273456651 +-1.94704586111160660167 +-1.94727972782173730515 +-1.94751346726807961218 +-1.94774707955544945648 +-1.94798056478852776863 +-1.94821392307185847770 +-1.94844715450985050964 +-1.94868025920677712115 +-1.94891323726677478945 +-1.94914608879384787521 +-1.94937881389186218328 +-1.94961141266454962562 +-1.94984388521550711104 +-1.95007623164819587913 +-1.95030845206594372065 +-1.95054054657194386735 +-1.95077251526925299352 +-1.95100435826079410262 +-1.95123607564935630521 +-1.95146766753759570712 +-1.95169913402803230085 +-1.95193047522305196395 +-1.95216169122490779131 +-1.95239278213571987308 +-1.95262374805747196405 +-1.95285458909201703470 +-1.95308530534107327448 +-1.95331589690622520195 +-1.95354636388892410892 +-1.95377670639049050294 +-1.95400692451210833411 +-1.95423701835483099032 +-1.95446698801957996494 +-1.95469683360714130416 +-1.95492655521817093600 +-1.95515615295319045153 +-1.95538562691258976933 +-1.95561497719662935602 +-1.95584420390543289869 +-1.95607330713899507657 +-1.95630228699717889640 +-1.95653114357971436021 +-1.95675987698620046373 +-1.95698848731610408613 +-1.95721697466876176641 +-1.95744533914337903724 +-1.95767358083902798249 +-1.95790169985465167812 +-1.95812969628906241581 +-1.95835757024094037071 +-1.95858532180883626594 +-1.95881295109116848607 +-1.95904045818622640773 +-1.95926784319216951147 +-1.95949510620702582742 +-1.95972224732869282349 +-1.95994926665494006990 +-1.96017616428340479828 +-1.96040294031159567645 +-1.96062959483689236428 +-1.96085612795654351537 +-1.96108253976766855331 +-1.96130883036725833790 +-1.96153499985217383283 +-1.96176104831914810411 +-1.96198697586478254529 
+-1.96221278258555287266 +-1.96243846857780379622 +-1.96266403393775146213 +-1.96288947876148500704 +-1.96311480314496322741 +-1.96334000718401813224 +-1.96356509097435272260 +-1.96379005461154121370 +-1.96401489819103192147 +-1.96423962180814326572 +-1.96446422555806599064 +-1.96468870953586516315 +-1.96491307383647617613 +-1.96513731855470630272 +-1.96536144378523980336 +-1.96558544962262815581 +-1.96580933616129960306 +-1.96603310349555404635 +-1.96625675171956482146 +-1.96648028092737758854 +-1.96670369121291255254 +-1.96692698266996268686 +-1.96715015539219528762 +-1.96737320947314953123 +-1.96759614500624113731 +-1.96781896208475681753 +-1.96804166080185893861 +-1.96826424125058441206 +-1.96848670352384313986 +-1.96870904771441956882 +-1.96893127391497402279 +-1.96915338221803937202 +-1.96937537271602458588 +-1.96959724550121251241 +-1.96981900066576098851 +-1.97004063830170417226 +-1.97026215850094965631 +-1.97048356135528179855 +-1.97070484695635816941 +-1.97092601539571399272 +-1.97114706676475925917 +-1.97136800115477828221 +-1.97158881865693413893 +-1.97180951936226311894 +-1.97203010336167849914 +-1.97225057074597032170 +-1.97247092160580272946 +-1.97269115603171885098 +-1.97291127411413746984 +-1.97313127594335191439 +-1.97335116160953538689 +-1.97357093120273541231 +-1.97379058481287739113 +-1.97401012252976326700 +-1.97422954444307374722 +-1.97444885064236475003 +-1.97466804121706940300 +-1.97488711625649937531 +-1.97510607584984443363 +-1.97532492008617133195 +-1.97554364905442336742 +-1.97576226284342371109 +-1.97598076154187141107 +-1.97619914523834605546 +-1.97641741402130399763 +-1.97663556797908013252 +-1.97685360719988700851 +-1.97707153177181726988 +-1.97728934178284121437 +-1.97750703732080790331 +-1.97772461847344560582 +-1.97794208532836224279 +-1.97815943797304294449 +-1.97837667649485338117 +-1.97859380098103865286 +-1.97881081151872173507 +-1.97902770819490791965 +-1.97924449109647992984 +-1.97946116031020036274 +-1.97967771592271235548 
+-1.97989415802053847493 +-1.98011048669008227208 +-1.98032670201762561746 +-1.98054280408933425228 +-1.98075879299124957278 +-1.98097466880929706790 +-1.98119043162928165636 +-1.98140608153688835280 +-1.98162161861768426618 +-1.98183704295711660137 +-1.98205235464051376937 +-1.98226755375308538731 +-1.98248264037992294462 +-1.98269761460599780456 +-1.98291247651616431291 +-1.98312722619515779954 +-1.98334186372759480044 +-1.98355638919797483410 +-1.98377080269067906926 +-1.98398510428996921462 +-1.98419929407999062754 +-1.98441337214477120376 +-1.98462733856821915701 +-1.98484119343412834802 +-1.98505493682617228934 +-1.98526856882790814218 +-1.98548208952277605022 +-1.98569549899409913962 +-1.98590879732508351907 +-1.98612198459881805768 +-1.98633506089827482910 +-1.98654802630631044380 +-1.98676088090566294042 +-1.98697362477895445032 +-1.98718625800869319598 +-1.98739878067726705169 +-1.98761119286695087105 +-1.98782349465990226811 +-1.98803568613816361577 +-1.98824776738366026940 +-1.98845973847820345348 +-1.98867159950348781905 +-1.98888335054109188782 +-1.98909499167248049467 +-1.98930652297900145697 +-1.98951794454188868322 +-1.98972925644226128483 +-1.98994045876112135574 +-1.99015155157935863528 +-1.99036253497774606736 +-1.99057340903694424128 +-1.99078417383749628478 +-1.99099482945983385918 +-1.99120537598427183035 +-1.99141581349101270959 +-1.99162614206014398910 +-1.99183636177163969627 +-1.99204647270535883941 +-1.99225647494104829427 +-1.99246636855834013957 +-1.99267615363675254514 +-1.99288583025569154827 +-1.99309539849444949944 +-1.99330485843220439612 +-1.99351421014802188125 +-1.99372345372085457704 +-1.99393258922954319523 +-1.99414161675281276231 +-1.99435053636927839271 +-1.99455934815744218014 +-1.99476805219569208738 +-1.99497664856230505492 +-1.99518513733544522459 +-1.99539351859316549387 +-1.99560179241340507339 +-1.99580995887399281763 +-1.99601801805264433831 +-1.99622597002696466895 +-1.99643381487444604439 +-1.99664155267247012127 
+-1.99684918349830686779 +-1.99705670742911411963 +-1.99726412454194002244 +-1.99747143491371970114 +-1.99767863862127970087 +-1.99788573574133332400 +-1.99809272635048440492 +-1.99829961052522464549 +-1.99850638834193805593 +-1.99871305987689429351 +-1.99891962520625598998 +-1.99912608440607342253 +-1.99933243755228784444 +-1.99953868472072948670 +-1.99974482598711889025 +-1.99995086142706801624 +-2.00015679111607758145 +-2.00036261512953839059 +-2.00056833354273333470 +-2.00077394643083472658 +-2.00097945386890563313 +-2.00118485593189987526 +-2.00139015269466247204 +-2.00159534423192964070 +-2.00180043061832746432 +-2.00200541192837411231 +-2.00221028823648072859 +-2.00241505961694565841 +-2.00261972614396288606 +-2.00282428789161537352 +-2.00302874493387905730 +-2.00323309734462151610 +-2.00343734519760197088 +-2.00364148856647306118 +-2.00384552752477640425 +-2.00404946214594836817 +-2.00425329250331740738 +-2.00445701867010450670 +-2.00466064071942184910 +-2.00486415872427548024 +-2.00506757275756442027 +-2.00527088289207977567 +-2.00547408920050518333 +-2.00567719175541947507 +-2.00588019062929223679 +-2.00608308589448780523 +-2.00628587762326260346 +-2.00648856588776824950 +-2.00669115076004933584 +-2.00689363231204342952 +-2.00709601061558196022 +-2.00729828574239110850 +-2.00750045776409091758 +-2.00770252675219396110 +-2.00790449277811022810 +-2.00810635591314046167 +-2.00830811622848193210 +-2.00850977379522666055 +-2.00871132868436008678 +-2.00891278096676195730 +-2.00911413071320898993 +-2.00931537799437132108 +-2.00951652288081383801 +-2.00971756544299795522 +-2.00991850575127894984 +-2.01011934387590907036 +-2.01032007988703353973 +-2.01052071385469544040 +-2.01072124584883304976 +-2.01092167593927850788 +-2.01112200419576225841 +-2.01132223068791082810 +-2.01152235548524327413 +-2.01172237865717917771 +-2.01192230027303153861 +-2.01212212040201166019 +-2.01232183911322604075 +-2.01252145647567726172 +-2.01272097255826709628 +-2.01292038742979029209 
+-2.01311970115894256494 +-2.01331891381431304922 +-2.01351802546439140329 +-2.01371703617756070415 +-2.01391594602210588505 +-2.01411475506620352149 +-2.01431346337793382162 +-2.01451207102526907988 +-2.01471057807608389112 +-2.01490898459814804511 +-2.01510729065912919111 +-2.01530549632659372605 +-2.01550360166800768269 +-2.01570160675073184464 +-2.01589951164202840772 +-2.01609731640905609495 +-2.01629502111887193294 +-2.01649262583843480456 +-2.01669013063459789947 +-2.01688753557411626360 +-2.01708484072364324646 +-2.01728204614972916886 +-2.01747915191882798425 +-2.01767615809728795284 +-2.01787306475135963524 +-2.01806987194719189560 +-2.01826657975083367802 +-2.01846318822823267425 +-2.01865969744523887641 +-2.01885610746759880385 +-2.01905241836096038810 +-2.01924863019087208471 +-2.01944474302278198508 +-2.01964075692203781642 +-2.01983667195388916227 +-2.02003248818348435378 +-2.02022820567587357843 +-2.02042382449600754768 +-2.02061934470873616476 +-2.02081476637881340963 +-2.02101008957088934537 +-2.02120531434952033223 +-2.02140044077916103404 +-2.02159546892416708275 +-2.02179039884879729883 +-2.02198523061720969451 +-2.02217996429346635878 +-2.02237459994152724008 +-2.02256913762526036038 +-2.02276357740842893662 +-2.02295791935470159473 +-2.02315216352764837282 +-2.02334630999074160940 +-2.02354035880735549924 +-2.02373431004076786976 +-2.02392816375415707242 +-2.02412192001060597946 +-2.02431557887309798716 +-2.02450914040452056852 +-2.02470260466766438512 +-2.02489597172522239887 +-2.02508924163978987210 +-2.02528241447386836427 +-2.02547549028985818254 +-2.02566846915006637531 +-2.02586135111670273545 +-2.02605413625187935622 +-2.02624682461761373986 +-2.02643941627582568898 +-2.02663191128833908294 +-2.02682430971688321009 +-2.02701661162309010322 +-2.02720881706849498372 +-2.02740092611454070237 +-2.02759293882257018993 +-2.02778485525383489474 +-2.02797667546948767736 +-2.02816839953058680734 +-2.02836002749809729551 +-2.02855155943288645304 
+-2.02874299539572833240 +-2.02893433544729973050 +-2.02912557964818640599 +-2.02931672805887552968 +-2.02950778073976101368 +-2.02969873775114306724 +-2.02988959915322642047 +-2.03008036500612298880 +-2.03027103536984787624 +-2.03046161030432381622 +-2.03065208986937939528 +-2.03084247412474816485 +-2.03103276313007130582 +-2.03122295694489540807 +-2.03141305562867335865 +-2.03160305924076389772 +-2.03179296784043428303 +-2.03198278148685629318 +-2.03217250023910978030 +-2.03236212415618044957 +-2.03255165329696252385 +-2.03274108772025474678 +-2.03293042748476437964 +-2.03311967264910675723 +-2.03330882327180351155 +-2.03349787941128301583 +-2.03368684112588393731 +-2.03387570847384813177 +-2.03406448151332908125 +-2.03425316030238745313 +-2.03444174489898887970 +-2.03463023536101017541 +-2.03481863174623622825 +-2.03500693411235777930 +-2.03519514251697586360 +-2.03538325701759870157 +-2.03557127767164436349 +-2.03575920453643766095 +-2.03594703766921369947 +-2.03613477712711610224 +-2.03632242296719701002 +-2.03650997524641708125 +-2.03669743402164771240 +-2.03688479934966748530 +-2.03707207128716483169 +-2.03725924989073847726 +-2.03744633521689566535 +-2.03763332732205482145 +-2.03782022626254066822 +-2.03800703209459088683 +-2.03819374487435167609 +-2.03838036465787952878 +-2.03856689150114078757 +-2.03875332546001253320 +-2.03893966659028080812 +-2.03912591494764328104 +-2.03931207058770702645 +-2.03949813356599074510 +-2.03968410393792254354 +-2.03986998175884304274 +-2.04005576708400138131 +-2.04024145996855876817 +-2.04042706046758892668 +-2.04061256863607365375 +-2.04079798452890814886 +-2.04098330820089834958 +-2.04116853970676137564 +-2.04135367910112686118 +-2.04153872643853340207 +-2.04172368177343477313 +-2.04190854516019326681 +-2.04209331665308679860 +-2.04227799630630180161 +-2.04246258417393722340 +-2.04264708031000630228 +-2.04283148476843301466 +-2.04301579760305473954 +-2.04320001886761870580 +-2.04338414861578776538 +-2.04356818690113595238 
+-2.04375213377715159169 +-2.04393598929723285806 +-2.04411975351469266116 +-2.04430342648275775730 +-2.04448700825456652908 +-2.04467049888317164985 +-2.04485389842153786333 +-2.04503720692254553626 +-2.04522042443898488528 +-2.04540355102356485872 +-2.04558658672890292252 +-2.04576953160753438610 +-2.04595238571190440879 +-2.04613514909437732570 +-2.04631782180722687770 +-2.04650040390264376100 +-2.04668289543273074216 +-2.04686529644950709894 +-2.04704760700490551173 +-2.04722982715077339577 +-2.04741195693887378937 +-2.04759399642088180116 +-2.04777594564838949509 +-2.04795780467290500226 +-2.04813957354584852411 +-2.04832125231855677328 +-2.04850284104228252957 +-2.04868433976819286357 +-2.04886574854737002482 +-2.04904706743081321818 +-2.04922829646943505111 +-2.04940943571406641865 +-2.04959048521545161847 +-2.04977144502425279171 +-2.04995231519104592621 +-2.05013309576632485332 +-2.05031378680050035967 +-2.05049438834389574637 +-2.05067490044675526661 +-2.05085532315923613211 +-2.05103565653141384217 +-2.05121590061327951915 +-2.05139605545474212889 +-2.05157612110562848073 +-2.05175609761567834255 +-2.05193598503455199022 +-2.05211578341182621088 +-2.05229549279699341469 +-2.05247511323946518758 +-2.05265464478856962671 +-2.05283408749355311684 +-2.05301344140357810986 +-2.05319270656772490113 +-2.05337188303499340591 +-2.05355097085429960657 +-2.05372997007447910534 +-2.05390888074428268339 +-2.05408770291238385042 +-2.05426643662736818641 +-2.05444508193774577620 +-2.05462363889194055133 +-2.05480210753829828363 +-2.05498048792507992388 +-2.05515878010046826319 +-2.05533698411256349203 +-2.05551510000938497669 +-2.05569312783887081508 +-2.05587106764887739274 +-2.05604891948718337957 +-2.05622668340148218036 +-2.05640435943938948427 +-2.05658194764844104441 +-2.05675944807608912512 +-2.05693686076970783105 +-2.05711418577659133078 +-2.05729142314395208047 +-2.05746857291892348840 +-2.05764563514855858273 +-2.05782260987983089962 +-2.05799949715963270691 
+-2.05817629703477900094 +-2.05835300955200350970 +-2.05852963475796046922 +-2.05870617269922506765 +-2.05888262342229300117 +-2.05905898697358002991 +-2.05923526339942464247 +-2.05941145274608450322 +-2.05958755505973822864 +-2.05976357038648671960 +-2.05993949877235182910 +-2.06011534026327591818 +-2.06029109490512318814 +-2.06046676274367923654 +-2.06064234382465238937 +-2.06081783819366970434 +-2.06099324589628318805 +-2.06116856697796491105 +-2.06134380148410878419 +-2.06151894946003189091 +-2.06169401095097315491 +-2.06186898600209289611 +-2.06204387465847371885 +-2.06221867696512228818 +-2.06239339296696577719 +-2.06256802270885453154 +-2.06274256623556251355 +-2.06291702359178508175 +-2.06309139482214165540 +-2.06326567997117304998 +-2.06343987908334458581 +-2.06361399220304475577 +-2.06378801937458389304 +-2.06396196064219683564 +-2.06413581605004070596 +-2.06430958564219846352 +-2.06448326946267313176 +-2.06465686755539445940 +-2.06483037996421403548 +-2.06500380673290928613 +-2.06517714790517947776 +-2.06535040352464793756 +-2.06552357363486516206 +-2.06569665827930082358 +-2.06586965750135398423 +-2.06604257134434510235 +-2.06621539985151958518 +-2.06638814306604867710 +-2.06656080103102590684 +-2.06673337378947197251 +-2.06690586138433163299 +-2.06707826385847326378 +-2.06725058125469196568 +-2.06742281361570778841 +-2.06759496098416484244 +-2.06776702340263396351 +-2.06793900091361004812 +-2.06811089355951471802 +-2.06828270138269321166 +-2.06845442442541926908 +-2.06862606272989024703 +-2.06879761633823067157 +-2.06896908529248868547 +-2.06914046963464137718 +-2.06931176940659122820 +-2.06948298465016433667 +-2.06965411540711663463 +-2.06982516171912811487 +-2.06999612362780682773 +-2.07016700117468532838 +-2.07033779440122511772 +-2.07050850334881220149 +-2.07067912805876197524 +-2.07084966857231478343 +-2.07102012493063769583 +-2.07119049717482672790 +-2.07136078534590417632 +-2.07153098948481861896 +-2.07170110963244713531 +-2.07187114582959530651 
+-2.07204109811699321853 +-2.07221096653529990306 +-2.07238075112510422571 +-2.07255045192692044509 +-2.07272006898119220963 +-2.07288960232828811669 +-2.07305905200850926207 +-2.07322841806208213455 +-2.07339770052916083642 +-2.07356689944983196838 +-2.07373601486410441552 +-2.07390504681192133773 +-2.07407399533314906748 +-2.07424286046758910018 +-2.07441164225496521567 +-2.07458034073493413629 +-2.07474895594708153013 +-2.07491748793091801417 +-2.07508593672588892431 +-2.07525430237136632172 +-2.07542258490664943693 +-2.07559078437096955483 +-2.07575890080348868239 +-2.07592693424329421958 +-2.07609488472940606485 +-2.07626275230077528278 +-2.07643053699627877506 +-2.07659823885472594185 +-2.07676585791485646126 +-2.07693339421533762490 +-2.07710084779477099914 +-2.07726821869168487567 +-2.07743550694453960048 +-2.07760271259172490943 +-2.07776983567156214860 +-2.07793687622230338619 +-2.07810383428212919199 +-2.07827070988915396654 +-2.07843750308142150018 +-2.07860421389690763760 +-2.07877084237351583695 +-2.07893738854908605163 +-2.07910385246138496029 +-2.07927023414811351643 +-2.07943653364690161922 +-2.07960275099531344267 +-2.07976888623084255059 +-2.07993493939091411704 +-2.08010091051288625863 +-2.08026679963404959040 +-2.08043260679162456128 +-2.08059833202276589503 +-2.08076397536455770521 +-2.08092953685401882424 +-2.08109501652810013894 +-2.08126041442368237000 +-2.08142573057758140109 +-2.08159096502654605843 +-2.08175611780725455802 +-2.08192118895632116704 +-2.08208617851029131884 +-2.08225108650564338930 +-2.08241591297878869682 +-2.08258065796607327869 +-2.08274532150377345019 +-2.08290990362810068959 +-2.08307440437520119403 +-2.08323882378115010638 +-2.08340316188196084113 +-2.08356741871357620255 +-2.08373159431187771062 +-2.08389568871267538697 +-2.08405970195171663661 +-2.08422363406468047486 +-2.08438748508718330044 +-2.08455125505477090186 +-2.08471494400292733928 +-2.08487855196706828309 +-2.08504207898254723119 +-2.08520552508464662722 
+-2.08536889030858763050 +-2.08553217468952567515 +-2.08569537826255002599 +-2.08585850106268422266 +-2.08602154312488785592 +-2.08618450448405567954 +-2.08634738517501538979 +-2.08651018523253251047 +-2.08667290469130639607 +-2.08683554358597112000 +-2.08699810195109680677 +-2.08716057982119007619 +-2.08732297723069137874 +-2.08748529421397766015 +-2.08764753080536102914 +-2.08780968703909142192 +-2.08797176294935127316 +-2.08813375857026217730 +-2.08829567393588044766 +-2.08845750908019800463 +-2.08861926403714370792 +-2.08878093884058291252 +-2.08894253352431746862 +-2.08910404812208527758 +-2.08926548266755984784 +-2.08942683719435340350 +-2.08958811173601466393 +-2.08974930632602751146 +-2.08991042099781276775 +-2.09007145578473085834 +-2.09023241072007692765 +-2.09039328583708394760 +-2.09055408116892316173 +-2.09071479674870008836 +-2.09087543260946340240 +-2.09103598878419072449 +-2.09119646530580638455 +-2.09135686220716676686 +-2.09151717952106741549 +-2.09167741728024170200 +-2.09183757551736126956 +-2.09199765426503558885 +-2.09215765355581195806 +-2.09231757342217683515 +-2.09247741389655184108 +-2.09263717501130219745 +-2.09279685679872784476 +-2.09295645929106655103 +-2.09311598252049835267 +-2.09327542651913844907 +-2.09343479131904297574 +-2.09359407695220500756 +-2.09375328345055944368 +-2.09391241084597634625 +-2.09407145917026893400 +-2.09423042845518647681 +-2.09438931873241873660 +-2.09454813003359507917 +-2.09470686239028447417 +-2.09486551583399416288 +-2.09502409039617276676 +-2.09518258610820629073 +-2.09534100300142300810 +-2.09549934110708946378 +-2.09565760045641313880 +-2.09581578108054022991 +-2.09597388301055786997 +-2.09613190627749323980 +-2.09628985091231445637 +-2.09644771694592924050 +-2.09660550440918491688 +-2.09676321333287107862 +-2.09692084374771781086 +-2.09707839568439435851 +-2.09723586917351179082 +-2.09739326424562166906 +-2.09755058093121693474 +-2.09770781926073102142 +-2.09786497926453829876 +-2.09802206097295629306 
+-2.09817906441624035807 +-2.09833598962458989234 +-2.09849283662814567464 +-2.09864960545698764349 +-2.09880629614114022630 +-2.09896290871056745431 +-2.09911944319517607127 +-2.09927589962481464525 +-2.09943227802927312453 +-2.09958857843828416989 +-2.09974480088152137824 +-2.09990094538860239126 +-2.10005701198908534266 +-2.10021300071247196684 +-2.10036891158820449022 +-2.10052474464567096035 +-2.10068049991419769640 +-2.10083617742305728271 +-2.10099177720146457204 +-2.10114729927857535330 +-2.10130274368348901604 +-2.10145811044524855049 +-2.10161339959284054757 +-2.10176861115519297840 +-2.10192374516117874705 +-2.10207880163961302600 +-2.10223378061925458837 +-2.10238868212880580799 +-2.10254350619691265933 +-2.10269825285216471755 +-2.10285292212309604665 +-2.10300751403818164675 +-2.10316202862584500366 +-2.10331646591444920702 +-2.10347082593230449987 +-2.10362510870766206139 +-2.10377931426872111231 +-2.10393344264362225360 +-2.10408749386045146323 +-2.10424146794723965215 +-2.10439536493196177602 +-2.10454918484253639122 +-2.10470292770682876338 +-2.10485659355264642656 +-2.10501018240774451229 +-2.10516369429982130868 +-2.10531712925652136903 +-2.10547048730543195916 +-2.10562376847408838643 +-2.10577697278997044705 +-2.10593010028050064975 +-2.10608315097305132113 +-2.10623612489493616806 +-2.10638902207341738304 +-2.10654184253570120333 +-2.10669458630894013140 +-2.10684725342023293493 +-2.10699984389662331452 +-2.10715235776510256827 +-2.10730479505260470674 +-2.10745715578601444662 +-2.10760943999215832889 +-2.10776164769781315655 +-2.10791377892969888919 +-2.10806583371448352793 +-2.10821781207878089504 +-2.10836971404915329842 +-2.10852153965210709075 +-2.10867328891409666625 +-2.10882496186152268436 +-2.10897655852073473426 +-2.10912807891802689397 +-2.10927952307964172718 +-2.10943089103176761867 +-2.10958218280054277116 +-2.10973339841204987621 +-2.10988453789232099922 +-2.11003560126733535895 +-2.11018658856301755122 +-2.11033749980524376610 
+-2.11048833501983468253 +-2.11063909423255946507 +-2.11078977746913754032 +-2.11094038475523193554 +-2.11109091611645771636 +-2.11124137157837576950 +-2.11139175116649635555 +-2.11154205490627733255 +-2.11169228282312415601 +-2.11184243494239387573 +-2.11199251128938803035 +-2.11214251188935930870 +-2.11229243676750799708 +-2.11244228594898419971 +-2.11259205945888561828 +-2.11274175732225888424 +-2.11289137956410044694 +-2.11304092620935657365 +-2.11319039728291979685 +-2.11333979280963557557 +-2.11348911281429518993 +-2.11363835732164062620 +-2.11378752635636590895 +-2.11393661994310955166 +-2.11408563810646432657 +-2.11423458087096882707 +-2.11438344826111501717 +-2.11453224030134201428 +-2.11468095701604053005 +-2.11482959842955020591 +-2.11497816456616094527 +-2.11512665545011335766 +-2.11527507110559831460 +-2.11542341155675606146 +-2.11557167682767843786 +-2.11571986694240621318 +-2.11586798192493219517 +-2.11601602179919900948 +-2.11616398658910043196 +-2.11631187631848094455 +-2.11645969101113484712 +-2.11660743069081025425 +-2.11675509538120198982 +-2.11690268510595869245 +-2.11705019988868148317 +-2.11719763975291952462 +-2.11734500472217446188 +-2.11749229481990086654 +-2.11763951006950223999 +-2.11778665049433589829 +-2.11793371611771030771 +-2.11808070696288330836 +-2.11822762305306788733 +-2.11837446441142729370 +-2.11852123106107592676 +-2.11866792302508200052 +-2.11881454032646443508 +-2.11896108298819507709 +-2.11910755103319692338 +-2.11925394448434678552 +-2.11940026336447351341 +-2.11954650769635755125 +-2.11969267750273315798 +-2.11983877280628441042 +-2.11998479362965230877 +-2.12013073999542722703 +-2.12027661192615468622 +-2.12042240944433180161 +-2.12056813257240772685 +-2.12071378133278720668 +-2.12085935574782702417 +-2.12100485583983777715 +-2.12115028163108032544 +-2.12129563314377289629 +-2.12144091040008486715 +-2.12158611342214120654 +-2.12173124223201714500 +-2.12187629685174483640 +-2.12202127730330891708 +-2.12216618360864739401 
+-2.12231101578965386523 +-2.12245577386817307897 +-2.12260045786600670681 +-2.12274506780490890279 +-2.12288960370658941201 +-2.12303406559271090615 +-2.12317845348488987156 +-2.12332276740469927390 +-2.12346700737366633760 +-2.12361117341327076957 +-2.12375526554494964415 +-2.12389928379009251813 +-2.12404322817004498347 +-2.12418709870610777912 +-2.12433089541953545876 +-2.12447461833153905530 +-2.12461826746328386051 +-2.12476184283588986901 +-2.12490534447043355470 +-2.12504877238794609440 +-2.12519212660941336779 +-2.12533540715577906610 +-2.12547861404794025120 +-2.12562174730675046419 +-2.12576480695301794910 +-2.12590779300750876146 +-2.12605070549094365973 +-2.12619354442399854932 +-2.12633630982730670311 +-2.12647900172145787323 +-2.12662162012699473834 +-2.12676416506442134136 +-2.12690663655419331945 +-2.12704903461672589771 +-2.12719135927238900408 +-2.12733361054151037806 +-2.12747578844437201795 +-2.12761789300121639812 +-2.12775992423223980765 +-2.12790188215759590307 +-2.12804376679739482014 +-2.12818557817170628255 +-2.12832731630055471683 +-2.12846898120392102882 +-2.12861057290174571222 +-2.12875209141392440770 +-2.12889353676031145568 +-2.12903490896071856397 +-2.12917620803491436376 +-2.12931743400262574184 +-2.12945858688353562016 +-2.12959966669728739674 +-2.12974067346347917251 +-2.12988160720166996853 +-2.13002246793137395287 +-2.13016325567206576963 +-2.13030397044317654220 +-2.13044461226409520549 +-2.13058518115417117045 +-2.13072567713270943912 +-2.13086610021897637779 +-2.13100645043219394381 +-2.13114672779154323834 +-2.13128693231616628267 +-2.13142706402516157738 +-2.13156712293758587862 +-2.13170710907245686272 +-2.13184702244874868526 +-2.13198686308539775425 +-2.13212663100129651284 +-2.13226632621529788025 +-2.13240594874621258725 +-2.13254549861281361700 +-2.13268497583382954375 +-2.13282438042795208233 +-2.13296371241382853867 +-2.13310297181006935929 +-2.13324215863524235814 +-2.13338127290787671342 +-2.13352031464645852665 
+-2.13365928386943703998 +-2.13379818059522019524 +-2.13393700484217463398 +-2.13407575662862969423 +-2.13421443597287208149 +-2.13435304289315030957 +-2.13449157740767336833 +-2.13463003953460850326 +-2.13476842929208698862 +-2.13490674669819657794 +-2.13504499177098905349 +-2.13518316452847489728 +-2.13532126498862551145 +-2.13545929316937321829 +-2.13559724908861170434 +-2.13573513276419557627 +-2.13587294421393902866 +-2.13601068345561850848 +-2.13614835050697271512 +-2.13628594538569860362 +-2.13642346810945715774 +-2.13656091869586850507 +-2.13669829716251680196 +-2.13683560352694534856 +-2.13697283780666102970 +-2.13711000001912987400 +-2.13724709018178193887 +-2.13738410831200731366 +-2.13752105442715967243 +-2.13765792854455360938 +-2.13779473068146552706 +-2.13793146085513496857 +-2.13806811908276150902 +-2.13820470538151008455 +-2.13834121976850388691 +-2.13847766226083280117 +-2.13861403287554585617 +-2.13875033162965655364 +-2.13888655854013931545 +-2.13902271362393348042 +-2.13915879689793797525 +-2.13929480837901708767 +-2.13943074808399735787 +-2.13956661602966802249 +-2.13970241223277968245 +-2.13983813671005007606 +-2.13997378947815608541 +-2.14010937055373862137 +-2.14024487995340439994 +-2.14038031769372061319 +-2.14051568379121848196 +-2.14065097826239547629 +-2.14078620112370687778 +-2.14092135239157688176 +-2.14105643208239149189 +-2.14119144021249985244 +-2.14132637679821602461 +-2.14146124185581676613 +-2.14159603540154508394 +-2.14173075745160579331 +-2.14186540802216773827 +-2.14199998712936556799 +-2.14213449478929707226 +-2.14226893101802540187 +-2.14240329583157640414 +-2.14253758924594217561 +-2.14267181127707750932 +-2.14280596194090389162 +-2.14294004125330639354 +-2.14307404923013367082 +-2.14320798588720151656 +-2.14334185124028842040 +-2.14347564530514089753 +-2.14360936809746682741 +-2.14374301963294078277 +-2.14387659992720358559 +-2.14401010899585964253 +-2.14414354685447827720 +-2.14427691351859639468 +-2.14441020900371537294 
+-2.14454343332530106281 +-2.14467658649878645249 +-2.14480966853956811491 +-2.14494267946301109262 +-2.14507561928444356880 +-2.14520848801916130810 +-2.14534128568242543622 +-2.14547401228946243990 +-2.14560666785546549917 +-2.14573925239559581968 +-2.14587176592497641536 +-2.14600420845870010211 +-2.14613658001182594504 +-2.14626888059937659392 +-2.14640111023634405640 +-2.14653326893768614525 +-2.14666535671832470200 +-2.14679737359315403467 +-2.14692931957702937140 +-2.14706119468477485412 +-2.14719299893118398259 +-2.14732473233101250898 +-2.14745639489898554331 +-2.14758798664979755344 +-2.14771950759810703602 +-2.14785095775853873690 +-2.14798233714568942432 +-2.14811364577411900711 +-2.14824488365835586379 +-2.14837605081289773068 +-2.14850714725220637291 +-2.14863817299071424571 +-2.14876912804282049763 +-2.14890001242289230277 +-2.14903082614526352856 +-2.14916156922423740028 +-2.14929224167408428059 +-2.14942284350904255774 +-2.14955337474331908965 +-2.14968383539108875979 +-2.14981422546649492133 +-2.14994454498364895301 +-2.15007479395663025912 +-2.15020497239948760182 +-2.15033508032623688067 +-2.15046511775086468532 +-2.15059508468732341058 +-2.15072498114953658543 +-2.15085480715139576446 +-2.15098456270676097191 +-2.15111424782946158984 +-2.15124386253329547003 +-2.15137340683202982206 +-2.15150288073940165745 +-2.15163228426911556923 +-2.15176161743484728461 +-2.15189088025023966821 +-2.15202007272890716294 +-2.15214919488443179318 +-2.15227824673036716163 +-2.15240722828023356428 +-2.15253613954752420767 +-2.15266498054569899168 +-2.15279375128818983853 +-2.15292245178839714015 +-2.15305108205969375490 +-2.15317964211541656994 +-2.15330813196888071204 +-2.15343655163336489267 +-2.15356490112211984567 +-2.15369318044836877135 +-2.15382138962530067516 +-2.15394952866608058173 +-2.15407759758383932081 +-2.15420559639168107680 +-2.15433352510267717150 +-2.15446138372987405774 +-2.15458917228628532570 +-2.15471689078489792024 +-2.15484453923866814407 
+-2.15497211766052298998 +-2.15509962606335925273 +-2.15522706446004930214 +-2.15535443286343220137 +-2.15548173128631992412 +-2.15560895974149557830 +-2.15573611824171251783 +-2.15586320679969656311 +-2.15599022542814555692 +-2.15611717413972714397 +-2.15624405294708276770 +-2.15637086186282189715 +-2.15649760089952957642 +-2.15662427006976020749 +-2.15675086938604154696 +-2.15687739886087115337 +-2.15700385850672038401 +-2.15713024833603261854 +-2.15725656836122148263 +-2.15738281859467484480 +-2.15750899904875170776 +-2.15763510973578398477 +-2.15776115066807472331 +-2.15788712185789943732 +-2.15801302331750965990 +-2.15813885505912361751 +-2.15826461709493688801 +-2.15839030943711485122 +-2.15851593209779757387 +-2.15864148508909625690 +-2.15876696842309634405 +-2.15889238211185574556 +-2.15901772616740439403 +-2.15914300060174690898 +-2.15926820542685948823 +-2.15939334065469257240 +-2.15951840629716995679 +-2.15964340236618657087 +-2.15976832887361425151 +-2.15989318583129685791 +-2.16001797325104982761 +-2.16014269114466328503 +-2.16026733952390204152 +-2.16039191840050470717 +-2.16051642778618235852 +-2.16064086769261987087 +-2.16076523813147680642 +-2.16088953911438608202 +-2.16101377065295574553 +-2.16113793275876586719 +-2.16126202544337342459 +-2.16138604871830608545 +-2.16151000259506975709 +-2.16163388708514148107 +-2.16175770219997387400 +-2.16188144795099379536 +-2.16200512434960367969 +-2.16212873140717931619 +-2.16225226913507073689 +-2.16237573754460488118 +-2.16249913664708071082 +-2.16262246645377453902 +-2.16274572697593558956 +-2.16286891822478910541 +-2.16299204021153412825 +-2.16311509294734705122 +-2.16323807644337806622 +-2.16336099071075205202 +-2.16348383576056901845 +-2.16360661160390632674 +-2.16372931825181513688 +-2.16385195571532173986 +-2.16397452400542977813 +-2.16409702313311669286 +-2.16421945310933550033 +-2.16434181394501612417 +-2.16446410565106361901 +-2.16458632823835994685 +-2.16470848171776042435 +-2.16483056610009905185 
+-2.16495258139618451665 +-2.16507452761680152520 +-2.16519640477271213541 +-2.16531821287465264803 +-2.16543995193333715932 +-2.16556162195945578475 +-2.16568322296367465896 +-2.16580475495663682395 +-2.16592621794896222909 +-2.16604761195124595474 +-2.16616893697406043273 +-2.16629019302795500224 +-2.16641138012345768615 +-2.16653249827106941794 +-2.16665354748127025886 +-2.16677452776451762162 +-2.16689543913124449404 +-2.16701628159186299172 +-2.16713705515676080537 +-2.16725775983630164490 +-2.16737839564083056842 +-2.16749896258066510057 +-2.16761946066610411421 +-2.16773988990742116911 +-2.16786025031486984105 +-2.16798054189867794861 +-2.16810076466905510273 +-2.16822091863618426899 +-2.16834100381023064941 +-2.16846102020133235655 +-2.16858096781960973942 +-2.16870084667515961030 +-2.16882065677805480064 +-2.16894039813834904606 +-2.16906007076607343365 +-2.16917967467123551373 +-2.16929920986382285264 +-2.16941867635380214452 +-2.16953807415111477042 +-2.16965740326568656826 +-2.16977666370741495427 +-2.16989585548618046928 +-2.17001497861184144966 +-2.17013403309423402732 +-2.17025301894317301787 +-2.17037193616845369704 +-2.17049078477984780378 +-2.17060956478710842532 +-2.17072827619996511217 +-2.17084691902812831898 +-2.17096549328128762824 +-2.17108399896911041793 +-2.17120243610124408207 +-2.17132080468731558653 +-2.17143910473693102503 +-2.17155733625967517497 +-2.17167549926511327385 +-2.17179359376278968696 +-2.17191161976222790742 +-2.17202957727293277657 +-2.17214746630438515496 +-2.17226528686605080409 +-2.17238303896737061649 +-2.17250072261776727700 +-2.17261833782664481873 +-2.17273588460338551442 +-2.17285336295735032053 +-2.17297077289788509447 +-2.17308811443430949240 +-2.17320538757592940371 +-2.17332259233202629289 +-2.17343972871186652540 +-2.17355679672469115360 +-2.17367379637972835127 +-2.17379072768618053502 +-2.17390759065323502242 +-2.17402438529005825885 +-2.17414111160579759385 +-2.17425776960957994888 +-2.17437435931051536997 
+-2.17449088071769303099 +-2.17460733384018389813 +-2.17472371868703939768 +-2.17484003526729230416 +-2.17495628358995585216 +-2.17507246366402684501 +-2.17518857549848032562 +-2.17530461910227401745 +-2.17542059448434610403 +-2.17553650165361789348 +-2.17565234061899159812 +-2.17576811138935033441 +-2.17588381397355767888 +-2.17599944838046210904 +-2.17611501461889123021 +-2.17623051269765399596 +-2.17634594262554514899 +-2.17646130441133456301 +-2.17657659806378234180 +-2.17669182359162327600 +-2.17680698100357750135 +-2.17692207030834739001 +-2.17703709151461755056 +-2.17715204463105305166 +-2.17726692966630386294 +-2.17738174662900085821 +-2.17749649552775670358 +-2.17761117637116718981 +-2.17772578916781123226 +-2.17784033392624998271 +-2.17795481065502682938 +-2.17806921936266828510 +-2.17818356005768265504 +-2.17829783274856181308 +-2.17841203744378120177 +-2.17852617415179850013 +-2.17864024288105406768 +-2.17875424363997183264 +-2.17886817643695884783 +-2.17898204128040395844 +-2.17909583817868091060 +-2.17920956714014790734 +-2.17932322817314183538 +-2.17943682128598759107 +-2.17955034648699186306 +-2.17966380378444357646 +-2.17977719318661788961 +-2.17989051470177130909 +-2.18000376833814524247 +-2.18011695410396422190 +-2.18023007200743634826 +-2.18034312205675462337 +-2.18045610426009606186 +-2.18056901862561725025 +-2.18068186516146722553 +-2.18079464387577148798 +-2.18090735477664265929 +-2.18101999787217781801 +-2.18113257317045894368 +-2.18124508067954936408 +-2.18135752040749819614 +-2.18146989236234167819 +-2.18158219655209739685 +-2.18169443298476695148 +-2.18180660166834083924 +-2.18191870261078690874 +-2.18203073582006501496 +-2.18214270130411680526 +-2.18225459907086660749 +-2.18236642912822853546 +-2.18247819148409627488 +-2.18258988614635240921 +-2.18270151312286264655 +-2.18281307242147892822 +-2.18292456405003632014 +-2.18303598801635878601 +-2.18314734432824986143 +-2.18325863299350508839 +-2.18336985401990046896 +-2.18348100741519823842 
+-2.18359209318714819759 +-2.18370311134348460413 +-2.18381406189192617262 +-2.18392494484017829492 +-2.18403576019593215207 +-2.18414650796686382606 +-2.18425718816063652028 +-2.18436780078489922730 +-2.18447834584728539653 +-2.18458882335541515474 +-2.18469923331689530599 +-2.18480957573931799942 +-2.18491985063026206149 +-2.18503005799729210779 +-2.18514019784795987533 +-2.18525027018980155802 +-2.18536027503034135933 +-2.18547021237708971597 +-2.18558008223754285382 +-2.18568988461918323196 +-2.18579961952948220727 +-2.18590928697589514940 +-2.18601888696586499350 +-2.18612841950682268433 +-2.18623788460618317941 +-2.18634728227135122225 +-2.18645661250971690137 +-2.18656587532865698265 +-2.18667507073553668562 +-2.18678419873770746307 +-2.18689325934250655692 +-2.18700225255726188323 +-2.18711117838928492674 +-2.18722003684587651406 +-2.18732882793432414914 +-2.18743755166190334549 +-2.18754620803587629396 +-2.18765479706349275091 +-2.18776331875199092636 +-2.18787177310859703994 +-2.18798016014052132405 +-2.18808847985496646160 +-2.18819673225912003645 +-2.18830491736015897430 +-2.18841303516524732231 +-2.18852108568153713719 +-2.18862906891616804117 +-2.18873698487626855425 +-2.18884483356895520600 +-2.18895261500133253563 +-2.18906032918049220370 +-2.18916797611351654496 +-2.18927555580747323916 +-2.18938306826942197247 +-2.18949051350640688796 +-2.18959789152546280278 +-2.18970520233361432005 +-2.18981244593787094388 +-2.18991962234523374065 +-2.19002673156269311860 +-2.19013377359722483106 +-2.19024074845579663773 +-2.19034765614536297562 +-2.19045449667286939999 +-2.19056127004524858748 +-2.19066797626942166843 +-2.19077461535230222367 +-2.19088118730078962315 +-2.19098769212177302279 +-2.19109412982213180854 +-2.19120050040873426411 +-2.19130680388843801509 +-2.19141304026808958483 +-2.19151920955452483852 +-2.19162531175457031551 +-2.19173134687504100881 +-2.19183731492274214148 +-2.19194321590446872250 +-2.19204904982700377047 +-2.19215481669712231039 
+-2.19226051652158826499 +-2.19236614930715534300 +-2.19247171506056570678 +-2.19257721378855485739 +-2.19268264549784541728 +-2.19278801019515112714 +-2.19289330788717595766 +-2.19299853858061366552 +-2.19310370228214868149 +-2.19320879899845522232 +-2.19331382873619773477 +-2.19341879150203222792 +-2.19352368730260494090 +-2.19362851614454967830 +-2.19373327803449447160 +-2.19383797297905669410 +-2.19394260098484394916 +-2.19404716205845407018 +-2.19415165620647689693 +-2.19425608343549249923 +-2.19436044375207206514 +-2.19446473716277790089 +-2.19456896367416076643 +-2.19467312329276564853 +-2.19477721602512820809 +-2.19488124187777255969 +-2.19498520085721837702 +-2.19508909296997023475 +-2.19519291822253048707 +-2.19529667662138994189 +-2.19540036817302919303 +-2.19550399288392172892 +-2.19560755076053482071 +-2.19571104180932374916 +-2.19581446603673580142 +-2.19591782344921249148 +-2.19602111405318467519 +-2.19612433785507432660 +-2.19622749486129720253 +-2.19633058507826062211 +-2.19643360851236169040 +-2.19653656516999262749 +-2.19663945505753410714 +-2.19674227818136280632 +-2.19684503454784341159 +-2.19694772416333528042 +-2.19705034703418933262 +-2.19715290316674893845 +-2.19725539256734858640 +-2.19735781524231654771 +-2.19746017119797310002 +-2.19756246044063052736 +-2.19766468297659400832 +-2.19776683881216028382 +-2.19786892795361943342 +-2.19797095040725531945 +-2.19807290617934159016 +-2.19817479527614834112 +-2.19827661770393500973 +-2.19837837346895526025 +-2.19848006257745565151 +-2.19858168503567696916 +-2.19868324084985067302 +-2.19878473002620156151 +-2.19888615257094910405 +-2.19898750849030522048 +-2.19908879779047383707 +-2.19919002047765310692 +-2.19929117655803540998 +-2.19939226603780468849 +-2.19949328892313866746 +-2.19959424522020885462 +-2.19969513493518187275 +-2.19979595807421413056 +-2.19989671464345803997 +-2.19999740464906023973 +-2.20009802809715848682 +-2.20019858499388742956 +-2.20029907534537239044 +-2.20039949915773469513 
+-2.20049985643708945204 +-2.20060014718954377599 +-2.20070037142120078499 +-2.20080052913815693572 +-2.20090062034650202349 +-2.20100064505232095868 +-2.20110060326169287848 +-2.20120049498068892646 +-2.20130032021537891396 +-2.20140007897182199414 +-2.20149977125607554385 +-2.20159939707418805810 +-2.20169895643220581150 +-2.20179844933616797320 +-2.20189787579210660695 +-2.20199723580605200013 +-2.20209652938402511424 +-2.20219575653204602261 +-2.20229491725612458453 +-2.20239401156226977108 +-2.20249303945648211567 +-2.20259200094475993126 +-2.20269089603309398129 +-2.20278972472747192057 +-2.20288848703387429850 +-2.20298718295827988811 +-2.20308581250665946882 +-2.20318437568497982326 +-2.20328287249920551361 +-2.20338130295529133207 +-2.20347966705919251496 +-2.20357796481685674905 +-2.20367619623422816844 +-2.20377436131724557811 +-2.20387246007184334218 +-2.20397049250395227205 +-2.20406845861949829413 +-2.20416635842440289395 +-2.20426419192458400431 +-2.20436195912595422897 +-2.20445966003442217485 +-2.20455729465589200800 +-2.20465486299626522992 +-2.20475236506143890125 +-2.20484980085730386534 +-2.20494717038974918921 +-2.20504447366465994307 +-2.20514171068791631214 +-2.20523888146539492894 +-2.20533598600297020553 +-2.20543302430651033674 +-2.20552999638188085285 +-2.20562690223494461961 +-2.20572374187155961778 +-2.20582051529758071950 +-2.20591722251885968831 +-2.20601386354124384681 +-2.20611043837057829720 +-2.20620694701270414484 +-2.20630338947345805423 +-2.20639976575867624575 +-2.20649607587419005483 +-2.20659231982582682008 +-2.20668849761941165966 +-2.20678460926076613902 +-2.20688065475571093543 +-2.20697663411006095302 +-2.20707254732962931953 +-2.20716839442022561002 +-2.20726417538765717907 +-2.20735989023772960493 +-2.20745553897624402495 +-2.20755112160899891194 +-2.20764663814179140644 +-2.20774208858041509629 +-2.20783747293066046069 +-2.20793279119831620250 +-2.20802804338916969229 +-2.20812322950900341567 +-2.20821834956359897006 
+-2.20831340355873617654 +-2.20840839150019041526 +-2.20850331339373706641 +-2.20859816924514795744 +-2.20869295906019269538 +-2.20878768284464044314 +-2.20888234060425547867 +-2.20897693234480296809 +-2.20907145807204408072 +-2.20916591779173998589 +-2.20926031150964741201 +-2.20935463923152264343 +-2.20944890096312063221 +-2.20954309671019366590 +-2.20963722647849225567 +-2.20973129027376735678 +-2.20982528810176459544 +-2.20991921996823181829 +-2.21001308587891243107 +-2.21010688583954939546 +-2.21020061985588611719 +-2.21029428793366111705 +-2.21038789007861247171 +-2.21048142629648047830 +-2.21057489659300010487 +-2.21066830097390543131 +-2.21076163944493275793 +-2.21085491201181305598 +-2.21094811868027907309 +-2.21104125945606133641 +-2.21113433434488859675 +-2.21122734335249093718 +-2.21132028648459622033 +-2.21141316374693008839 +-2.21150597514521995990 +-2.21159872068519192112 +-2.21169140037256939380 +-2.21178401421307668784 +-2.21187656221243722499 +-2.21196904437637442697 +-2.21206146071060993918 +-2.21215381122086540699 +-2.21224609591286247579 +-2.21233831479232323503 +-2.21243046786496577738 +-2.21252255513651219232 +-2.21261457661268101660 +-2.21270653229919300742 +-2.21279842220176758971 +-2.21289024632612374432 +-2.21298200467798089619 +-2.21307369726305669388 +-2.21316532408707233870 +-2.21325688515574725557 +-2.21334838047479776080 +-2.21343981004994594386 +-2.21353117388690989742 +-2.21362247199141037868 +-2.21371370436916636848 +-2.21380487102589817994 +-2.21389597196732657025 +-2.21398700719917274071 +-2.21407797672715744852 +-2.21416888055700189497 +-2.21425971869442905771 +-2.21435049114516147029 +-2.21444119791492388671 +-2.21453183900943839646 +-2.21462241443442930944 +-2.21471292419562493237 +-2.21480336829874868698 +-2.21489374674952799182 +-2.21498405955369159770 +-2.21507430671696647906 +-2.21516448824508405124 +-2.21525460414377395324 +-2.21534465441876760039 +-2.21543463907579862848 +-2.21552455812059934104 +-2.21561441155890515020 
+-2.21570419939645280039 +-2.21579392163897814783 +-2.21588357829222060147 +-2.21597316936192001435 +-2.21606269485381801587 +-2.21615215477365623542 +-2.21624154912717852284 +-2.21633087792013183659 +-2.21642014115826135878 +-2.21650933884731671242 +-2.21659847099304752049 +-2.21668753760120695873 +-2.21677653867754642647 +-2.21686547422782265215 +-2.21695434425779280829 +-2.21704314877321451149 +-2.21713188777984893107 +-2.21722056128345945680 +-2.21730916928980903435 +-2.21739771180466505029 +-2.21748618883379622346 +-2.21757460038297349314 +-2.21766294645796824270 +-2.21775122706455629640 +-2.21783944220851303442 +-2.21792759189562005417 +-2.21801567613165717674 +-2.21810369492240866407 +-2.21819164827366144266 +-2.21827953619120243900 +-2.21836735868082479683 +-2.21845511574831943946 +-2.21854280739948439560 +-2.21863043364011769398 +-2.21871799447601958377 +-2.21880548991299475503 +-2.21889291995684967418 +-2.21898028461339347217 +-2.21906758388843705632 +-2.21915481778779621891 +-2.21924198631728852860 +-2.21932908948273466265 +-2.21941612728995618653 +-2.21950309974478221520 +-2.21959000685303964318 +-2.21967684862056335859 +-2.21976362505318647322 +-2.21985033615674964835 +-2.21993698193709265709 +-2.22002356240006148980 +-2.22011007755150480136 +-2.22019652739727391122 +-2.22028291194322369151 +-2.22036923119521167891 +-2.22045548515910029508 +-2.22054167384075462621 +-2.22062779724604286713 +-2.22071385538083765354 +-2.22079984825101339752 +-2.22088577586245117246 +-2.22097163822103293995 +-2.22105743533264554657 +-2.22114316720317939158 +-2.22122883383852887107 +-2.22131443524459148975 +-2.22139997142726786095 +-2.22148544239246659160 +-2.22157084814609540047 +-2.22165618869406733538 +-2.22174146404230166141 +-2.22182667419671897591 +-2.22191181916324476120 +-2.22199689894780982868 +-2.22208191355634721020 +-2.22216686299479615485 +-2.22225174726909768808 +-2.22233656638520082893 +-2.22242132034905370830 +-2.22250600916661378292 +-2.22259063284384028591 
+-2.22267519138669733536 +-2.22275968480115437842 +-2.22284411309318352679 +-2.22292847626876355349 +-2.22301277433387678428 +-2.22309700729450909762 +-2.22318117515665436557 +-2.22326527792630734837 +-2.22334931560946991169 +-2.22343328821214658575 +-2.22351719574035078253 +-2.22360103820009635811 +-2.22368481559740427400 +-2.22376852793829993260 +-2.22385217522881362129 +-2.22393575747498184469 +-2.22401927468284377198 +-2.22410272685844656593 +-2.22418611400784049792 +-2.22426943613708072434 +-2.22435269325222861880 +-2.22443588535935177219 +-2.22451901246452088401 +-2.22460207457381331508 +-2.22468507169331131124 +-2.22476800382910333553 +-2.22485087098728229194 +-2.22493367317394596938 +-2.22501641039520103860 +-2.22509908265715417031 +-2.22518168996592313746 +-2.22526423232762793347 +-2.22534670974839521307 +-2.22542912223435829233 +-2.22551146979165448414 +-2.22559375242642820680 +-2.22567597014482965179 +-2.22575812295301211918 +-2.22584021085714001131 +-2.22592223386337861868 +-2.22600419197790300174 +-2.22608608520689177368 +-2.22616791355652976492 +-2.22624967703300935540 +-2.22633137564252825413 +-2.22641300939129038738 +-2.22649457828550456640 +-2.22657608233138804010 +-2.22665752153516249834 +-2.22673889590305806863 +-2.22682020544130887529 +-2.22690145015615659219 +-2.22698263005384911040 +-2.22706374514064142645 +-2.22714479542279386592 +-2.22722578090657252758 +-2.22730670159825372423 +-2.22738755750411643319 +-2.22746834863044851360 +-2.22754907498354315365 +-2.22762973656970242331 +-2.22771033339523150119 +-2.22779086546644622402 +-2.22787133278966642536 +-2.22795173537122037644 +-2.22803207321744300984 +-2.22811234633467503130 +-2.22819255472926514017 +-2.22827269840756914121 +-2.22835277737594950054 +-2.22843279164077623378 +-2.22851274120842646198 +-2.22859262608528219118 +-2.22867244627773564147 +-2.22875220179218569427 +-2.22883189263503789235 +-2.22891151881270310753 +-2.22899108033160375797 +-2.22907057719816581454 +-2.22915000941882546215 
+-2.22922937700002421479 +-2.22930867994821158007 +-2.22938791826984550326 +-2.22946709197139103509 +-2.22954620105931944352 +-2.22962524554011132238 +-2.22970422542025437096 +-2.22978314070624428211 +-2.22986199140458385415 +-2.22994077752178387897 +-2.23001949906436269799 +-2.23009815603884709034 +-2.23017674845177138465 +-2.23025527630967701498 +-2.23033373961911385308 +-2.23041213838664154068 +-2.23049047261882549265 +-2.23056874232223822929 +-2.23064694750346426133 +-2.23072508816909209628 +-2.23080316432572089980 +-2.23088117597995694297 +-2.23095912313841493457 +-2.23103700580771802109 +-2.23111482399449778669 +-2.23119257770539292096 +-2.23127026694705232757 +-2.23134789172613112740 +-2.23142545204929554359 +-2.23150294792321712833 +-2.23158037935457853607 +-2.23165774635006997073 +-2.23173504891639096215 +-2.23181228706024592512 +-2.23188946078835348530 +-2.23196657010743670924 +-2.23204361502423020980 +-2.23212059554547614937 +-2.23219751167792246349 +-2.23227436342833218674 +-2.23235115080347146232 +-2.23242787381011753567 +-2.23250453245505831035 +-2.23258112674508701900 +-2.23265765668700888469 +-2.23273412228763579179 +-2.23281052355379072694 +-2.23288686049230467034 +-2.23296313311001703994 +-2.23303934141377746769 +-2.23311548541044535554 +-2.23319156510688721085 +-2.23326758050997975502 +-2.23334353162661081171 +-2.23341941846367486590 +-2.23349524102807661663 +-2.23357099932673053289 +-2.23364669336655996545 +-2.23372232315449759099 +-2.23379788869748630020 +-2.23387339000247875376 +-2.23394882707643471775 +-2.23402419992632683687 +-2.23409950855913574941 +-2.23417475298185008725 +-2.23424993320147269316 +-2.23432504922500951849 +-2.23440010105948339003 +-2.23447508871192113133 +-2.23455001218936244456 +-2.23462487149885591364 +-2.23469966664746166884 +-2.23477439764224650176 +-2.23484906449028919440 +-2.23492366719867785463 +-2.23499820577451213666 +-2.23507268022490013237 +-2.23514709055695925954 +-2.23522143677781892634 +-2.23529571889461786682 
+-2.23536993691450414090 +-2.23544409084463824300 +-2.23551818069218777296 +-2.23559220646433320923 +-2.23566616816826435610 +-2.23574006581118212011 +-2.23581389940029362506 +-2.23588766894282420239 +-2.23596137444600229216 +-2.23603501591707054530 +-2.23610859336328005043 +-2.23618210679189477474 +-2.23625555621018801133 +-2.23632894162544237915 +-2.23640226304495381981 +-2.23647552047602538039 +-2.23654871392597387469 +-2.23662184340212633060 +-2.23669490891181865777 +-2.23676791046240008853 +-2.23684084806122784883 +-2.23691372171567248728 +-2.23698653143311343428 +-2.23705927722094299881 +-2.23713195908656237165 +-2.23720457703738517807 +-2.23727713108083614557 +-2.23734962122434932752 +-2.23742204747537121179 +-2.23749440984135938848 +-2.23756670832978210584 +-2.23763894294811827024 +-2.23771111370386011075 +-2.23778322060450696185 +-2.23785526365757414524 +-2.23792724287058408805 +-2.23799915825107476053 +-2.23807100980659123834 +-2.23814279754469236394 +-2.23821452147294852608 +-2.23828618159893988349 +-2.23835777793025991755 +-2.23842931047451099147 +-2.23850077923931056745 +-2.23857218423228498949 +-2.23864352546107392428 +-2.23871480293332547618 +-2.23878601665670284859 +-2.23885716663887945899 +-2.23892825288754027113 +-2.23899927541038357148 +-2.23907023421511608419 +-2.23914112930945918833 +-2.23921196070114669752 +-2.23928272839792041893 +-2.23935343240753770289 +-2.23942407273776478149 +-2.23949464939638387406 +-2.23956516239118563760 +-2.23963561172997449589 +-2.23970599742056508674 +-2.23977631947078625885 +-2.23984657788847751902 +-2.23991677268149214086 +-2.23998690385769272382 +-2.24005697142495607821 +-2.24012697539117144885 +-2.24019691576423918278 +-2.24026679255207294972 +-2.24033660576259707753 +-2.24040635540375010493 +-2.24047604148348122877 +-2.24054566400975385676 +-2.24061522299054294294 +-2.24068471843383454356 +-2.24075415034762981392 +-2.24082351873994012337 +-2.24089282361879016392 +-2.24096206499221706210 +-2.24103124286827171119 
+-2.24110035725501477444 +-2.24116940816052379049 +-2.24123839559288562384 +-2.24130731956020001761 +-2.24137618007058048164 +-2.24144497713215384849 +-2.24151371075305760883 +-2.24158238094144435237 +-2.24165098770547821516 +-2.24171953105333709999 +-2.24178801099321001189 +-2.24185642753330105492 +-2.24192478068182543538 +-2.24199307044701345859 +-2.24206129683710653211 +-2.24212945986035894208 +-2.24219755952504007368 +-2.24226559583943130249 +-2.24233356881182599452 +-2.24240147845053261477 +-2.24246932476387250688 +-2.24253710776017767259 +-2.24260482744779743314 +-2.24267248383509087972 +-2.24274007693043264666 +-2.24280760674220935869 +-2.24287507327882140729 +-2.24294247654868295072 +-2.24300981656022146993 +-2.24307709332187732443 +-2.24314430684210375233 +-2.24321145712937042305 +-2.24327854419215677595 +-2.24334556803895868171 +-2.24341252867828311324 +-2.24347942611865303064 +-2.24354626036860382854 +-2.24361303143668555649 +-2.24367973933145847809 +-2.24374638406150239689 +-2.24381296563540422184 +-2.24387948406177084593 +-2.24394593934921893208 +-2.24401233150638113045 +-2.24407866054190074934 +-2.24414492646444019286 +-2.24421112928266985875 +-2.24427726900527924059 +-2.24434334564096893416 +-2.24440935919845374613 +-2.24447530968646358218 +-2.24454119711374078250 +-2.24460702148904278630 +-2.24467278282114168775 +-2.24473848111882334777 +-2.24480411639088606179 +-2.24486968864614455654 +-2.24493519789342688142 +-2.24500064414157485260 +-2.24506602739944582936 +-2.24513134767590960550 +-2.24519660497985196201 +-2.24526179932017200258 +-2.24532693070578437400 +-2.24539199914561571347 +-2.24545700464860997769 +-2.24552194722372355784 +-2.24558682687992883231 +-2.24565164362621016991 +-2.24571639747156925893 +-2.24578108842502111031 +-2.24584571649559494588 +-2.24591028169233508649 +-2.24597478402430095201 +-2.24603922350056484092 +-2.24610360013021681524 +-2.24616791392235715108 +-2.24623216488610566444 +-2.24629635303059371765 +-2.24636047836496821617 
+-2.24642454089839072040 +-2.24648854064003966613 +-2.24655247759910503547 +-2.24661635178479413000 +-2.24668016320632801808 +-2.24674391187294286709 +-2.24680759779389038755 +-2.24687122097843561264 +-2.24693478143586178319 +-2.24699827917546368639 +-2.24706171420655342885 +-2.24712508653845599582 +-2.24718839618051458018 +-2.24725164314208525340 +-2.24731482743253918599 +-2.24737794906126486794 +-2.24744100803766277963 +-2.24750400437115116503 +-2.24756693807116336714 +-2.24762980914714693981 +-2.24769261760856453591 +-2.24775536346489479556 +-2.24781804672563234604 +-2.24788066740028602553 +-2.24794322549838110348 +-2.24800572102945706021 +-2.24806815400307025143 +-2.24813052442879168780 +-2.24819283231620747898 +-2.24825507767491972189 +-2.24831726051454783288 +-2.24837938084472321876 +-2.24844143867509504986 +-2.24850343401532848375 +-2.24856536687510333294 +-2.24862723726411539715 +-2.24868904519207690740 +-2.24875079066871386146 +-2.24881247370377090888 +-2.24887409430700513369 +-2.24893565248819182756 +-2.24899714825712271349 +-2.24905858162360194896 +-2.24911995259745234321 +-2.24918126118851269268 +-2.24924250740663644876 +-2.24930369126169349414 +-2.24936481276356881054 +-2.24942587192216558734 +-2.24948686874740166886 +-2.24954780324920955437 +-2.24960867543754039488 +-2.24966948532236044045 +-2.24973023291365059606 +-2.24979091822140953028 +-2.24985154125565189887 +-2.24991210202640790072 +-2.24997260054372505422 +-2.25003303681766686495 +-2.25009341085831060525 +-2.25015372267575308740 +-2.25021397228010622271 +-2.25027415968149835379 +-2.25033428489007247819 +-2.25039434791598980112 +-2.25045434876942973546 +-2.25051428746058235220 +-2.25057416399966081499 +-2.25063397839688983382 +-2.25069373066251365856 +-2.25075342080679074996 +-2.25081304883999777644 +-2.25087261477242739360 +-2.25093211861438691201 +-2.25099156037620407034 +-2.25105094006822037400 +-2.25111025770079509201 +-2.25116951328430348056 +-2.25122870682913855944 +-2.25128783834570800337 
+-2.25134690784443858291 +-2.25140591533577305583 +-2.25146486083016972302 +-2.25152374433810509302 +-2.25158256587007121752 +-2.25164132543657879992 +-2.25170002304815364269 +-2.25175865871534020002 +-2.25181723244869802514 +-2.25187574425880443485 +-2.25193419415625362134 +-2.25199258215165665220 +-2.25205090825564147039 +-2.25210917247885378245 +-2.25216737483195572622 +-2.25222551532562631493 +-2.25228359397056143720 +-2.25234161077747652158 +-2.25239956575709943110 +-2.25245745892017890100 +-2.25251529027748054190 +-2.25257305983978506347 +-2.25263076761789315938 +-2.25268841362262062233 +-2.25274599786480056451 +-2.25280352035528474985 +-2.25286098110494092950 +-2.25291838012465595042 +-2.25297571742533131456 +-2.25303299301788806375 +-2.25309020691326322705 +-2.25314735912241248528 +-2.25320444965630839462 +-2.25326147852594083076 +-2.25331844574231610068 +-2.25337535131646093944 +-2.25343219525941629300 +-2.25348897758224220311 +-2.25354569829601736330 +-2.25360235741183512204 +-2.25365895494080925587 +-2.25371549089406952859 +-2.25377196528276346754 +-2.25382837811805725181 +-2.25388472941113349179 +-2.25394101917319300554 +-2.25399724741545481876 +-2.25405341414915438847 +-2.25410951938554759977 +-2.25416556313590410454 +-2.25422154541151442686 +-2.25427746622368596618 +-2.25433332558374299737 +-2.25438912350303066745 +-2.25444485999290833433 +-2.25450053506475578402 +-2.25455614872996923381 +-2.25461170099996266458 +-2.25466719188617004122 +-2.25472262140004131581 +-2.25477798955304553630 +-2.25483329635666862600 +-2.25488854182241649227 +-2.25494372596181058555 +-2.25499884878639145214 +-2.25505391030771917826 +-2.25510891053737028145 +-2.25516384948694037504 +-2.25521872716804150372 +-2.25527354359230525205 +-2.25532829877138230046 +-2.25538299271694020476 +-2.25543762544066428433 +-2.25549219695425984256 +-2.25554670726944817005 +-2.25560115639797231779 +-2.25565554435158865942 +-2.25570987114207621715 +-2.25576413678123177675 +-2.25581834128086722302 
+-2.25587248465281708931 +-2.25592656690893100802 +-2.25598058806107903962 +-2.25603454812114945227 +-2.25608844710104738951 +-2.25614228501269931115 +-2.25619606186804633197 +-2.25624977767905132708 +-2.25630343245769493521 +-2.25635702621597555861 +-2.25641055896591025132 +-2.25646403071953693953 +-2.25651744148890731623 +-2.25657079128609661112 +-2.25662408012319648520 +-2.25667730801231680715 +-2.25673047496558698555 +-2.25678358099515685709 +-2.25683662611318958113 +-2.25688961033187318606 +-2.25694253366341079925 +-2.25699539612002597622 +-2.25704819771395959194 +-2.25710093845747206132 +-2.25715361836284378327 +-2.25720623744237203212 +-2.25725879570837406618 +-2.25731129317318712779 +-2.25736372984916355833 +-2.25741610574867879180 +-2.25746842088412469352 +-2.25752067526791355689 +-2.25757286891247588301 +-2.25762500183026082468 +-2.25767707403373663055 +-2.25772908553539108922 +-2.25778103634773197328 +-2.25783292648328481889 +-2.25788475595459292578 +-2.25793652477422090996 +-2.25798823295475248329 +-2.25803988050878867710 +-2.25809146744895139491 +-2.25814299378788119199 +-2.25819445953823638717 +-2.25824586471269661558 +-2.25829720932396016408 +-2.25834849338474397129 +-2.25839971690778451574 +-2.25845087990583825999 +-2.25850198239167854197 +-2.25855302437810090410 +-2.25860400587791865235 +-2.25865492690396463260 +-2.25870578746909211887 +-2.25875658758617081645 +-2.25880732726809396738 +-2.25885800652776991271 +-2.25890862537813097433 +-2.25895918383212412905 +-2.25900968190271944636 +-2.25906011960290431517 +-2.25911049694568744073 +-2.25916081394409440364 +-2.25921107061117387715 +-2.25926126695999140992 +-2.25931140300363209050 +-2.25936147875520143558 +-2.25941149422782538991 +-2.25946144943464721777 +-2.25951134438883149969 +-2.25956117910356235612 +-2.25961095359204255928 +-2.25966066786749664175 +-2.25971032194316467923 +-2.25975991583231206050 +-2.25980944954821838522 +-2.25985892310418767792 +-2.25990833651353906220 +-2.25995768978961608653 
+-2.26000698294578006298 +-2.26005621599540917899 +-2.26010538895190737918 +-2.26015450182869281903 +-2.26020355463920763484 +-2.26025254739691039418 +-2.26030148011528275731 +-2.26035035280782281575 +-2.26039916548805308594 +-2.26044791816951029517 +-2.26049661086575603974 +-2.26054524359036967951 +-2.26059381635695055834 +-2.26064232917911800413 +-2.26069078207051221696 +-2.26073917504479249274 +-2.26078750811563811141 +-2.26083578129674833690 +-2.26088399460184330536 +-2.26093214804466358103 +-2.26098024163896704763 +-2.26102827539853601380 +-2.26107624933716744309 +-2.26112416346868405626 +-2.26117201780692411717 +-2.26121981236574942642 +-2.26126754715903999227 +-2.26131522220069669515 +-2.26136283750464084363 +-2.26141039308481239800 +-2.26145788895517307893 +-2.26150532512970547927 +-2.26155270162240995546 +-2.26160001844730995657 +-2.26164727561844713932 +-2.26169447314988403264 +-2.26174161105570403762 +-2.26178868935001009532 +-2.26183570804692601897 +-2.26188266716059649397 +-2.26192956670518485751 +-2.26197640669487620713 +-2.26202318714387562437 +-2.26206990806640906300 +-2.26211656947672201667 +-2.26216317138908129536 +-2.26220971381777280484 +-2.26225619677710465538 +-2.26230262028140449715 +-2.26234898434502129660 +-2.26239528898232311604 +-2.26244153420769844587 +-2.26248772003555886911 +-2.26253384648033284421 +-2.26257991355647325449 +-2.26262592127845030276 +-2.26267186966075639631 +-2.26271775871790392642 +-2.26276358846442704476 +-2.26280935891487855471 +-2.26285507008383390826 +-2.26290072198588765318 +-2.26294631463565654172 +-2.26299184804777597790 +-2.26303732223690357017 +-2.26308273721771779918 +-2.26312809300491712960 +-2.26317338961322045421 +-2.26321862705736842614 +-2.26326380535212123846 +-2.26330892451226128870 +-2.26335398455259140249 +-2.26339898548793350130 +-2.26344392733313171107 +-2.26348881010305280626 +-2.26353363381257999265 +-2.26357839847662090094 +-2.26362310411010314581 +-2.26366775072797477009 +-2.26371233834520424466 
+-2.26375686697678224490 +-2.26380133663771987429 +-2.26384574734304910848 +-2.26389009910782057489 +-2.26393439194711110218 +-2.26397862587601395035 +-2.26402280090964413972 +-2.26406691706313889512 +-2.26411097435165631353 +-2.26415497279037447598 +-2.26419891239449233566 +-2.26424279317923105026 +-2.26428661515983309371 +-2.26433037835156092399 +-2.26437408276969698306 +-2.26441772842954769374 +-2.26446131534643768646 +-2.26450484353571646068 +-2.26454831301274861488 +-2.26459172379292628108 +-2.26463507589165846667 +-2.26467836932437727171 +-2.26472160410653433615 +-2.26476478025360572488 +-2.26480789778108482224 +-2.26485095670448766114 +-2.26489395703935292303 +-2.26493689880123927338 +-2.26497978200572491758 +-2.26502260666841248593 +-2.26506537280492326047 +-2.26510808043090161590 +-2.26515072956201235499 +-2.26519332021394159682 +-2.26523585240239677674 +-2.26527832614310664638 +-2.26532074145182082958 +-2.26536309834431159871 +-2.26540539683637032198 +-2.26544763694381279251 +-2.26548981868247301108 +-2.26553194206820807111 +-2.26557400711689682637 +-2.26561601384443811469 +-2.26565796226675253422 +-2.26569985239978333169 +-2.26574168425949418193 +-2.26578345786186918787 +-2.26582517322291643325 +-2.26586683035866309766 +-2.26590842928515856514 +-2.26594997001847442419 +-2.26599145257470313553 +-2.26603287696995803202 +-2.26607424322037598330 +-2.26611555134211206664 +-2.26615680135134489603 +-2.26619799326427662223 +-2.26623912709712715952 +-2.26628020286613907075 +-2.26632122058757934369 +-2.26636218027773184147 +-2.26640308195290574034 +-2.26644392562943020053 +-2.26648471132365569858 +-2.26652543905195580365 +-2.26656610883072362483 +-2.26660672067637580795 +-2.26664727460534942693 +-2.26668777063410464834 +-2.26672820877912073456 +-2.26676858905690137291 +-2.26680891148397023471 +-2.26684917607687230756 +-2.26688938285217700397 +-2.26692953182647238819 +-2.26696962301636961712 +-2.26700965643850160802 +-2.26704963210952215036 +-2.26708955004610723805 
+-2.26712941026495551355 +-2.26716921278278649154 +-2.26720895761634100296 +-2.26724864478238252730 +-2.26728827429769719259 +-2.26732784617908933456 +-2.26736736044338904605 +-2.26740681710744773625 +-2.26744621618813502195 +-2.26748555770234760942 +-2.26752484166699863621 +-2.26756406809902788524 +-2.26760323701539423524 +-2.26764234843307832534 +-2.26768140236908521956 +-2.26772039884043863367 +-2.26775933786418582017 +-2.26779821945739579192 +-2.26783704363716021035 +-2.26787581042059116498 +-2.26791451982482339389 +-2.26795317186701295142 +-2.26799176656433898458 +-2.26803030393400240072 +-2.26806878399322497941 +-2.26810720675925159284 +-2.26814557224934709723 +-2.26818388048080255004 +-2.26822213147092677232 +-2.26826032523705167776 +-2.26829846179653271676 +-2.26833654116674487966 +-2.26837456336508802579 +-2.26841252840898111032 +-2.26845043631586751331 +-2.26848828710321148705 +-2.26852608078849948825 +-2.26856381738923973401 +-2.26860149692296353408 +-2.26863911940722351446 +-2.26867668485959317337 +-2.26871419329767087802 +-2.26875164473907497964 +-2.26878903920144647799 +-2.26882637670244902139 +-2.26886365725976624219 +-2.26890088089110752989 +-2.26893804761420092575 +-2.26897515744679845184 +-2.26901221040667522288 +-2.26904920651162456124 +-2.26908614577946732282 +-2.26912302822804168301 +-2.26915985387521024208 +-2.26919662273885824888 +-2.26923333483689360079 +-2.26926999018724329105 +-2.26930658880785873777 +-2.26934313071671311945 +-2.26937961593180315134 +-2.26941604447114642085 +-2.26945241635278271985 +-2.26948873159477271244 +-2.26952499021520281985 +-2.26956119223217855918 +-2.26959733766383076059 +-2.26963342652830801782 +-2.26966945884378512588 +-2.26970543462845819604 +-2.26974135390054421180 +-2.26977721667828413743 +-2.26981302297994069761 +-2.26984877282379748920 +-2.26988446622816342213 +-2.26992010321136650219 +-2.26995568379175916007 +-2.26999120798771558682 +-2.27002667581763128979 +-2.27006208729992575712 +-2.27009744245304023735 
+-2.27013274129543685120 +-2.27016798384560303248 +-2.27020317012204575491 +-2.27023830014329552895 +-2.27027337392790506954 +-2.27030839149444974012 +-2.27034335286152710864 +-2.27037825804775739158 +-2.27041310707178300987 +-2.27044789995226725665 +-2.27048263670789784996 +-2.27051731735738515638 +-2.27055194191945908244 +-2.27058651041287573591 +-2.27062102285641120858 +-2.27065547926886512897 +-2.27068987966905755371 +-2.27072422407583340842 +-2.27075851250805893500 +-2.27079274498462213572 +-2.27082692152443632594 +-2.27086104214643302868 +-2.27089510686956996821 +-2.27092911571282440875 +-2.27096306869519759530 +-2.27099696583571475372 +-2.27103080715342064977 +-2.27106459266738225367 +-2.27109832239669362508 +-2.27113199636046569907 +-2.27116561457783561195 +-2.27119917706796226042 +-2.27123268385002496927 +-2.27126613494322837639 +-2.27129953036679799183 +-2.27133287013998286241 +-2.27136615428205290712 +-2.27139938281230158168 +-2.27143255575004632263 +-2.27146567311462410643 +-2.27149873492539722264 +-2.27153174120174883299 +-2.27156469196308430369 +-2.27159758722883253768 +-2.27163042701844553051 +-2.27166321135139703813 +-2.27169594024718302094 +-2.27172861372532208790 +-2.27176123180535594059 +-2.27179379450684848507 +-2.27182630184938716411 +-2.27185875385257984860 +-2.27189115053605883432 +-2.27192349191947862153 +-2.27195577802251591493 +-2.27198800886486962369 +-2.27202018446626308190 +-2.27205230484644005173 +-2.27208437002516783210 +-2.27211638002223637045 +-2.27214833485745870689 +-2.27218023455066919780 +-2.27221207912172573629 +-2.27224386859050886400 +-2.27227560297692088298 +-2.27230728230088763198 +-2.27233890658235759830 +-2.27237047584130102962 +-2.27240199009771215444 +-2.27243344937160607344 +-2.27246485368302186814 +-2.27249620305202082449 +-2.27252749749868643292 +-2.27255873704312616468 +-2.27258992170546836320 +-2.27262105150586579683 +-2.27265212646449166201 +-2.27268314660154535645 +-2.27271411193724448552 +-2.27274502249183196767 
+-2.27277587828557381400 +-2.27280667933875690778 +-2.27283742567169255722 +-2.27286811730471338677 +-2.27289875425817511356 +-2.27292933655245654734 +-2.27295986420795870231 +-2.27299033724510479715 +-2.27302075568434247543 +-2.27305111954614025294 +-2.27308142885098929398 +-2.27311168361940607596 +-2.27314188387192528396 +-2.27317202962910913655 +-2.27320212091153850409 +-2.27323215773981957000 +-2.27326214013457983398 +-2.27329206811646988839 +-2.27332194170616430640 +-2.27335176092435764517 +-2.27338152579176977497 +-2.27341123632914099417 +-2.27344089255723647014 +-2.27347049449684179834 +-2.27350004216876833141 +-2.27352953559384696192 +-2.27355897479293256325 +-2.27358835978690354551 +-2.27361769059666052328 +-2.27364696724312542742 +-2.27367618974724416958 +-2.27370535812998664227 +-2.27373447241234316607 +-2.27376353261532715422 +-2.27379253875997600076 +-2.27382149086734886012 +-2.27385038895852797936 +-2.27387923305461780998 +-2.27390802317674545208 +-2.27393675934606287470 +-2.27396544158374114275 +-2.27399406991097663422 +-2.27402264434898748746 +-2.27405116491901537756 +-2.27407963164232329589 +-2.27410804454019865872 +-2.27413640363395019861 +-2.27416470894491107302 +-2.27419296049443397933 +-2.27422115830389737212 +-2.27424930239470191040 +-2.27427739278827045766 +-2.27430542950604763774 +-2.27433341256950205533 +-2.27436134200012540774 +-2.27438921781943204081 +-2.27441704004895584035 +-2.27444480871025911384 +-2.27447252382492237643 +-2.27450018541455012411 +-2.27452779350077083365 +-2.27455534810523340994 +-2.27458284924961073870 +-2.27461029695559968644 +-2.27463769124491754781 +-2.27466503213930604232 +-2.27469231966052731764 +-2.27471955383036972265 +-2.27474673467064203436 +-2.27477386220317567833 +-2.27480093644982561685 +-2.27482795743246901665 +-2.27485492517300613713 +-2.27488183969335944212 +-2.27490870101547582038 +-2.27493550916132170059 +-2.27496226415288793632 +-2.27498896601218980607 +-2.27501561476126346051 +-2.27504221042216681070 
+-2.27506875301698263669 +-2.27509524256781503482 +-2.27512167909679119404 +-2.27514806262606095189 +-2.27517439317779857078 +-2.27520067077419652080 +-2.27522689543747480556 +-2.27525306718987474497 +-2.27527918605365808702 +-2.27530525205111189280 +-2.27533126520454587194 +-2.27535722553628971809 +-2.27538313306869977026 +-2.27540898782415057511 +-2.27543478982504421282 +-2.27546053909380185942 +-2.27548623565286867176 +-2.27551187952471334341 +-2.27553747073182410787 +-2.27556300929671673217 +-2.27558849524192519098 +-2.27561392859000832800 +-2.27563930936354763546 +-2.27566463758514769822 +-2.27568991327743264108 +-2.27571513646305412237 +-2.27574030716468378444 +-2.27576542540501458589 +-2.27579049120676568663 +-2.27581550459267489828 +-2.27584046558550623374 +-2.27586537420804546628 +-2.27589023048309879727 +-2.27591503443349729707 +-2.27593978608209468462 +-2.27596448545176688327 +-2.27598913256541202088 +-2.27601372744595042974 +-2.27603827011632819932 +-2.27606276059950918267 +-2.27608719891848387817 +-2.27611158509626365642 +-2.27613591915588253656 +-2.27616020112039851853 +-2.27618443101289003039 +-2.27620860885646081329 +-2.27623273467423414829 +-2.27625680848935685319 +-2.27628083032500150296 +-2.27630480020436021249 +-2.27632871815064641297 +-2.27635258418709973682 +-2.27637639833698024461 +-2.27640016062357108950 +-2.27642387107017851733 +-2.27644752970012964610 +-2.27647113653677690692 +-2.27649469160349271490 +-2.27651819492367302189 +-2.27654164652073776054 +-2.27656504641812640344 +-2.27658839463930506852 +-2.27661169120775852548 +-2.27663493614699552481 +-2.27665812948054924192 +-2.27668127123197194805 +-2.27670436142484122755 +-2.27672740008275686918 +-2.27675038722933864577 +-2.27677332288823386364 +-2.27679620708310626043 +-2.27681903983764755139 +-2.27684182117556810354 +-2.27686455112060359696 +-2.27688722969651058392 +-2.27690985692706870935 +-2.27693243283607849037 +-2.27695495744736664534 +-2.27697743078477898848 +-2.27699985287218531482 
+-2.27702222373347851203 +-2.27704454339257100770 +-2.27706681187340276296 +-2.27708902919993061431 +-2.27711119539613759954 +-2.27713331048602896090 +-2.27715537449363081279 +-2.27717738744299236231 +-2.27719934935818635324 +-2.27722126026330640158 +-2.27724312018246966005 +-2.27726492913981592991 +-2.27728668715950544055 +-2.27730839426572417850 +-2.27733005048267678205 +-2.27735165583459409078 +-2.27737321034572648415 +-2.27739471404034832247 +-2.27741616694275528232 +-2.27743756907726657701 +-2.27745892046822318022 +-2.27748022113998827010 +-2.27750147111694900559 +-2.27752267042351208559 +-2.27754381908410863389 +-2.27756491712319197873 +-2.27758596456523809692 +-2.27760696143474339337 +-2.27762790775622958606 +-2.27764880355423748881 +-2.27766964885333367263 +-2.27769044367810424845 +-2.27771118805315841982 +-2.27773188200312981522 +-2.27775252555267071486 +-2.27777311872645915614 +-2.27779366154919316045 +-2.27781415404559384186 +-2.27783459624040540703 +-2.27785498815839293485 +-2.27787532982434504092 +-2.27789562126307210121 +-2.27791586249940625208 +-2.27793605355820227842 +-2.27795619446433761368 +-2.27797628524271233985 +-2.27799632591824741112 +-2.27801631651588731842 +-2.27803625706059831302 +-2.27805614757736751841 +-2.27807598809120781524 +-2.27809577862715073593 +-2.27811551921025134959 +-2.27813520986558737391 +-2.27815485061825873103 +-2.27817444149338621528 +-2.27819398251611460182 +-2.27821347371160998208 +-2.27823291510506020785 +-2.27825230672167622359 +-2.27827164858668984593 +-2.27829094072535687232 +-2.27831018316295397241 +-2.27832937592478090849 +-2.27834851903615698276 +-2.27836761252242681053 +-2.27838665640895587927 +-2.27840565072113232503 +-2.27842459548436471195 +-2.27844349072408602908 +-2.27846233646574969356 +-2.27848113273483177110 +-2.27849987955683008778 +-2.27851857695726556230 +-2.27853722496167909739 +-2.27855582359563646477 +-2.27857437288472297610 +-2.27859287285454836791 +-2.27861132353074147261 +-2.27862972493895510340 
+-2.27864807710486516612 +-2.27866638005416577428 +-2.27868463381257768674 +-2.27870283840584031410 +-2.27872099385971660368 +-2.27873910019999037502 +-2.27875715745246898436 +-2.27877516564297977197 +-2.27879312479737361485 +-2.27881103494152315037 +-2.27882889610132233216 +-2.27884670830268820652 +-2.27886447157155691556 +-2.27888218593389080269 +-2.27889985141566997484 +-2.27891746804289896389 +-2.27893503584160495024 +-2.27895255483783287787 +-2.27897002505765522429 +-2.27898744652716045422 +-2.27900481927246412184 +-2.27902214331969998895 +-2.27903941869502579820 +-2.27905664542461972033 +-2.27907382353468257463 +-2.27909095305143694077 +-2.27910803400112671468 +-2.27912506641001888497 +-2.27914205030439953603 +-2.27915898571058006539 +-2.27917587265489052228 +-2.27919271116368538088 +-2.27920950126333776709 +-2.27922624298024567580 +-2.27924293634082753002 +-2.27925958137152262495 +-2.27927617809879334843 +-2.27929272654912251639 +-2.27930922674901692560 +-2.27932567872500158046 +-2.27934208250362724257 +-2.27935843811146154891 +-2.27937474557509833772 +-2.27939100492115098717 +-2.27940721617625463580 +-2.27942337936706573842 +-2.27943949452026295432 +-2.27945556166254714725 +-2.27947158082063916495 +-2.27948755202128205966 +-2.27950347529124108803 +-2.27951935065730326713 +-2.27953517814627648619 +-2.27955095778499039483 +-2.27956668960029507076 +-2.27958237361906501661 +-2.27959800986819249857 +-2.27961359837459420774 +-2.27962913916520859559 +-2.27964463226699232123 +-2.27966007770692691281 +-2.27967547551201343836 +-2.27969082570927517040 +-2.27970612832575714179 +-2.27972138338852481354 +-2.27973659092466629517 +-2.27975175096129056840 +-2.27976686352552659898 +-2.27978192864452866573 +-2.27979694634546747878 +-2.27981191665553861725 +-2.27982683960195808837 +-2.27984171521196321564 +-2.27985654351281130658 +-2.27987132453178453773 +-2.27988605829618284915 +-2.27990074483332882949 +-2.27991538417056682775 +-2.27992997633526162105 +-2.27994452135480019095 
+-2.27995901925659039122 +-2.27997347006806005965 +-2.27998787381666057072 +-2.28000223052986372707 +-2.28001654023516175940 +-2.28003080296006777061 +-2.28004501873211884444 +-2.28005918757887027226 +-2.28007330952789910583 +-2.28008738460680504545 +-2.28010141284320777544 +-2.28011539426474829639 +-2.28012932889908892520 +-2.28014321677391329501 +-2.28015705791692457893 +-2.28017085235584948677 +-2.28018460011843471236 +-2.28019830123244737763 +-2.28021195572567680898 +-2.28022556362593187274 +-2.28023912496104497194 +-2.28025263975886716139 +-2.28026610804727125625 +-2.28027952985415183207 +-2.28029290520742344839 +-2.28030623413502242514 +-2.28031951666490551034 +-2.28033275282505076831 +-2.28034594264345624737 +-2.28035908614814308848 +-2.28037218336715019618 +-2.28038523432854134398 +-2.28039823906039806900 +-2.28041119759082322460 +-2.28042410994794142454 +-2.28043697615989904293 +-2.28044979625486066155 +-2.28046257026101351073 +-2.28047529820656569299 +-2.28048798011974529487 +-2.28050061602880127509 +-2.28051320596200524093 +-2.28052574994764611915 +-2.28053824801403681732 +-2.28055070018950889477 +-2.28056310650241611526 +-2.28057546698113311479 +-2.28058778165405229288 +-2.28060005054959091808 +-2.28061227369618313432 +-2.28062445112228839861 +-2.28063658285638126699 +-2.28064866892696027634 +-2.28066070936254439161 +-2.28067270419167344997 +-2.28068465344290549623 +-2.28069655714482255604 +-2.28070841532602441859 +-2.28072022801513396573 +-2.28073199524079228695 +-2.28074371703166223213 +-2.28075539341642663516 +-2.28076702442378964619 +-2.28077861008247584351 +-2.28079015042122890122 +-2.28080164546881469789 +-2.28081309525401776384 +-2.28082449980564572201 +-2.28083585915252395893 +-2.28084717332349917740 +-2.28085844234743939651 +-2.28086966625323173119 +-2.28088084506978550081 +-2.28089197882602734424 +-2.28090306755090654889 +-2.28091411127339327436 +-2.28092511002247499974 +-2.28093606382716318492 +-2.28094697271648705339 +-2.28095783671949714488 
+-2.28096865586526353908 +-2.28097943018287763195 +-2.28099015970145080345 +-2.28100084445011308532 +-2.28101148445801671372 +-2.28102207975433302067 +-2.28103263036825376631 +-2.28104313632899158293 +-2.28105359766577819869 +-2.28106401440786576984 +-2.28107438658452732483 +-2.28108471422505321158 +-2.28109499735875820292 +-2.28110523601497483526 +-2.28111543022305518491 +-2.28112558001237131222 +-2.28113568541231659381 +-2.28114574645230394623 +-2.28115576316176671412 +-2.28116573557015689389 +-2.28117566370694735411 +-2.28118554760163094741 +-2.28119538728371962222 +-2.28120518278274664326 +-2.28121493412826392699 +-2.28122464134984470618 +-2.28123430447707997715 +-2.28124392353958249657 +-2.28125349856698367290 +-2.28126302958893578676 +-2.28127251663511110280 +-2.28128195973519964923 +-2.28129135891891277055 +-2.28130071421598090708 +-2.28131002565615625954 +-2.28131929326920879220 +-2.28132851708492756515 +-2.28133769713312251071 +-2.28134683344362443336 +-2.28135592604628101299 +-2.28136497497096168985 +-2.28137398024755588821 +-2.28138294190596990774 +-2.28139185997613269663 +-2.28140073448799007849 +-2.28140956547151185774 +-2.28141835295668160555 +-2.28142709697350642983 +-2.28143579755201120207 +-2.28144445472224077776 +-2.28145306851426132866 +-2.28146163895815545786 +-2.28147016608402619653 +-2.28147864992199744805 +-2.28148709050221176753 +-2.28149548785482858548 +-2.28150384201003220142 +-2.28151215299802068159 +-2.28152042084901562902 +-2.28152864559325418981 +-2.28153682726099660272 +-2.28154496588251953781 +-2.28155306148812053735 +-2.28156111410811623941 +-2.28156912377284282201 +-2.28157709051265378264 +-2.28158501435792393508 +-2.28159289533904674485 +-2.28160073348643432922 +-2.28160852883051923357 +-2.28161628140175176682 +-2.28162399123060222195 +-2.28163165834755776729 +-2.28163928278312999609 +-2.28164686456784338020 +-2.28165440373224592818 +-2.28166190030690163582 +-2.28166935432239625925 +-2.28167676580933331820 +-2.28168413479833454005 
+-2.28169146132004074801 +-2.28169874540511408156 +-2.28170598708423311152 +-2.28171318638809639268 +-2.28172034334741979933 +-2.28172745799294052205 +-2.28173453035541440315 +-2.28174156046561371625 +-2.28174854835433249534 +-2.28175549405238076162 +-2.28176239759059029666 +-2.28176925899980886925 +-2.28177607831090512036 +-2.28178285555476589863 +-2.28178959076229492808 +-2.28179628396441858129 +-2.28180293519207788577 +-2.28180954447623429715 +-2.28181611184787014324 +-2.28182263733798063043 +-2.28182912097758494596 +-2.28183556279771959652 +-2.28184196282943885237 +-2.28184832110381430326 +-2.28185463765193929930 +-2.28186091250492273375 +-2.28186714569389392793 +-2.28187333724999996676 +-2.28187948720440614281 +-2.28188559558829817675 +-2.28189166243287644420 +-2.28189768776936263706 +-2.28190367162899576670 +-2.28190961404303394033 +-2.28191551504275347284 +-2.28192137465944844266 +-2.28192719292443157997 +-2.28193296986903337853 +-2.28193870552460342793 +-2.28194439992250952542 +-2.28195005309413812000 +-2.28195566507089253605 +-2.28196123588419341743 +-2.28196676556548405657 +-2.28197225414622106854 +-2.28197770165788149654 +-2.28198310813196059144 +-2.28198847359997136763 +-2.28199379809344460313 +-2.28199908164392928356 +-2.28200432428299215815 +-2.28200952604221907194 +-2.28201468695321363356 +-2.28201980704759588292 +-2.28202488635700451169 +-2.28202992491309863965 +-2.28203492274755204150 +-2.28203987989205714371 +-2.28204479637832546857 +-2.28204967223808585786 +-2.28205450750308402874 +-2.28205930220508523831 +-2.28206405637587117496 +-2.28206877004724129065 +-2.28207344325101368909 +-2.28207807601902423755 +-2.28208266838312567870 +-2.28208722037518851877 +-2.28209173202710147166 +-2.28209620337077145891 +-2.28210063443812183337 +-2.28210502526109415555 +-2.28210937587164730544 +-2.28211368630175881478 +-2.28211795658342131432 +-2.28212218674864741885 +-2.28212637682946617446 +-2.28213052685792527896 +-2.28213463686608841741 +-2.28213870688603615022 
+-2.28214273694986946595 +-2.28214672708970400805 +-2.28215067733767451585 +-2.28215458772593127179 +-2.28215845828664232187 +-2.28216228905199569610 +-2.28216608005419407945 +-2.28216983132545658819 +-2.28217354289802321077 +-2.28217721480414770241 +-2.28218084707610247008 +-2.28218443974617768433 +-2.28218799284667950289 +-2.28219150640993184709 +-2.28219498046827551363 +-2.28219841505406995097 +-2.28220181019968926250 +-2.28220516593752575929 +-2.28220848229998907186 +-2.28221175931950481797 +-2.28221499702851726710 +-2.28221819545948667596 +-2.28222135464488973255 +-2.28222447461722222073 +-2.28222755540899280291 +-2.28223059705273190190 +-2.28223359958098370726 +-2.28223656302630928394 +-2.28223948742128879275 +-2.28224237279851571714 +-2.28224521919060441277 +-2.28224802663018166982 +-2.28225079514989470653 +-2.28225352478240584020 +-2.28225621556039470761 +-2.28225886751655560047 +-2.28226148068360190635 +-2.28226405509426300000 +-2.28226659078128424341 +-2.28226908777742742984 +-2.28227154611547300433 +-2.28227396582821429050 +-2.28227634694846548413 +-2.28227868950905232737 +-2.28228099354282143452 +-2.28228325908263451893 +-2.28228548616136839300 +-2.28228767481191718858 +-2.28228982506719235701 +-2.28229193696011956050 +-2.28229401052364222480 +-2.28229604579072065107 +-2.28229804279433068359 +-2.28230000156746282158 +-2.28230192214312710419 +-2.28230380455434733733 +-2.28230564883416509048 +-2.28230745501563614397 +-2.28230922313183404171 +-2.28231095321584742663 +-2.28231264530078270525 +-2.28231429941976005082 +-2.28231591560591740020 +-2.28231749389240823334 +-2.28231903431240157332 +-2.28232053689908376271 +-2.28232200168565491083 +-2.28232342870533289059 +-2.28232481799135156209 +-2.28232616957695944038 +-2.28232748349542058364 +-2.28232875978001681361 +-2.28232999846404549515 +-2.28233119958081731582 +-2.28233236316366117080 +-2.28233348924592149842 +-2.28233457786095739195 +-2.28233562904214482003 +-2.28233664282287351810 +-2.28233761923655098514 
+-2.28233855831659981916 +-2.28233946009645682906 +-2.28234032460957658728 +-2.28234115188942698893 +-2.28234194196949369271 +-2.28234269488327567998 +-2.28234341066428880751 +-2.28234408934606358699 +-2.28234473096214696142 +-2.28234533554610097283 +-2.28234590313150231822 +-2.28234643375194279358 +-2.28234692744103062623 +-2.28234738423239003069 +-2.28234780415965765599 +-2.28234818725648880289 +-2.28234853355655120666 +-2.28234884309352858978 +-2.28234911590112155011 +-2.28234935201304356411 +-2.28234955146302276319 +-2.28234971428480726274 +-2.28234984051215405998 +-2.28234993017883880384 +-2.28234998331865046595 +-2.28234999996539489331 +-2.28234998014945666966 +-2.28234992388917534711 +-2.28234983120024015335 +-2.28234970209833232246 +-2.28234953659912864765 +-2.28234933471829704033 +-2.28234909647149919465 +-2.28234882187438925527 +-2.28234851094261559368 +-2.28234816369181814366 +-2.28234778013763195403 +-2.28234736029568274773 +-2.28234690418159047454 +-2.28234641181096931106 +-2.28234588319942455215 +-2.28234531836255483128 +-2.28234471731595478516 +-2.28234408007520794825 +-2.28234340665589252595 +-2.28234269707358228274 +-2.28234195134384032499 +-2.28234116948222487409 +-2.28234035150428704597 +-2.28233949742557085116 +-2.28233860726161275068 +-2.28233768102794565280 +-2.28233671874009047542 +-2.28233572041356413962 +-2.28233468606387779332 +-2.28233361570653325856 +-2.28233250935702658424 +-2.28233136703084671382 +-2.28233018874347681759 +-2.28232897451039162817 +-2.28232772434705921683 +-2.28232643826894232575 +-2.28232511629149525945 +-2.28232375843016699335 +-2.28232236470039762111 +-2.28232093511762101912 +-2.28231946969726662289 +-2.28231796845475409796 +-2.28231643140549600446 +-2.28231485856490090569 +-2.28231324994836937137 +-2.28231160557129308941 +-2.28230992544906063912 +-2.28230820959704994166 +-2.28230645803063492139 +-2.28230467076518195313 +-2.28230284781604852995 +-2.28230098919859036855 +-2.28229909492814941885 +-2.28229716502006718670 
+-2.28229519948967540799 +-2.28229319835229871316 +-2.28229116162325595951 +-2.28228908931785845482 +-2.28228698145141217779 +-2.28228483803921422535 +-2.28228265909655636534 +-2.28228044463872326020 +-2.28227819468099291100 +-2.28227590923863532524 +-2.28227358832691651358 +-2.28227123196109227266 +-2.28226884015641484638 +-2.28226641292812670869 +-2.28226395029146678084 +-2.28226145226166465818 +-2.28225891885394327474 +-2.28225635008352067956 +-2.28225374596560692808 +-2.28225110651540497031 +-2.28224843174811242719 +-2.28224572167891937013 +-2.28224297632300698879 +-2.28224019569555469644 +-2.28223737981172947187 +-2.28223452868669696159 +-2.28223164233561304215 +-2.28222872077362515242 +-2.28222576401587762263 +-2.28222277207750723349 +-2.28221974497364366030 +-2.28221668271940769657 +-2.28221358532991658308 +-2.28221045282028045520 +-2.28220728520560012242 +-2.28220408250097328562 +-2.28220084472148831978 +-2.28219757188222738264 +-2.28219426399826685881 +-2.28219092108467558333 +-2.28218754315651661813 +-2.28218413022884547559 +-2.28218068231671056267 +-2.28217719943515495729 +-2.28217368159921463189 +-2.28217012882391756534 +-2.28216654112428818379 +-2.28216291851533936708 +-2.28215926101208221866 +-2.28215556862951807204 +-2.28215184138264381986 +-2.28214807928644658475 +-2.28214428235591126892 +-2.28214045060601167236 +-2.28213658405171759824 +-2.28213268270799174431 +-2.28212874658978881470 +-2.28212477571205951676 +-2.28212077008974567605 +-2.28211672973778334494 +-2.28211265467110058225 +-2.28210854490462189403 +-2.28210440045326157232 +-2.28210022133192991234 +-2.28209600755553010387 +-2.28209175913895645493 +-2.28208747609710016491 +-2.28208315844484355139 +-2.28207880619706315883 +-2.28207441936862798215 +-2.28206999797440124311 +-2.28206554202923950214 +-2.28206105154799310242 +-2.28205652654550394942 +-2.28205196703661039592 +-2.28204737303614058064 +-2.28204274455891997775 +-2.28203808161976340330 +-2.28203338423348256470 +-2.28202865241487984349 
+-2.28202388617875362442 +-2.28201908553989474271 +-2.28201425051308426362 +-2.28200938111310280831 +-2.28200447735471989574 +-2.28199953925269971577 +-2.28199456682180024103 +-2.28198956007677145053 +-2.28198451903235888238 +-2.28197944370330008113 +-2.28197433410432681811 +-2.28196919025016287108 +-2.28196401215552890918 +-2.28195879983513316702 +-2.28195355330368387925 +-2.28194827257587817826 +-2.28194295766640875556 +-2.28193760858996075314 +-2.28193222536121398392 +-2.28192680799483937903 +-2.28192135650550431691 +-2.28191587090786729419 +-2.28191035121658236662 +-2.28190479744629426406 +-2.28189920961164416369 +-2.28189358772726569313 +-2.28188793180778448644 +-2.28188224186782129266 +-2.28187651792198931133 +-2.28187075998489730111 +-2.28186496807114469476 +-2.28185914219532648417 +-2.28185328237202966761 +-2.28184738861583635838 +-2.28184146094132156435 +-2.28183549936305141159 +-2.28182950389559069393 +-2.28182347455349310295 +-2.28181741135130700115 +-2.28181131430357542200 +-2.28180518342483473759 +-2.28179901872961288234 +-2.28179282023243334976 +-2.28178658794781297203 +-2.28178032189026147591 +-2.28177402207428281500 +-2.28176768851437250518 +-2.28176132122502250965 +-2.28175492022071590981 +-2.28174848551593090207 +-2.28174201712513902152 +-2.28173551506280380963 +-2.28172897934338481107 +-2.28172240998133313283 +-2.28171580699109322055 +-2.28170917038710641123 +-2.28170250018380249557 +-2.28169579639560948792 +-2.28168905903694563264 +-2.28168228812222517732 +-2.28167548366585348774 +-2.28166864568223237697 +-2.28166177418575522040 +-2.28165486919080917616 +-2.28164793071177518513 +-2.28164095876302708277 +-2.28163395335893426363 +-2.28162691451385901686 +-2.28161984224215519390 +-2.28161273655817220529 +-2.28160559747625280025 +-2.28159842501073262255 +-2.28159121917594287510 +-2.28158397998620499081 +-2.28157670745583729399 +-2.28156940159914878308 +-2.28156206243044534787 +-2.28155468996402310822 +-2.28154728421417463124 +-2.28153984519518493457 
+-2.28153237292133104219 +-2.28152486740688642541 +-2.28151732866611700601 +-2.28150975671328160033 +-2.28150215156263458383 +-2.28149451322842011791 +-2.28148684172488103172 +-2.28147913706624994035 +-2.28147139926675501798 +-2.28146362834061688929 +-2.28145582430205084989 +-2.28144798716526553406 +-2.28144011694446291472 +-2.28143221365383874755 +-2.28142427730758257098 +-2.28141630791987681803 +-2.28140830550490036899 +-2.28140027007682100191 +-2.28139220164980427441 +-2.28138410023800819459 +-2.28137596585558233286 +-2.28136779851667315100 +-2.28135959823542000535 +-2.28135136502595470276 +-2.28134309890240194463 +-2.28133479987888332374 +-2.28132646796951199519 +-2.28131810318839489682 +-2.28130970554963186103 +-2.28130127506731827935 +-2.28129281175554199379 +-2.28128431562838507318 +-2.28127578669992381322 +-2.28126722498422607188 +-2.28125863049535526628 +-2.28125000324736859625 +-2.28124134325431482395 +-2.28123265053024049109 +-2.28122392508918148124 +-2.28121516694516923707 +-2.28120637611223076036 +-2.28119755260438195066 +-2.28118869643563870753 +-2.28117980762000449602 +-2.28117088617148056073 +-2.28116193210406192904 +-2.28115294543173341424 +-2.28114392616847894146 +-2.28113487432827133361 +-2.28112578992507941678 +-2.28111667297286668799 +-2.28110752348558865066 +-2.28109834147719592323 +-2.28108912696163024236 +-2.28107987995283201244 +-2.28107060046472964743 +-2.28106128851124889678 +-2.28105194410630884860 +-2.28104256726382104148 +-2.28103315799769212902 +-2.28102371632182121530 +-2.28101424225010340763 +-2.28100473579642448740 +-2.28099519697466712742 +-2.28098562579870467459 +-2.28097602228240647904 +-2.28096638643963567361 +-2.28095671828424695349 +-2.28094701783009234930 +-2.28093728509101367763 +-2.28092752008084920234 +-2.28091772281343141415 +-2.28090789330258347789 +-2.28089803156212544977 +-2.28088813760586939239 +-2.28087821144762292747 +-2.28086825310118568311 +-2.28085826258035151426 +-2.28084823989890761453 +-2.28083818507063806891 
+-2.28082809810931497196 +-2.28081797902871041828 +-2.28080782784258673246 +-2.28079764456470002187 +-2.28078742920880195300 +-2.28077718178863575460 +-2.28076690231794110275 +-2.28075659081044923582 +-2.28074624727988695128 +-2.28073587173997394117 +-2.28072546420442279214 +-2.28071502468694209398 +-2.28070455320123288701 +-2.28069404976098955018 +-2.28068351437990157748 +-2.28067294707165135748 +-2.28066234784991683782 +-2.28065171672836619621 +-2.28064105372066627808 +-2.28063035884047282664 +-2.28061963210143892056 +-2.28060887351721142124 +-2.28059808310142786425 +-2.28058726086772356467 +-2.28057640682972495583 +-2.28056552100105358605 +-2.28055460339532434233 +-2.28054365402614678260 +-2.28053267290712247117 +-2.28052166005184941966 +-2.28051061547391809015 +-2.28049953918691228338 +-2.28048843120441047105 +-2.28047729153998490759 +-2.28046612020720207425 +-2.28045491721962045872 +-2.28044368259079632821 +-2.28043241633427573589 +-2.28042111846359940586 +-2.28040978899230450949 +-2.28039842793391978049 +-2.28038703530196862346 +-2.28037561110996822578 +-2.28036415537142911347 +-2.28035266809985737169 +-2.28034114930875020377 +-2.28032959901160214855 +-2.28031801722189797488 +-2.28030640395312023117 +-2.28029475921874125177 +-2.28028308303223115061 +-2.28027137540705160390 +-2.28025963635665895879 +-2.28024786589450378926 +-2.28023606403402867571 +-2.28022423078867308988 +-2.28021236617186895401 +-2.28020047019704019675 +-2.28018854287760852628 +-2.28017658422698632492 +-2.28016459425858286636 +-2.28015257298579809841 +-2.28014052042202886028 +-2.28012843658066266528 +-2.28011632147508525037 +-2.28010417511867258256 +-2.28009199752479485568 +-2.28007978870682004313 +-2.28006754867810501608 +-2.28005527745200309298 +-2.28004297504186315138 +-2.28003064146102429888 +-2.28001827672282209036 +-2.28000588084058586347 +-2.27999345382763829448 +-2.27998099569729539837 +-2.27996850646286919329 +-2.27995598613766414786 +-2.27994343473497851349 +-2.27993085226810610067 
+-2.27991823875033228219 +-2.27990559419493976634 +-2.27989291861520060323 +-2.27988021202438551072 +-2.27986747443575632488 +-2.27985470586256866454 +-2.27984190631807592808 +-2.27982907581551996756 +-2.27981621436813997050 +-2.27980332198916979536 +-2.27979039869183530698 +-2.27977744448935659705 +-2.27976445939494887227 +-2.27975144342182067803 +-2.27973839658317434242 +-2.27972531889220686452 +-2.27971221036210724975 +-2.27969907100606317130 +-2.27968590083725075601 +-2.27967269986884346622 +-2.27965946811400765881 +-2.27964620558590436161 +-2.27963291229768749702 +-2.27961958826250787880 +-2.27960623349350566258 +-2.27959284800381967173 +-2.27957943180657940374 +-2.27956598491490991520 +-2.27955250734192960138 +-2.27953899910075330482 +-2.27952546020448609809 +-2.27951189066622950108 +-2.27949829049907881640 +-2.27948465971612268532 +-2.27947099833044486417 +-2.27945730635512155970 +-2.27944358380322453783 +-2.27942983068781934719 +-2.27941604702196443100 +-2.27940223281871334748 +-2.27938838809111432582 +-2.27937451285220848973 +-2.27936060711503163390 +-2.27934667089261155937 +-2.27933270419797473494 +-2.27931870704413652717 +-2.27930467944411052628 +-2.27929062141090144067 +-2.27927653295750864970 +-2.27926241409692842410 +-2.27924826484214726463 +-2.27923408520614767525 +-2.27921987520190549859 +-2.27920563484239080410 +-2.27919136414057010853 +-2.27917706310939971459 +-2.27916273176183326044 +-2.27914837011081727880 +-2.27913397816929208517 +-2.27911955595019355414 +-2.27910510346644956670 +-2.27909062073098445111 +-2.27907610775671365388 +-2.27906156455654995696 +-2.27904699114339814869 +-2.27903238753015857654 +-2.27901775372972315026 +-2.27900308975498155917 +-2.27898839561881461080 +-2.27897367133409778361 +-2.27895891691370211518 +-2.27894413237049020537 +-2.27892931771732243362 +-2.27891447296705029757 +-2.27889959813252085397 +-2.27888469322657361005 +-2.27886975826204407625 +-2.27885479325176154575 +-2.27883979820854865039 +-2.27882477314522180478 
+-2.27880971807459431488 +-2.27879463300947016080 +-2.27877951796265021400 +-2.27876437294692602009 +-2.27874919797508779240 +-2.27873399305991641839 +-2.27871875821418790053 +-2.27870349345067380042 +-2.27868819878213813013 +-2.27867287422133824037 +-2.27865751978102881736 +-2.27864213547395610959 +-2.27862672131286014832 +-2.27861127731047830025 +-2.27859580347953860624 +-2.27858029983276466623 +-2.27856476638287563929 +-2.27854920314258180269 +-2.27853361012458988100 +-2.27851798734160038151 +-2.27850233480630759431 +-2.27848665253140092446 +-2.27847094052956178345 +-2.27845519881346847413 +-2.27843942739579219392 +-2.27842362628919747891 +-2.27840779550634353612 +-2.27839193505988601984 +-2.27837604496247081443 +-2.27836012522674113967 +-2.27834417586533310995 +-2.27832819689087795467 +-2.27831218831599935370 +-2.27829615015331699013 +-2.27828008241544299750 +-2.27826398511498595667 +-2.27824785826454689897 +-2.27823170187672063847 +-2.27821551596409888063 +-2.27819930053926489322 +-2.27818305561479705901 +-2.27816678120326754353 +-2.27815047731724362734 +-2.27813414396928681782 +-2.27811778117195196103 +-2.27810138893778812985 +-2.27808496727933906811 +-2.27806851620914407874 +-2.27805203573973358289 +-2.27803552588363444897 +-2.27801898665336732819 +-2.27800241806144754264 +-2.27798582012038375311 +-2.27796919284267973538 +-2.27795253624083215982 +-2.27793585032733369999 +-2.27791913511466992404 +-2.27790239061532151510 +-2.27788561684176160682 +-2.27786881380646155648 +-2.27785198152188250731 +-2.27783512000048249391 +-2.27781822925471288954 +-2.27780130929701973841 +-2.27778436013984109110 +-2.27776738179561499820 +-2.27775037427676574353 +-2.27773333759571983137 +-2.27771627176489310784 +-2.27769917679669609001 +-2.27768205270353574221 +-2.27766489949781059110 +-2.27764771719191605470 +-2.27763050579824000152 +-2.27761326532916452692 +-2.27759599579706861761 +-2.27757869721432149035 +-2.27756136959329058556 +-2.27754401294633312958 +-2.27752662728580590468 
+-2.27750921262405636725 +-2.27749176897342708870 +-2.27747429634625531136 +-2.27745679475487161625 +-2.27743926421160169937 +-2.27742170472876725995 +-2.27740411631868022724 +-2.27738649899364986595 +-2.27736885276597966765 +-2.27735117764796601847 +-2.27733347365189997547 +-2.27731574079006859890 +-2.27729797907475051133 +-2.27728018851822033852 +-2.27726236913274737717 +-2.27724452093059426261 +-2.27722664392401741296 +-2.27720873812526924951 +-2.27719080354659597631 +-2.27717284020023669200 +-2.27715484809842694247 +-2.27713682725339561230 +-2.27711877767736492473 +-2.27710069938255310618 +-2.27708259238117216583 +-2.27706445668542789562 +-2.27704629230752120250 +-2.27702809925964633209 +-2.27700987755399353318 +-2.27699162720274461691 +-2.27697334821807961802 +-2.27695504061216880132 +-2.27693670439717932297 +-2.27691833958527301007 +-2.27689994618860369613 +-2.27688152421932255010 +-2.27686307368957230324 +-2.27684459461149169002 +-2.27682608699721278356 +-2.27680755085886410427 +-2.27678898620856529078 +-2.27677039305843287309 +-2.27675177142057716395 +-2.27673312130710181478 +-2.27671444273010692427 +-2.27669573570168459753 +-2.27667700023392205466 +-2.27665823633890296307 +-2.27663944402870166428 +-2.27662062331539072346 +-2.27660177421103293582 +-2.27658289672768932022 +-2.27656399087741423415 +-2.27654505667225448562 +-2.27652609412425288582 +-2.27650710324544647278 +-2.27648808404786784365 +-2.27646903654354071378 +-2.27644996074448613399 +-2.27643085666271893786 +-2.27641172431024685352 +-2.27639256369907494459 +-2.27637337484119983699 +-2.27635415774861327165 +-2.27633491243330210452 +-2.27631563890724830657 +-2.27629633718242496698 +-2.27627700727080384269 +-2.27625764918434825290 +-2.27623826293501574369 +-2.27621884853476164068 +-2.27619940599553016725 +-2.27617993532926643496 +-2.27616043654790312090 +-2.27614090966337334621 +-2.27612135468760179435 +-2.27610177163250648746 +-2.27608216051000278313 +-2.27606252133199804533 +-2.27604285411039564124 
+-2.27602315885709183263 +-2.27600343558397888444 +-2.27598368430294284437 +-2.27596390502586265470 +-2.27594409776461548134 +-2.27592426253106872025 +-2.27590439933708710285 +-2.27588450819452869922 +-2.27586458911524580628 +-2.27584464211108494780 +-2.27582466719388865073 +-2.27580466437549189251 +-2.27578463366772609788 +-2.27576457508241514205 +-2.27574448863137890342 +-2.27572437432643015498 +-2.27570423217937811700 +-2.27568406220202534840 +-2.27566386440616819087 +-2.27564363880359898928 +-2.27562338540610253901 +-2.27560310422546141496 +-2.27558279527344886617 +-2.27556245856183503307 +-2.27554209410238295064 +-2.27552170190685165707 +-2.27550128198699308513 +-2.27548083435455605894 +-2.27546035902128140904 +-2.27543985599890552507 +-2.27541932529915902350 +-2.27539876693376630357 +-2.27537818091444821178 +-2.27535756725291804514 +-2.27533692596088554794 +-2.27531625705005291493 +-2.27529556053211790001 +-2.27527483641877292797 +-2.27525408472170465046 +-2.27523330545259261370 +-2.27521249862311369938 +-2.27519166424493857193 +-2.27517080232972990217 +-2.27514991288914858458 +-2.27512899593484796412 +-2.27510805147847516849 +-2.27508707953167288451 +-2.27506608010607758175 +-2.27504505321332350931 +-2.27502399886503337001 +-2.27500291707283031073 +-2.27498180784832726431 +-2.27496067120313583132 +-2.27493950714885917463 +-2.27491831569709601624 +-2.27489709685943930495 +-2.27487585064747666053 +-2.27485457707279081774 +-2.27483327614695873820 +-2.27481194788155027808 +-2.27479059228813218496 +-2.27476920937826498914 +-2.27474779916350255959 +-2.27472636165539565667 +-2.27470489686548749120 +-2.27468340480531550085 +-2.27466188548641401468 +-2.27464033892030981221 +-2.27461876511852434390 +-2.27459716409257595160 +-2.27457553585397453944 +-2.27455388041422557066 +-2.27453219778483006763 +-2.27451048797728150319 +-2.27448875100307112973 +-2.27446698687368131786 +-2.27444519560058955321 +-2.27442337719527110096 +-2.27440153166919190042 +-2.27437965903381522637 
+-2.27435775930059680405 +-2.27433583248098747376 +-2.27431387858643319078 +-2.27429189762837546951 +-2.27426988961824738666 +-2.27424785456747935442 +-2.27422579248749556768 +-2.27420370338971311597 +-2.27418158728554731240 +-2.27415944418640414426 +-2.27413727410368737836 +-2.27411507704879101155 +-2.27409285303310992887 +-2.27407060206802880131 +-2.27404832416492652669 +-2.27402601933518111466 +-2.27400368759016036080 +-2.27398132894122984027 +-2.27395894339974757870 +-2.27393653097706627264 +-2.27391409168453639822 +-2.27389162553349866158 +-2.27386913253529110435 +-2.27384661270124555088 +-2.27382406604268805239 +-2.27380149257093977511 +-2.27377889229731611209 +-2.27375626523312934779 +-2.27373361138968155259 +-2.27371093077827390871 +-2.27368822341019916067 +-2.27366548929674738844 +-2.27364272844920067840 +-2.27361994087883756421 +-2.27359712659693080639 +-2.27357428561474650408 +-2.27355141794354809193 +-2.27352852359458879050 +-2.27350560257912270856 +-2.27348265490839507308 +-2.27345968059364400560 +-2.27343667964610629539 +-2.27341365207700940587 +-2.27339059789757946817 +-2.27336751711903417572 +-2.27334440975258722517 +-2.27332127580944609591 +-2.27329811530081205007 +-2.27327492823788546161 +-2.27325171463185604637 +-2.27322847449390996744 +-2.27320520783522894703 +-2.27318191466698982239 +-2.27315859500036054897 +-2.27313524884650863811 +-2.27311187621659138713 +-2.27308847712176476108 +-2.27306505157317717547 +-2.27304159958197260494 +-2.27301812115928836278 +-2.27299461631625776548 +-2.27297108506400880046 +-2.27294752741366279380 +-2.27292394337633840706 +-2.27290033296314453182 +-2.27287669618518828329 +-2.27285303305357189174 +-2.27282934357938914971 +-2.27280562777373118521 +-2.27278188564768202085 +-2.27275811721232168239 +-2.27273432247872309020 +-2.27271050145795694419 +-2.27268665416108550659 +-2.27266278059916748688 +-2.27263888078325493325 +-2.27261495472439589705 +-2.27259100243363265648 +-2.27256702392200082841 +-2.27254301920053380925 
+-2.27251898828025611365 +-2.27249493117218914762 +-2.27247084788734943217 +-2.27244673843674593883 +-2.27242260283138408639 +-2.27239844108226352049 +-2.27237425320037811360 +-2.27235003919671685324 +-2.27232579908226428600 +-2.27230153286799740897 +-2.27227724056489055471 +-2.27225292218391095034 +-2.27222857773602049392 +-2.27220420723217708669 +-2.27217981068333285677 +-2.27215538810043193862 +-2.27213093949441935493 +-2.27210646487622813794 +-2.27208196425679087582 +-2.27205743764703038678 +-2.27203288505786948903 +-2.27200830650022167490 +-2.27198370198499599582 +-2.27195907152309661825 +-2.27193441512542460003 +-2.27190973280287122904 +-2.27188502456632512860 +-2.27186029042667048117 +-2.27183553039478391966 +-2.27181074448153896839 +-2.27178593269780160213 +-2.27176109505443513115 +-2.27173623156229442799 +-2.27171134223223347703 +-2.27168642707509649270 +-2.27166148610172413669 +-2.27163651932295307390 +-2.27161152674961330789 +-2.27158650839252995723 +-2.27156146426252236736 +-2.27153639437040544280 +-2.27151129872698787082 +-2.27148617734307434191 +-2.27146103022946288519 +-2.27143585739694797709 +-2.27141065885631832089 +-2.27138543461835462622 +-2.27136018469383582641 +-2.27133490909353508158 +-2.27130960782821844646 +-2.27128428090864797895 +-2.27125892834558218425 +-2.27123355014977024169 +-2.27120814633195955423 +-2.27118271690289086351 +-2.27115726187329958208 +-2.27113178125391668161 +-2.27110627505546736060 +-2.27108074328867193259 +-2.27105518596424404976 +-2.27102960309289514385 +-2.27100399468532732072 +-2.27097836075224090990 +-2.27095270130433002365 +-2.27092701635228211288 +-2.27090130590678240807 +-2.27087556997850725793 +-2.27084980857812990251 +-2.27082402171631914101 +-2.27079820940373711124 +-2.27077237165104062200 +-2.27074650846888204114 +-2.27072061986790707522 +-2.27069470585875921032 +-2.27066876645207305074 +-2.27064280165848186854 +-2.27061681148860960988 +-2.27059079595307888866 +-2.27056475506250343699 +-2.27053868882749387836 
+-2.27051259725865728356 +-2.27048648036659139748 +-2.27046033816189174459 +-2.27043417065514763209 +-2.27040797785694348221 +-2.27038175977785883219 +-2.27035551642846744613 +-2.27032924781933731495 +-2.27030295396103287686 +-2.27027663486411190874 +-2.27025029053912730248 +-2.27022392099662750908 +-2.27019752624715565048 +-2.27017110630124907544 +-2.27014466116943935958 +-2.27011819086225408171 +-2.27009169539021593565 +-2.27006517476384050980 +-2.27003862899364028394 +-2.27001205809012152059 +-2.26998546206378515322 +-2.26995884092512767438 +-2.26993219468463980348 +-2.26990552335280737495 +-2.26987882694011133822 +-2.26985210545702642548 +-2.26982535891402292805 +-2.26979858732156580814 +-2.26977179069011514301 +-2.26974496903012656901 +-2.26971812235204772890 +-2.26969125066632360088 +-2.26966435398339472229 +-2.26963743231369408093 +-2.26961048566765022372 +-2.26958351405568770076 +-2.26955651748822351266 +-2.26952949597567288365 +-2.26950244952844348845 +-2.26947537815693767271 +-2.26944828187155467347 +-2.26942116068268573414 +-2.26939401460071987771 +-2.26936684363603857761 +-2.26933964779901975461 +-2.26931242710003555629 +-2.26928518154945324525 +-2.26925791115763431094 +-2.26923061593493446964 +-2.26920329589170766127 +-2.26917595103829849990 +-2.26914858138504849094 +-2.26912118694229425486 +-2.26909376772036752712 +-2.26906632372959338184 +-2.26903885498029245227 +-2.26901136148278048665 +-2.26898384324736879236 +-2.26895630028436157133 +-2.26892873260405991687 +-2.26890114021675826095 +-2.26887352313274748283 +-2.26884588136231180044 +-2.26881821491573187899 +-2.26879052380328127825 +-2.26876280803522956120 +-2.26873506762184140584 +-2.26870730257337616109 +-2.26867951290008784682 +-2.26865169861222559788 +-2.26862385972003366419 +-2.26859599623375007837 +-2.26856810816360843219 +-2.26854019551983876468 +-2.26851225831266356536 +-2.26848429655230177104 +-2.26845631024896521311 +-2.26842829941286305839 +-2.26840026405419870059 +-2.26837220418316976023 
+-2.26834411980996852876 +-2.26831601094478330083 +-2.26828787759779615385 +-2.26825971977918605660 +-2.26823153749912398425 +-2.26820333076777780335 +-2.26817509959531005137 +-2.26814684399187793673 +-2.26811856396763200650 +-2.26809025953272191956 +-2.26806193069728712075 +-2.26803357747146616674 +-2.26800519986539006467 +-2.26797679788918449262 +-2.26794837155297290820 +-2.26791992086686944319 +-2.26789144584098822932 +-2.26786294648543407249 +-2.26783442281030778176 +-2.26780587482570572533 +-2.26777730254172027458 +-2.26774870596843580728 +-2.26772008511593448077 +-2.26769143999429090286 +-2.26766277061357657274 +-2.26763407698385721645 +-2.26760535911519323093 +-2.26757661701764057227 +-2.26754785070124986746 +-2.26751906017606552624 +-2.26749024545212884973 +-2.26746140653947492183 +-2.26743254344813394141 +-2.26740365618813122239 +-2.26737474476948630553 +-2.26734580920221562295 +-2.26731684949632850135 +-2.26728786566182893836 +-2.26725885770871782299 +-2.26722982564698938290 +-2.26720076948663429306 +-2.26717168923763745525 +-2.26714258490997755402 +-2.26711345651362972120 +-2.26708430405856375955 +-2.26705512755474281050 +-2.26702592701212912729 +-2.26699670244067519320 +-2.26696745385033038289 +-2.26693818125103963013 +-2.26690888465274298369 +-2.26687956406537294285 +-2.26685021949886111869 +-2.26682085096312979644 +-2.26679145846809859677 +-2.26676204202368314355 +-2.26673260163979017889 +-2.26670313732632644488 +-2.26667364909318802546 +-2.26664413695027233686 +-2.26661460090746613716 +-2.26658504097465351990 +-2.26655545716171413773 +-2.26652584947852142605 +-2.26649621793494482347 +-2.26646656254084799542 +-2.26643688330608972237 +-2.26640718024052478796 +-2.26637745335400087043 +-2.26634770265636165121 +-2.26631792815744681491 +-2.26628812986708982891 +-2.26625830779512016377 +-2.26622846195136018466 +-2.26619859234562959216 +-2.26616869898774231373 +-2.26613878188750694775 +-2.26610884105472631944 +-2.26607887649920014539 +-2.26604888823072281312 
+-2.26601887625908204882 +-2.26598884059406069369 +-2.26595878124543936849 +-2.26592869822299114446 +-2.26589859153648420786 +-2.26586846119568319224 +-2.26583830721034562572 +-2.26580812959022681596 +-2.26577792834507407704 +-2.26574770348463205849 +-2.26571745501863963668 +-2.26568718295682947073 +-2.26565688730893111114 +-2.26562656808466922342 +-2.26559622529376136768 +-2.26556585894592155128 +-2.26553546905085889662 +-2.26550505561827764112 +-2.26547461865787536084 +-2.26544415817934741142 +-2.26541367419238248715 +-2.26538316670666350916 +-2.26535263573187073405 +-2.26532208127767775707 +-2.26529150335375284442 +-2.26526090196976070956 +-2.26523027713536029282 +-2.26519962886020609361 +-2.26516895715394639410 +-2.26513826202622592376 +-2.26510754348668497116 +-2.26507680154495494307 +-2.26504603621066813446 +-2.26501524749344662624 +-2.26498443540291116705 +-2.26495359994867584419 +-2.26492274114034852772 +-2.26489185898753575543 +-2.26486095349983651559 +-2.26483002468684491149 +-2.26479907255815060552 +-2.26476809712333881919 +-2.26473709839198766858 +-2.26470607637367349341 +-2.26467503107796597206 +-2.26464396251442900976 +-2.26461287069262295901 +-2.26458175562210239917 +-2.26455061731241746870 +-2.26451945577311430924 +-2.26448827101373151294 +-2.26445706304380500740 +-2.26442583187286494706 +-2.26439457751043615730 +-2.26436329996603857850 +-2.26433199924918948653 +-2.26430067536939771955 +-2.26426932833616856300 +-2.26423795815900374961 +-2.26420656484739790670 +-2.26417514841084299704 +-2.26414370885882298978 +-2.26411224620082007775 +-2.26408076044630934831 +-2.26404925160476366841 +-2.26401771968564702320 +-2.26398616469842028920 +-2.26395458665254167840 +-2.26392298555746140920 +-2.26389136142262481499 +-2.26385971425747545283 +-2.26382804407144888614 +-2.26379635087397534932 +-2.26376463467448374445 +-2.26373289548239453595 +-2.26370113330712507960 +-2.26366934815808695802 +-2.26363754004468731296 +-2.26360570897632795706 +-2.26357385496240670619 
+-2.26354197801231604714 +-2.26351007813544224945 +-2.26347815534116847402 +-2.26344620963887255272 +-2.26341424103792610012 +-2.26338224954769851038 +-2.26335023517755073996 +-2.26331819793684196895 +-2.26328613783492471612 +-2.26325405488114794750 +-2.26322194908485441189 +-2.26318982045538241721 +-2.26315766900206449819 +-2.26312549473423008095 +-2.26309329766120326255 +-2.26306107779230236687 +-2.26302883513684083283 +-2.26299656970412810253 +-2.26296428150346828900 +-2.26293197054416062031 +-2.26289963683549988360 +-2.26286728038677376063 +-2.26283490120726904493 +-2.26280249930626320420 +-2.26277007469303192977 +-2.26273762737684602797 +-2.26270515736697008791 +-2.26267266467266336960 +-2.26264014930318158036 +-2.26260761126777465435 +-2.26257505057568941709 +-2.26254246723616514458 +-2.26250986125843889241 +-2.26247723265173972251 +-2.26244458142529536460 +-2.26241190758832555474 +-2.26237921115004692041 +-2.26234649211967164817 +-2.26231375050640570734 +-2.26228098631945062635 +-2.26224819956800393683 +-2.26221539026125562089 +-2.26218255840839477244 +-2.26214970401860249183 +-2.26211682710105721483 +-2.26208392766492982773 +-2.26205100571938855225 +-2.26201806127359672516 +-2.26198509433671191005 +-2.26195210491788678553 +-2.26191909302626958933 +-2.26188605867100456237 +-2.26185300186122884014 +-2.26181992260607644951 +-2.26178682091467697646 +-2.26175369679615378971 +-2.26172055025962537300 +-2.26168738131420576920 +-2.26165418996900635662 +-2.26162097623313007588 +-2.26158774011567631490 +-2.26155448162574135296 +-2.26152120077241258755 +-2.26148789756477830437 +-2.26145457201191701913 +-2.26142122412290502709 +-2.26138785390681151810 +-2.26135446137270390565 +-2.26132104652964205371 +-2.26128760938668316172 +-2.26125414995287776776 +-2.26122066823727196905 +-2.26118716424890919825 +-2.26115363799682356216 +-2.26112008949004872349 +-2.26108651873761168360 +-2.26105292574853500298 +-2.26101931053183458076 +-2.26098567309652498381 +-2.26095201345161411766 
+-2.26091833160610278242 +-2.26088462756899133410 +-2.26085090134927213512 +-2.26081715295593399517 +-2.26078338239796083897 +-2.26074958968433126216 +-2.26071577482401897541 +-2.26068193782599413666 +-2.26064807869922068662 +-2.26061419745265856918 +-2.26058029409526239917 +-2.26054636863598190644 +-2.26051242108376282403 +-2.26047845144754555591 +-2.26044445973626562107 +-2.26041044595885320945 +-2.26037641012423495823 +-2.26034235224133128739 +-2.26030827231905995234 +-2.26027417036633115899 +-2.26024004639205289280 +-2.26020590040512603380 +-2.26017173241444746523 +-2.26013754242891140578 +-2.26010333045740274827 +-2.26006909650880682960 +-2.26003484059200099310 +-2.26000056271585725298 +-2.25996626288924407078 +-2.25993194112102591120 +-2.25989759742006146581 +-2.25986323179520498527 +-2.25982884425530494710 +-2.25979443480920627607 +-2.25976000346574856792 +-2.25972555023376564520 +-2.25969107512208911004 +-2.25965657813954345912 +-2.25962205929494919232 +-2.25958751859712192456 +-2.25955295605487371802 +-2.25951837167700864129 +-2.25948376547232987477 +-2.25944913744963349345 +-2.25941448761771113141 +-2.25937981598534953775 +-2.25934512256133146479 +-2.25931040735443433576 +-2.25927567037343068890 +-2.25924091162708817748 +-2.25920613112417001389 +-2.25917132887343585779 +-2.25913650488363781932 +-2.25910165916352490001 +-2.25906679172184254867 +-2.25903190256732866459 +-2.25899699170871848253 +-2.25896205915474146408 +-2.25892710491412307405 +-2.25889212899558300407 +-2.25885713140783650488 +-2.25882211215959527451 +-2.25878707125956523782 +-2.25875200871644654654 +-2.25871692453893713193 +-2.25868181873572693164 +-2.25864669131550499515 +-2.25861154228695193424 +-2.25857637165874614027 +-2.25854117943955889913 +-2.25850596563805927630 +-2.25847073026291145226 +-2.25843547332277205797 +-2.25840019482629550396 +-2.25836489478213042759 +-2.25832957319892191350 +-2.25829423008530838501 +-2.25825886544992560090 +-2.25822347930140221450 +-2.25818807164836465873 
+-2.25815264249943270514 +-2.25811719186322168440 +-2.25808171974834470674 +-2.25804622616340555652 +-2.25801071111700668581 +-2.25797517461774521763 +-2.25793961667421294592 +-2.25790403729499677965 +-2.25786843648868051915 +-2.25783281426384085933 +-2.25779717062905138647 +-2.25776150559288080188 +-2.25772581916389158962 +-2.25769011135064401330 +-2.25765438216169167518 +-2.25761863160558373664 +-2.25758285969086625045 +-2.25754706642607771983 +-2.25751125181975398348 +-2.25747541588042555105 +-2.25743955861661849127 +-2.25740368003685398790 +-2.25736778014964922789 +-2.25733185896351384869 +-2.25729591648695748773 +-2.25725995272848001250 +-2.25722396769657951410 +-2.25718796139975008685 +-2.25715193384647916375 +-2.25711588504524884868 +-2.25707981500453991330 +-2.25704372373282557973 +-2.25700761123857462920 +-2.25697147753025229022 +-2.25693532261631757407 +-2.25689914650522682749 +-2.25686294920542884768 +-2.25682673072537198777 +-2.25679049107349438685 +-2.25675423025823507217 +-2.25671794828802241284 +-2.25668164517128655433 +-2.25664532091644831624 +-2.25660897553192496545 +-2.25657260902612932796 +-2.25653622140747023295 +-2.25649981268435118054 +-2.25646338286516945359 +-2.25642693195832144681 +-2.25639045997219422901 +-2.25635396691517353673 +-2.25631745279563977746 +-2.25628091762196802961 +-2.25624436140252759841 +-2.25620778414568645687 +-2.25617118585980414025 +-2.25613456655323751932 +-2.25609792623433857983 +-2.25606126491145575486 +-2.25602458259292948384 +-2.25598787928709842987 +-2.25595115500229548289 +-2.25591440974684953602 +-2.25587764352908415333 +-2.25584085635731845798 +-2.25580404823986624407 +-2.25576721918503819708 +-2.25573036920113834114 +-2.25569349829646847994 +-2.25565660647932242355 +-2.25561969375799220572 +-2.25558276014076408700 +-2.25554580563592033116 +-2.25550883025173654062 +-2.25547183399648698554 +-2.25543481687843705430 +-2.25539777890585124709 +-2.25536072008698740277 +-2.25532364043009980747 +-2.25528653994343653011 
+-2.25524941863524208685 +-2.25521227651375610890 +-2.25517511358721467474 +-2.25513792986384720152 +-2.25510072535187910958 +-2.25506350005953137838 +-2.25502625399502187875 +-2.25498898716656004382 +-2.25495169958235486263 +-2.25491439125060821880 +-2.25487706217951711096 +-2.25483971237727542913 +-2.25480234185207129016 +-2.25476495061208837001 +-2.25472753866550545965 +-2.25469010602049824143 +-2.25465265268523484821 +-2.25461517866788208053 +-2.25457768397659963355 +-2.25454016861954409379 +-2.25450263260486538641 +-2.25446507594071121616 +-2.25442749863522395870 +-2.25438990069653932835 +-2.25435228213279170717 +-2.25431464295210659543 +-2.25427698316261126976 +-2.25423930277242101639 +-2.25420160178965156561 +-2.25416388022241243050 +-2.25412613807880735095 +-2.25408837536693829051 +-2.25405059209489921912 +-2.25401278827078099809 +-2.25397496390267182420 +-2.25393711899865190063 +-2.25389925356679832191 +-2.25386136761518374172 +-2.25382346115187592872 +-2.25378553418493821070 +-2.25374758672242947455 +-2.25370961877240194582 +-2.25367163034290607371 +-2.25363362144198786652 +-2.25359559207768489486 +-2.25355754225803428525 +-2.25351947199106650288 +-2.25348138128480623976 +-2.25344327014727685565 +-2.25340513858649504897 +-2.25336698661047263315 +-2.25332881422721786890 +-2.25329062144473279972 +-2.25325240827101724861 +-2.25321417471406393318 +-2.25317592078186246241 +-2.25313764648239711619 +-2.25309935182364862172 +-2.25306103681359193303 +-2.25302270146019933961 +-2.25298434577143380508 +-2.25294596975526006943 +-2.25290757341963310267 +-2.25286915677250654255 +-2.25283071982182736548 +-2.25279226257553855106 +-2.25275378504157997028 +-2.25271528722788438870 +-2.25267676914238101915 +-2.25263823079299552177 +-2.25259967218764822761 +-2.25256109333425325048 +-2.25252249424072248374 +-2.25248387491496249169 +-2.25244523536487362136 +-2.25240657559835577572 +-2.25236789562329908776 +-2.25232919544759280228 +-2.25229047507911905868 +-2.25225173452575866406 
+-2.25221297379538398786 +-2.25217419289586473496 +-2.25213539183506705754 +-2.25209657062085089052 +-2.25205772926107217202 +-2.25201886776358106701 +-2.25197998613622640818 +-2.25194108438684814644 +-2.25190216252328623270 +-2.25186322055337084791 +-2.25182425848493217302 +-2.25178527632579328355 +-2.25174627408377414639 +-2.25170725176668806711 +-2.25166820938234524263 +-2.25162914693855320536 +-2.25159006444311016182 +-2.25155096190381431853 +-2.25151183932845677660 +-2.25147269672482375213 +-2.25143353410069968490 +-2.25139435146386102105 +-2.25135514882208154219 +-2.25131592618313058907 +-2.25127668355477217332 +-2.25123742094476675391 +-2.25119813836086768433 +-2.25115883581082698583 +-2.25111951330239135061 +-2.25108017084330036539 +-2.25104080844129272876 +-2.25100142610409958976 +-2.25096202383944943293 +-2.25092260165506452552 +-2.25088315955866447027 +-2.25084369755796309676 +-2.25080421566067068184 +-2.25076471387449039696 +-2.25072519220712408128 +-2.25068565066626735671 +-2.25064608925961184838 +-2.25060650799484340823 +-2.25056690687964477959 +-2.25052728592169382082 +-2.25048764512866350529 +-2.25044798450822236546 +-2.25040830406803449293 +-2.25036860381575909429 +-2.25032888375905137934 +-2.25028914390556167291 +-2.25024938426293541482 +-2.25020960483881449221 +-2.25016980564083546312 +-2.25012998667662955654 +-2.25009014795382533691 +-2.25005028948004648370 +-2.25001041126291045913 +-2.24997051331003161678 +-2.24993059562901942527 +-2.24989065822747935641 +-2.24985070111301155293 +-2.24981072429321082851 +-2.24977072777567066453 +-2.24973071156797566061 +-2.24969067567770864002 +-2.24965062011244842921 +-2.24961054487976763738 +-2.24957044998723398876 +-2.24953033544241343122 +-2.24949020125286436311 +-2.24945004742614207416 +-2.24940987396979652502 +-2.24936968089137456772 +-2.24932946819841772523 +-2.24928923589846263553 +-2.24924898399904238389 +-2.24920871250768383831 +-2.24916842143191075820 +-2.24912811077924157388 +-2.24908778055719205113 
+-2.24904743077327129441 +-2.24900706143498441136 +-2.24896667254983251283 +-2.24892626412531138058 +-2.24888583616891279959 +-2.24884538868812544621 +-2.24880492169043000317 +-2.24876443518330582094 +-2.24872392917422603276 +-2.24868340367066110730 +-2.24864285868007440783 +-2.24860229420992618898 +-2.24856171026767359677 +-2.24852110686076578361 +-2.24848048399665012553 +-2.24843984168276822544 +-2.24839917992655990986 +-2.24835849873545612354 +-2.24831779811688603488 +-2.24827707807827437136 +-2.24823633862704008735 +-2.24819557977059858445 +-2.24815480151635904704 +-2.24811400387172977133 +-2.24807318684411194809 +-2.24803235044090232719 +-2.24799149466949277354 +-2.24795061953727159931 +-2.24790972505162400807 +-2.24786881121992720978 +-2.24782787804955619393 +-2.24778692554788328550 +-2.24774595372227015133 +-2.24770496258008156687 +-2.24766395212867298170 +-2.24762292237539540452 +-2.24758187332759762356 +-2.24754080499262309800 +-2.24749971737781040204 +-2.24745861049049233671 +-2.24741748433800081486 +-2.24737633892765886756 +-2.24733517426678952589 +-2.24729399036270782730 +-2.24725278722272570064 +-2.24721156485414930160 +-2.24717032326428434175 +-2.24712906246042631864 +-2.24708778244987028572 +-2.24704648323990552328 +-2.24700516483781775889 +-2.24696382725088605881 +-2.24692247048638726881 +-2.24688109455159246153 +-2.24683969945376871280 +-2.24679828520017910165 +-2.24675685179808048986 +-2.24671539925472796284 +-2.24667392757736950060 +-2.24663243677324953040 +-2.24659092684960937092 +-2.24654939781368501173 +-2.24650784967270578107 +-2.24646628243390056312 +-2.24642469610448980433 +-2.24638309069169306298 +-2.24634146620272234784 +-2.24629982264478700316 +-2.24625816002509237634 +-2.24621647835083670941 +-2.24617477762921780027 +-2.24613305786742412096 +-2.24609131907264414352 +-2.24604956125206056683 +-2.24600778441284898435 +-2.24596598856218365725 +-2.24592417370723307357 +-2.24588233985516305680 +-2.24584048701313143681 +-2.24579861518829515532 
+-2.24575672438780449269 +-2.24571481461880528840 +-2.24567288588844116148 +-2.24563093820384818144 +-2.24558897157216152962 +-2.24554698600050750557 +-2.24550498149601196474 +-2.24546295806579454535 +-2.24542091571697044472 +-2.24537885445664997519 +-2.24533677429194034048 +-2.24529467522994385931 +-2.24525255727775752135 +-2.24521042044247431946 +-2.24516826473118324969 +-2.24512609015096931131 +-2.24508389670891128631 +-2.24504168441208529217 +-2.24499945326756167319 +-2.24495720328240810915 +-2.24491493446368428621 +-2.24487264681845077874 +-2.24483034035375883519 +-2.24478801507665881587 +-2.24474567099419308747 +-2.24470330811340312849 +-2.24466092644132375611 +-2.24461852598498623479 +-2.24457610675141738810 +-2.24453366874763871053 +-2.24449121198066903204 +-2.24444873645752052127 +-2.24440624218520357047 +-2.24436372917072057831 +-2.24432119742107349936 +-2.24427864694325718276 +-2.24423607774426292494 +-2.24419348983107802553 +-2.24415088321068401100 +-2.24410825789005885511 +-2.24406561387617742298 +-2.24402295117600791841 +-2.24398026979651410429 +-2.24393756974465752307 +-2.24389485102739438815 +-2.24385211365167513975 +-2.24380935762444666537 +-2.24376658295265141163 +-2.24372378964322960471 +-2.24368097770311303307 +-2.24363814713923215294 +-2.24359529795851120326 +-2.24355243016787087029 +-2.24350954377422784347 +-2.24346663878449348317 +-2.24342371520557470888 +-2.24338077304437577553 +-2.24333781230779338856 +-2.24329483300272292112 +-2.24325183513605397323 +-2.24320881871467125990 +-2.24316578374545683161 +-2.24312273023528563343 +-2.24307965819103083405 +-2.24303656761956071719 +-2.24299345852773646115 +-2.24295033092241924422 +-2.24290718481046180699 +-2.24286402019871555780 +-2.24282083709402568772 +-2.24277763550323339103 +-2.24273441543317630931 +-2.24269117689068542276 +-2.24264791988259037936 +-2.24260464441571460981 +-2.24256135049687710392 +-2.24251803813289285472 +-2.24247470733057285841 +-2.24243135809672278214 +-2.24238799043814429623 
+-2.24234460436163596242 +-2.24230119987398923698 +-2.24225777698199468801 +-2.24221433569243400186 +-2.24217087601208930892 +-2.24212739794773474600 +-2.24208390150614222947 +-2.24204038669407834661 +-2.24199685351830435565 +-2.24195330198558018253 +-2.24190973210265731552 +-2.24186614387628679879 +-2.24182253731321168289 +-2.24177891242017457429 +-2.24173526920390919770 +-2.24169160767114927779 +-2.24164792782862054565 +-2.24160422968304651192 +-2.24156051324114669043 +-2.24151677850963348959 +-2.24147302549521842963 +-2.24142925420460548125 +-2.24138546464449728290 +-2.24134165682158847943 +-2.24129783074257415976 +-2.24125398641414008694 +-2.24121012384297069175 +-2.24116624303574596411 +-2.24112234399913923255 +-2.24107842673982116111 +-2.24103449126445974926 +-2.24099053757971455880 +-2.24094656569224381926 +-2.24090257560870176334 +-2.24085856733573640653 +-2.24081454087999087932 +-2.24077049624810609174 +-2.24072643344671806886 +-2.24068235248245795077 +-2.24063825336195243665 +-2.24059413609182378480 +-2.24055000067869070079 +-2.24050584712916656116 +-2.24046167544986163378 +-2.24041748564737996929 +-2.24037327772832384198 +-2.24032905169928886480 +-2.24028480756686532160 +-2.24024054533764438446 +-2.24019626501820834363 +-2.24015196661513460441 +-2.24010765013499968390 +-2.24006331558437299378 +-2.24001896296982083712 +-2.23997459229790552016 +-2.23993020357518357599 +-2.23988579680820754092 +-2.23984137200352684260 +-2.23979692916768602373 +-2.23975246830722429792 +-2.23970798942867732606 +-2.23966349253857721635 +-2.23961897764345030382 +-2.23957444474981937077 +-2.23952989386420231455 +-2.23948532499311392385 +-2.23944073814306277015 +-2.23939613332055431627 +-2.23935151053209002825 +-2.23930686978416648714 +-2.23926221108327538900 +-2.23921753443590576538 +-2.23917283984853998646 +-2.23912812732765731383 +-2.23908339687973345633 +-2.23903864851123923785 +-2.23899388222864015319 +-2.23894909803839814444 +-2.23890429594697160098 +-2.23885947596081358313 
+-2.23881463808637226620 +-2.23876978233009404917 +-2.23872490869841733740 +-2.23868001719777920400 +-2.23863510783461183706 +-2.23859018061534165156 +-2.23854523554639150973 +-2.23850027263418072110 +-2.23845529188512415431 +-2.23841029330563090483 +-2.23836527690210829178 +-2.23832024268095652886 +-2.23827519064857272113 +-2.23823012081135042095 +-2.23818503317567740751 +-2.23813992774793923957 +-2.23809480453451392634 +-2.23804966354177903298 +-2.23800450477610413103 +-2.23795932824385657156 +-2.23791413395140059706 +-2.23786892190509245637 +-2.23782369211128839837 +-2.23777844457633534603 +-2.23773317930658199870 +-2.23768789630836772986 +-2.23764259558802880434 +-2.23759727715189793429 +-2.23755194100630472320 +-2.23750658715757300143 +-2.23746121561202038208 +-2.23741582637596447825 +-2.23737041945571579760 +-2.23732499485757951874 +-2.23727955258785948800 +-2.23723409265285377856 +-2.23718861505885691088 +-2.23714311981215629999 +-2.23709760691903936092 +-2.23705207638578640328 +-2.23700652821867329578 +-2.23696096242397324261 +-2.23691537900795411886 +-2.23686977797688069103 +-2.23682415933701062016 +-2.23677852309460023505 +-2.23673286925590053542 +-2.23668719782715808009 +-2.23664150881461409881 +-2.23659580222450848908 +-2.23655007806307448703 +-2.23650433633654088794 +-2.23645857705113337843 +-2.23641280021307320425 +-2.23636700582857672615 +-2.23632119390385586399 +-2.23627536444511987312 +-2.23622951745857179162 +-2.23618365295041154894 +-2.23613777092683285730 +-2.23609187139402898481 +-2.23604595435818520599 +-2.23600001982548501900 +-2.23595406780210437248 +-2.23590809829422143551 +-2.23586211130800105451 +-2.23581610684961074043 +-2.23577008492521223104 +-2.23572404554096149099 +-2.23567798870301004399 +-2.23563191441750808153 +-2.23558582269059913372 +-2.23553971352842140163 +-2.23549358693711264223 +-2.23544744292280217479 +-2.23540128149161843041 +-2.23535510264968229066 +-2.23530890640311374895 +-2.23526269275802524916 +-2.23521646172052923518 
+-2.23517021329672926910 +-2.23512394749272669259 +-2.23507766431461973866 +-2.23503136376850042311 +-2.23498504586045809717 +-2.23493871059657545075 +-2.23489235798293472968 +-2.23484598802560929798 +-2.23479960073067340787 +-2.23475319610419198568 +-2.23470677415222951367 +-2.23466033488084381275 +-2.23461387829609048339 +-2.23456740440401890879 +-2.23452091321067536356 +-2.23447440472210212548 +-2.23442787894433569917 +-2.23438133588341036884 +-2.23433477554535420140 +-2.23428819793619304335 +-2.23424160306194696801 +-2.23419499092863205192 +-2.23414836154226081888 +-2.23410171490884001955 +-2.23405505103437507231 +-2.23400836992486340193 +-2.23396167158630110094 +-2.23391495602467893278 +-2.23386822324598277589 +-2.23382147325619673239 +-2.23377470606129646669 +-2.23372792166725764318 +-2.23368112008005015312 +-2.23363430130563855869 +-2.23358746534998253708 +-2.23354061221904220957 +-2.23349374191876837159 +-2.23344685445510959809 +-2.23339994983401135542 +-2.23335302806141111631 +-2.23330608914324768577 +-2.23325913308545009883 +-2.23321215989394694645 +-2.23316516957466149051 +-2.23311816213351166382 +-2.23307113757641362284 +-2.23302409590927553040 +-2.23297703713800554937 +-2.23292996126850473715 +-2.23288286830667148664 +-2.23283575825839841755 +-2.23278863112957592918 +-2.23274148692608731537 +-2.23269432565381542588 +-2.23264714731863511687 +-2.23259995192641991224 +-2.23255273948303845089 +-2.23250550999435359856 +-2.23245826346622511238 +-2.23241099990450964086 +-2.23236371931505761523 +-2.23231642170371680223 +-2.23226910707632830722 +-2.23222177543873412375 +-2.23217442679676603134 +-2.23212706115625447723 +-2.23207967852302591183 +-2.23203227890290278879 +-2.23198486230170267675 +-2.23193742872523870346 +-2.23188997817931955581 +-2.23184251066975081201 +-2.23179502620233272125 +-2.23174752478286242408 +-2.23170000641713217604 +-2.23165247111093068000 +-2.23160491887004086564 +-2.23155734970024299813 +-2.23150976360731334580 +-2.23146216059702240386 
+-2.23141454067513755888 +-2.23136690384742220061 +-2.23131925011963438976 +-2.23127157949752863431 +-2.23122389198685677769 +-2.23117618759336355794 +-2.23112846632279104853 +-2.23108072818087821432 +-2.23103297317335780292 +-2.23098520130595812105 +-2.23093741258440703135 +-2.23088960701442395873 +-2.23084178460172521952 +-2.23079394535202446548 +-2.23074608927103001932 +-2.23069821636444576285 +-2.23065032663797246926 +-2.23060242009730469448 +-2.23055449674813521810 +-2.23050655659615193471 +-2.23045859964703785394 +-2.23041062590647154451 +-2.23036263538012846652 +-2.23031462807367875101 +-2.23026660399279030855 +-2.23021856314312483249 +-2.23017050553033957527 +-2.23012243116009045707 +-2.23007434003802673672 +-2.23002623216979323217 +-2.22997810756103298502 +-2.22992996621738148733 +-2.22988180814447423117 +-2.22983363334793782684 +-2.22978544183339888463 +-2.22973723360647779757 +-2.22968900867278918554 +-2.22964076703794855661 +-2.22959250870756209295 +-2.22954423368723331222 +-2.22949594198256395572 +-2.22944763359914777112 +-2.22939930854257672976 +-2.22935096681843880617 +-2.22930260843231708989 +-2.22925423338978934140 +-2.22920584169643198891 +-2.22915743335781435519 +-2.22910900837950354259 +-2.22906056676706221253 +-2.22901210852604769741 +-2.22896363366201510914 +-2.22891514218051378649 +-2.22886663408708862733 +-2.22881810938728230909 +-2.22876956808663084786 +-2.22872101019066892746 +-2.22867243570492501448 +-2.22862384463492313458 +-2.22857523698618553709 +-2.22852661276422781000 +-2.22847797197456332086 +-2.22842931462269921994 +-2.22838064071414043710 +-2.22833195025438612902 +-2.22828324324893278785 +-2.22823451970327335303 +-2.22818577962289232630 +-2.22813702301327554167 +-2.22808824987990128363 +-2.22803946022824383988 +-2.22799065406377616583 +-2.22794183139196411148 +-2.22789299221826908592 +-2.22784413654815161010 +-2.22779526438706598768 +-2.22774637574046074917 +-2.22769747061378264874 +-2.22764854901247444374 +-2.22759961094197311837 
+-2.22755065640771343638 +-2.22750168541512261200 +-2.22745269796962874764 +-2.22740369407665061985 +-2.22735467374160789333 +-2.22730563696991223921 +-2.22725658376697133178 +-2.22720751413819195719 +-2.22715842808897424021 +-2.22710932562471430884 +-2.22706020675080385018 +-2.22701107147263188679 +-2.22696191979558255625 +-2.22691275172503555524 +-2.22686356726636658365 +-2.22681436642494734457 +-2.22676514920614510018 +-2.22671591561532489223 +-2.22666666565784332477 +-2.22661739933905744593 +-2.22656811666431853070 +-2.22651881763897296906 +-2.22646950226836315423 +-2.22642017055782792667 +-2.22637082251270213007 +-2.22632145813831616721 +-2.22627207743999644407 +-2.22622268042306581393 +-2.22617326709284135688 +-2.22612383745463704443 +-2.22607439151376329534 +-2.22602492927552519930 +-2.22597545074522606967 +-2.22592595592816122618 +-2.22587644482962554449 +-2.22582691745490812707 +-2.22577737380929363553 +-2.22572781389806317875 +-2.22567823772649520109 +-2.22562864529986015327 +-2.22557903662342893014 +-2.22552941170246443292 +-2.22547977054222911875 +-2.22543011314797789524 +-2.22538043952496300548 +-2.22533074967843402803 +-2.22528104361363388008 +-2.22523132133580370251 +-2.22518158285017841891 +-2.22513182816198984426 +-2.22508205727646624084 +-2.22503227019883143001 +-2.22498246693430479226 +-2.22493264748810082310 +-2.22488281186543224166 +-2.22483296007150510576 +-2.22478309211152325275 +-2.22473320799068607911 +-2.22468330771418720815 +-2.22463339128721804272 +-2.22458345871496598889 +-2.22453351000261401182 +-2.22448354515533930353 +-2.22443356417831727967 +-2.22438356707671802681 +-2.22433355385570852292 +-2.22428352452044997278 +-2.22423347907610091667 +-2.22418341752781545395 +-2.22413333988074457537 +-2.22408324614003216624 +-2.22403313631082122370 +-2.22398301039824985992 +-2.22393286840745041388 +-2.22388271034355300415 +-2.22383253621168419656 +-2.22378234601696389561 +-2.22373213976451022944 +-2.22368191745943599713 +-2.22363167910685177731 
+-2.22358142471185971090 +-2.22353115427956371519 +-2.22348086781505838161 +-2.22343056532343874565 +-2.22338024680979273739 +-2.22332991227920384603 +-2.22327956173675467255 +-2.22322919518752026846 +-2.22317881263657435298 +-2.22312841408898487217 +-2.22307799954981488710 +-2.22302756902412701479 +-2.22297712251697499042 +-2.22292666003341432557 +-2.22287618157848987366 +-2.22282568715724648811 +-2.22277517677472502555 +-2.22272465043596101353 +-2.22267410814598553870 +-2.22262354990982702319 +-2.22257297573250989231 +-2.22252238561905279823 +-2.22247177957447128449 +-2.22242115760377734190 +-2.22237051971197807632 +-2.22231986590407704085 +-2.22226919618507334775 +-2.22221851055996300062 +-2.22216780903373711809 +-2.22211709161138237789 +-2.22206635829788101688 +-2.22201560909821438372 +-2.22196484401735672165 +-2.22191406306027738893 +-2.22186326623194618790 +-2.22181245353732226278 +-2.22176162498136786638 +-2.22171078056903681386 +-2.22165992030527892354 +-2.22160904419504179330 +-2.22155815224326769197 +-2.22150724445489444747 +-2.22145632083485899955 +-2.22140538138808851798 +-2.22135442611951239300 +-2.22130345503405202123 +-2.22125246813662480250 +-2.22120146543214680435 +-2.22115044692552743300 +-2.22109941262167254195 +-2.22104836252548532016 +-2.22099729664186318345 +-2.22094621497570043900 +-2.22089511753188739718 +-2.22084400431531081566 +-2.22079287533085123485 +-2.22074173058338786291 +-2.22069057007779369073 +-2.22063939381893993286 +-2.22058820181169158658 +-2.22053699406091054058 +-2.22048577057145557490 +-2.22043453134817925232 +-2.22038327639593191520 +-2.22033200571955990910 +-2.22028071932390380638 +-2.22022941721380284719 +-2.22017809939408916620 +-2.22012676586959356584 +-2.22007541664514151947 +-2.22002405172555361546 +-2.21997267111564955400 +-2.21992127482024059759 +-2.21986986284413800874 +-2.21981843519214594451 +-2.21976699186906722971 +-2.21971553287969936008 +-2.21966405822883494636 +-2.21961256792126349069 +-2.21956106196177094247 
+-2.21950954035513881024 +-2.21945800310614460571 +-2.21940645021956139971 +-2.21935488170015871034 +-2.21930329755270161485 +-2.21925169778195252590 +-2.21920008239266808303 +-2.21914845138960048487 +-2.21909680477750104188 +-2.21904514256111484727 +-2.21899346474518210925 +-2.21894177133444170380 +-2.21889006233362584553 +-2.21883833774746319634 +-2.21878659758068108587 +-2.21873484183799929426 +-2.21868307052413626934 +-2.21863128364380424173 +-2.21857948120171277751 +-2.21852766320256744592 +-2.21847582965106981945 +-2.21842398055191658557 +-2.21837211590980221132 +-2.21832023572941494649 +-2.21826834001544082042 +-2.21821642877255964521 +-2.21816450200545078886 +-2.21811255971878740212 +-2.21806060191723686259 +-2.21800862860546654787 +-2.21795663978813717421 +-2.21790463546990634924 +-2.21785261565542679563 +-2.21780058034934857147 +-2.21774852955631684992 +-2.21769646328097325139 +-2.21764438152795539949 +-2.21759228430189558878 +-2.21754017160742566972 +-2.21748804344916861098 +-2.21743589983174693714 +-2.21738374075977917599 +-2.21733156623787630579 +-2.21727937627065152526 +-2.21722717086270737497 +-2.21717495001864595139 +-2.21712271374306624239 +-2.21707046204056146266 +-2.21701819491571994192 +-2.21696591237312912170 +-2.21691361441736978222 +-2.21686130105302048321 +-2.21680897228465401128 +-2.21675662811683960030 +-2.21670426855414515188 +-2.21665189360113190631 +-2.21659950326235621887 +-2.21654709754237400077 +-2.21649467644573361369 +-2.21644223997698208706 +-2.21638978814066156531 +-2.21633732094130930790 +-2.21628483838345990975 +-2.21623234047164308080 +-2.21617982721038542238 +-2.21612729860420909489 +-2.21607475465763181788 +-2.21602219537516864634 +-2.21596962076132930619 +-2.21591703082061997065 +-2.21586442555754370431 +-2.21581180497659957496 +-2.21575916908227998903 +-2.21570651787907735297 +-2.21565385137147696781 +-2.21560116956396191412 +-2.21554847246101083158 +-2.21549576006709925124 +-2.21544303238669737510 +-2.21539028942427140834 
+-2.21533753118428355933 +-2.21528475767119514828 +-2.21523196888945905769 +-2.21517916484352817008 +-2.21512634553784737435 +-2.21507351097686067121 +-2.21502066116500762050 +-2.21496779610672289706 +-2.21491491580643806714 +-2.21486202026858070013 +-2.21480910949757348050 +-2.21475618349783642813 +-2.21470324227378467796 +-2.21465028582983025629 +-2.21459731417038030443 +-2.21454432729983885508 +-2.21449132522260550004 +-2.21443830794307627841 +-2.21438527546564190018 +-2.21433222779469218722 +-2.21427916493460941183 +-2.21422608688977451408 +-2.21417299366456399312 +-2.21411988526334857497 +-2.21406676169049898562 +-2.21401362295037662520 +-2.21396046904734378202 +-2.21390729998575652715 +-2.21385411576996782301 +-2.21380091640432574707 +-2.21374770189317437996 +-2.21369447224085558190 +-2.21364122745170632811 +-2.21358796753005782065 +-2.21353469248024037341 +-2.21348140230657985938 +-2.21342809701339549022 +-2.21337477660500647758 +-2.21332144108572448360 +-2.21326809045985939406 +-2.21321472473171709794 +-2.21316134390559904332 +-2.21310794798580179332 +-2.21305453697662102286 +-2.21300111088234441326 +-2.21294766970726008992 +-2.21289421345564774057 +-2.21284074213178705293 +-2.21278725573995105336 +-2.21273375428441143598 +-2.21268023776943367764 +-2.21262670619927881432 +-2.21257315957820788199 +-2.21251959791047347892 +-2.21246602120032775929 +-2.21241242945201621595 +-2.21235882266978300947 +-2.21230520085786608320 +-2.21225156402050027182 +-2.21219791216191818961 +-2.21214424528634578948 +-2.21209056339800680391 +-2.21203686650112141265 +-2.21198315459990357823 +-2.21192942769856681906 +-2.21187568580131665996 +-2.21182192891235951393 +-2.21176815703589424444 +-2.21171437017611527409 +-2.21166056833721835773 +-2.21160675152338814797 +-2.21155291973881062972 +-2.21149907298766690289 +-2.21144521127413273831 +-2.21139133460238124229 +-2.21133744297657974798 +-2.21128353640089603260 +-2.21122961487948943571 +-2.21117567841651663230 +-2.21112172701613163284 
+-2.21106776068248356282 +-2.21101377941971843910 +-2.21095978323197694948 +-2.21090577212339800539 +-2.21085174609811430102 +-2.21079770516025631011 +-2.21074364931395006550 +-2.21068957856331849143 +-2.21063549291247829487 +-2.21058139236554529461 +-2.21052727692662953629 +-2.21047314659983751284 +-2.21041900138927260855 +-2.21036484129903332274 +-2.21031066633321460202 +-2.21025647649590917254 +-2.21020227179120132277 +-2.21014805222317711753 +-2.21009381779591551620 +-2.21003956851349148138 +-2.20998530437997642295 +-2.20993102539944130669 +-2.20987673157594644024 +-2.20982242291355390762 +-2.20976809941632001966 +-2.20971376108829797857 +-2.20965940793353476934 +-2.20960503995607604466 +-2.20955065715996257225 +-2.20949625954923156712 +-2.20944184712791624747 +-2.20938741990004539062 +-2.20933297786964510934 +-2.20927852104073707551 +-2.20922404941733896422 +-2.20916956300346312148 +-2.20911506180312189329 +-2.20906054582032007616 +-2.20900601505905980204 +-2.20895146952334098245 +-2.20889690921715686756 +-2.20884233414449848709 +-2.20878774430935376216 +-2.20873313971570395253 +-2.20867852036752987388 +-2.20862388626880568054 +-2.20856923742350463868 +-2.20851457383559202086 +-2.20845989550903265553 +-2.20840520244778781844 +-2.20835049465581256811 +-2.20829577213705796623 +-2.20824103489547418633 +-2.20818628293500518467 +-2.20813151625959269708 +-2.20807673487317224215 +-2.20802193877967711799 +-2.20796712798303795822 +-2.20791230248717784690 +-2.20785746229601986812 +-2.20780260741348177689 +-2.20774773784347777550 +-2.20769285358991673718 +-2.20763795465670487062 +-2.20758304104774571996 +-2.20752811276693750031 +-2.20747316981817442993 +-2.20741821220534806258 +-2.20736323993234506702 +-2.20730825300304811520 +-2.20725325142133765866 +-2.20719823519108926391 +-2.20714320431617405660 +-2.20708815880046049784 +-2.20703309864781171967 +-2.20697802386208907777 +-2.20692293444714859874 +-2.20686783040684364465 +-2.20681271174502180443 +-2.20675757846552844654 
+-2.20670243057220494265 +-2.20664726806888955579 +-2.20659209095941477585 +-2.20653689924760998409 +-2.20648169293730234131 +-2.20642647203231279107 +-2.20637123653646005650 +-2.20631598645355886390 +-2.20626072178741949870 +-2.20620544254184913768 +-2.20615014872065007268 +-2.20609484032762281913 +-2.20603951736656123117 +-2.20598417984125783065 +-2.20592882775550114260 +-2.20587346111307436303 +-2.20581807991775624700 +-2.20576268417332599370 +-2.20570727388355480869 +-2.20565184905221034484 +-2.20559640968306025499 +-2.20554095577986331023 +-2.20548548734637783753 +-2.20543000438635683480 +-2.20537450690355063543 +-2.20531899490170602007 +-2.20526346838456355215 +-2.20520792735586201871 +-2.20515237181933665411 +-2.20509680177871869589 +-2.20504121723773360841 +-2.20498561820010552381 +-2.20493000466955324512 +-2.20487437664979379903 +-2.20481873414453710680 +-2.20476307715749308969 +-2.20470740569236411943 +-2.20465171975285167960 +-2.20459601934265325696 +-2.20454030446546056510 +-2.20448457512496309718 +-2.20442883132484634956 +-2.20437307306879048951 +-2.20431730036047612842 +-2.20426151320357410768 +-2.20420571160175660097 +-2.20414989555868956472 +-2.20409406507803584674 +-2.20403822016345340984 +-2.20398236081859799640 +-2.20392648704712135199 +-2.20387059885267078130 +-2.20381469623888959219 +-2.20375877920941709576 +-2.20370284776789082670 +-2.20364690191794343477 +-2.20359094166320224062 +-2.20353496700729278857 +-2.20347897795383662611 +-2.20342297450645085988 +-2.20336695666874948785 +-2.20331092444434073485 +-2.20325487783683149345 +-2.20319881684982510350 +-2.20314274148691913169 +-2.20308665175170803607 +-2.20303054764778361019 +-2.20297442917873276258 +-2.20291829634813884908 +-2.20286214915958167282 +-2.20280598761663748419 +-2.20274981172287809272 +-2.20269362148187219930 +-2.20263741689718406391 +-2.20258119797237528203 +-2.20252496471100256414 +-2.20246871711661951210 +-2.20241245519277573095 +-2.20235617894301638486 +-2.20229988837088530573 
+-2.20224358347992010820 +-2.20218726427365485421 +-2.20213093075562182932 +-2.20207458292934665778 +-2.20201822079835407564 +-2.20196184436616215763 +-2.20190545363628809028 +-2.20184904861224461925 +-2.20179262929753871703 +-2.20173619569567646792 +-2.20167974781015729491 +-2.20162328564447928869 +-2.20156680920213609909 +-2.20151031848661782320 +-2.20145381350140967314 +-2.20139729424999375240 +-2.20134076073585038813 +-2.20128421296245146976 +-2.20122765093327021901 +-2.20117107465177275216 +-2.20111448412142429731 +-2.20105787934568342123 +-2.20100126032800647025 +-2.20094462707184579386 +-2.20088797958064974480 +-2.20083131785786445533 +-2.20077464190693072865 +-2.20071795173128492706 +-2.20066124733436208061 +-2.20060452871959189025 +-2.20054779589040006016 +-2.20049104885021007405 +-2.20043428760244053066 +-2.20037751215050647602 +-2.20032072249781984752 +-2.20026391864778725349 +-2.20020710060381352591 +-2.20015026836929905585 +-2.20009342194763979350 +-2.20003656134222946861 +-2.19997968655645603775 +-2.19992279759370568115 +-2.19986589445736013815 +-2.19980897715079715127 +-2.19975204567739135442 +-2.19969510004051294061 +-2.19963814024352855014 +-2.19958116628980171470 +-2.19952417818269241323 +-2.19946717592555529563 +-2.19941015952174323544 +-2.19935312897460422121 +-2.19929608428748313287 +-2.19923902546372040945 +-2.19918195250665338136 +-2.19912486541961582631 +-2.19906776420593752519 +-2.19901064886894470618 +-2.19895351941195960066 +-2.19889637583830133138 +-2.19883921815128413613 +-2.19878204635421958812 +-2.19872486045041570790 +-2.19866766044317651918 +-2.19861044633580204888 +-2.19855321813158832711 +-2.19849597583382916355 +-2.19843871944581259470 +-2.19838144897082621299 +-2.19832416441214828495 +-2.19826686577306107395 +-2.19820955305683574110 +-2.19815222626674433570 +-2.19809488540605446616 +-2.19803753047802796772 +-2.19798016148592623153 +-2.19792277843300354334 +-2.19786538132251374478 +-2.19780797015770534841 +-2.19775054494182153775 
+-2.19769310567810594037 +-2.19763565236979374617 +-2.19757818502012192141 +-2.19752070363231810646 +-2.19746320820960949760 +-2.19740569875522018251 +-2.19734817527236803159 +-2.19729063776426913890 +-2.19723308623413604579 +-2.19717552068517552044 +-2.19711794112059211059 +-2.19706034754358858763 +-2.19700273995736106158 +-2.19694511836510208980 +-2.19688748277000334141 +-2.19682983317524893607 +-2.19677216958402432567 +-2.19671449199950608033 +-2.19665680042487032608 +-2.19659909486328741579 +-2.19654137531792681415 +-2.19648364179195265677 +-2.19642589428852419431 +-2.19636813281079978921 +-2.19631035736193158669 +-2.19625256794506995561 +-2.19619476456336082393 +-2.19613694721994612280 +-2.19607911591796467476 +-2.19602127066055130555 +-2.19596341145083817636 +-2.19590553829195211932 +-2.19584765118701730202 +-2.19578975013915433934 +-2.19573183515147984934 +-2.19567390622710689740 +-2.19561596336914544025 +-2.19555800658070099374 +-2.19550003586487507690 +-2.19544205122476743242 +-2.19538405266347158573 +-2.19532604018408017410 +-2.19526801378967961753 +-2.19520997348335500376 +-2.19515191926818520329 +-2.19509385114724908661 +-2.19503576912361708651 +-2.19497767320036096805 +-2.19491956338054494680 +-2.19486143966723235010 +-2.19480330206348073219 +-2.19474515057234587090 +-2.19468698519687777093 +-2.19462880594012510471 +-2.19457061280513165968 +-2.19451240579493767058 +-2.19445418491257937532 +-2.19439595016109123549 +-2.19433770154350238357 +-2.19427943906283706710 +-2.19422116272212042176 +-2.19416287252436781330 +-2.19410456847259638380 +-2.19404625056981705811 +-2.19398791881903854062 +-2.19392957322326287439 +-2.19387121378549210249 +-2.19381284050872205071 +-2.19375445339594765670 +-2.19369605245015764083 +-2.19363763767433761487 +-2.19357920907147097012 +-2.19352076664453576882 +-2.19346231039650829686 +-2.19340384033035862288 +-2.19334535644905637142 +-2.19328685875556494977 +-2.19322834725284510071 +-2.19316982194385534655 +-2.19311128283154710417 
+-2.19305272991887223455 +-2.19299416320877638142 +-2.19293558270420207990 +-2.19287698840808920053 +-2.19281838032337317301 +-2.19275975845298587430 +-2.19270112279985518455 +-2.19264247336690587531 +-2.19258381015706005357 +-2.19252513317323449726 +-2.19246644241834420797 +-2.19240773789529797000 +-2.19234901960700456769 +-2.19229028755636523584 +-2.19223154174628120927 +-2.19217278217964750553 +-2.19211400885935692173 +-2.19205522178829870228 +-2.19199642096935809477 +-2.19193760640541546181 +-2.19187877809935027784 +-2.19181993605403713232 +-2.19176108027234572972 +-2.19170221075714533043 +-2.19164332751129808941 +-2.19158443053766438524 +-2.19152551983910148792 +-2.19146659541846222652 +-2.19140765727859632150 +-2.19134870542234860835 +-2.19128973985256170209 +-2.19123076057207688550 +-2.19117176758372478318 +-2.19111276089034001657 +-2.19105374049475010167 +-2.19099470639978033404 +-2.19093565860824934788 +-2.19087659712297622150 +-2.19081752194677337187 +-2.19075843308245277186 +-2.19069933053281928892 +-2.19064021430067734642 +-2.19058108438882603863 +-2.19052194080006001897 +-2.19046278353717305265 +-2.19040361260295401991 +-2.19034442800018780417 +-2.19028522973165484800 +-2.19022601780013470574 +-2.19016679220840115860 +-2.19010755295922709962 +-2.19004830005537831639 +-2.18998903349961748788 +-2.18992975329470729307 +-2.18987045944340419368 +-2.18981115194846021055 +-2.18975183081262558815 +-2.18969249603864657416 +-2.18963314762926586354 +-2.18957378558722259854 +-2.18951440991525148050 +-2.18945502061608543443 +-2.18939561769245250034 +-2.18933620114707716553 +-2.18927677098268169686 +-2.18921732720198303213 +-2.18915786980769500047 +-2.18909839880252965472 +-2.18903891418919416267 +-2.18897941597038991901 +-2.18891990414881920657 +-2.18886037872717809094 +-2.18880083970815952910 +-2.18874128709445248120 +-2.18868172088874324288 +-2.18862214109371455706 +-2.18856254771204472576 +-2.18850294074640938646 +-2.18844332019948062396 +-2.18838368607392741438 
+-2.18832403837241207256 +-2.18826437709759913375 +-2.18820470225214425142 +-2.18814501383870130269 +-2.18808531185992150014 +-2.18802559631845250365 +-2.18796586721693842037 +-2.18790612455801758429 +-2.18784636834432832941 +-2.18778659857850232839 +-2.18772681526317080980 +-2.18766701840095834086 +-2.18760720799448771245 +-2.18754738404637860683 +-2.18748754655924582124 +-2.18742769553570193253 +-2.18736783097835507661 +-2.18730795288981072488 +-2.18724806127266946376 +-2.18718815612952965921 +-2.18712823746298612448 +-2.18706830527562967603 +-2.18700835957004713350 +-2.18694840034882309610 +-2.18688842761453816621 +-2.18682844136976850535 +-2.18676844161708849867 +-2.18670842835906764634 +-2.18664840159827189581 +-2.18658836133726497408 +-2.18652830757860572319 +-2.18646824032485032063 +-2.18640815957855139118 +-2.18634806534225756280 +-2.18628795761851479895 +-2.18622783640986506626 +-2.18616770171884544638 +-2.18610755354799302097 +-2.18604739189983776626 +-2.18598721677690877030 +-2.18592702818172979207 +-2.18586682611682192601 +-2.18580661058470360203 +-2.18574638158788792097 +-2.18568613912888531914 +-2.18562588321020356830 +-2.18556561383434688750 +-2.18550533100381416673 +-2.18544503472110207554 +-2.18538472498870550709 +-2.18532440180911224914 +-2.18526406518481008945 +-2.18520371511827971034 +-2.18514335161200312641 +-2.18508297466845391455 +-2.18502258429010653984 +-2.18496218047942791785 +-2.18490176323888274368 +-2.18484133257093615654 +-2.18478088847804530204 +-2.18472043096266377304 +-2.18465996002724427427 +-2.18459947567423462544 +-2.18453897790607998175 +-2.18447846672522150158 +-2.18441794213409590242 +-2.18435740413513812541 +-2.18429685273077955898 +-2.18423628792344626248 +-2.18417570971556207482 +-2.18411511810954905854 +-2.18405451310782261487 +-2.18399389471279725683 +-2.18393326292688172430 +-2.18387261775248431306 +-2.18381195919200621347 +-2.18375128724784905998 +-2.18369060192240782570 +-2.18362990321807570737 +-2.18356919113724279313 
+-2.18350846568229428613 +-2.18344772685561139269 +-2.18338697465957531918 +-2.18332620909656061059 +-2.18326543016893914739 +-2.18320463787908014552 +-2.18314383222934926820 +-2.18308301322210773776 +-2.18302218085971277972 +-2.18296133514452161961 +-2.18290047607888393344 +-2.18283960366514939722 +-2.18277871790566102561 +-2.18271781880276138921 +-2.18265690635878684134 +-2.18259598057607373534 +-2.18253504145695087502 +-2.18247408900374662011 +-2.18241312321878622171 +-2.18235214410438782551 +-2.18229115166287046534 +-2.18223014589654829010 +-2.18216912680773056366 +-2.18210809439872388538 +-2.18204704867183307826 +-2.18198598962935719214 +-2.18192491727359350051 +-2.18186383160683483595 +-2.18180273263137181061 +-2.18174162034949015165 +-2.18168049476347292170 +-2.18161935587560051886 +-2.18155820368814845622 +-2.18149703820338869420 +-2.18143585942359230501 +-2.18137466735102414361 +-2.18131346198794773272 +-2.18125224333662037779 +-2.18119101139929982835 +-2.18112976617823761671 +-2.18106850767568216654 +-2.18100723589388012513 +-2.18094595083507281075 +-2.18088465250149976526 +-2.18082334089539520150 +-2.18076201601899155591 +-2.18070067787451771224 +-2.18063932646419855743 +-2.18057796179025586980 +-2.18051658385490654268 +-2.18045519266036835759 +-2.18039378820885065835 +-2.18033237050256278877 +-2.18027093954370876361 +-2.18020949533449082125 +-2.18014803787710631511 +-2.18008656717374993406 +-2.18002508322661325835 +-2.17996358603788387143 +-2.17990207560974624812 +-2.17984055194438175462 +-2.17977901504396731625 +-2.17971746491067852602 +-2.17965590154668475975 +-2.17959432495415406095 +-2.17953273513525180860 +-2.17947113209213805263 +-2.17940951582696973432 +-2.17934788634190113044 +-2.17928624363908385320 +-2.17922458772066418575 +-2.17916291858878530263 +-2.17910123624559037836 +-2.17903954069321370568 +-2.17897783193379179778 +-2.17891610996945317424 +-2.17885437480232591057 +-2.17879262643453275317 +-2.17873086486819511620 +-2.17866909010543041703 
+-2.17860730214835074392 +-2.17854550099906774108 +-2.17848368665968683544 +-2.17842185913231389804 +-2.17836001841904725040 +-2.17829816452198388177 +-2.17823629744321811685 +-2.17817441718483983948 +-2.17811252374893626893 +-2.17805061713758973951 +-2.17798869735288080918 +-2.17792676439688737133 +-2.17786481827168110215 +-2.17780285897933278960 +-2.17774088652191011306 +-2.17767890090147409055 +-2.17761690212008662826 +-2.17755489017980474742 +-2.17749286508268014018 +-2.17743082683076449868 +-2.17736877542610329783 +-2.17730671087074068026 +-2.17724463316671590363 +-2.17718254231606511695 +-2.17712043832082313699 +-2.17705832118301856326 +-2.17699619090467910709 +-2.17693404748782848301 +-2.17687189093448463240 +-2.17680972124666594070 +-2.17674753842638502022 +-2.17668534247565226281 +-2.17662313339647450761 +-2.17656091119085415286 +-2.17649867586079182047 +-2.17643642740828502369 +-2.17637416583532594672 +-2.17631189114390455330 +-2.17624960333600858675 +-2.17618730241362046129 +-2.17612498837872125890 +-2.17606266123328717654 +-2.17600032097929174668 +-2.17593796761870494905 +-2.17587560115349409884 +-2.17581322158562162628 +-2.17575082891704951749 +-2.17568842314973354135 +-2.17562600428562724630 +-2.17556357232668107216 +-2.17550112727484323827 +-2.17543866913205485858 +-2.17537619790025837929 +-2.17531371358139047345 +-2.17525121617738470547 +-2.17518870569017064298 +-2.17512618212167696541 +-2.17506364547382702312 +-2.17500109574854105787 +-2.17493853294773709095 +-2.17487595707332781458 +-2.17481336812722547691 +-2.17475076611133610882 +-2.17468815102756485302 +-2.17462552287781196725 +-2.17456288166397548878 +-2.17450022738794856991 +-2.17443756005162303069 +-2.17437487965688625025 +-2.17431218620562249910 +-2.17424947969971382733 +-2.17418676014103651184 +-2.17412402753146638545 +-2.17406128187287350784 +-2.17399852316712660638 +-2.17393575141609085577 +-2.17387296662162654570 +-2.17381016878559307770 +-2.17374735790984363604 +-2.17368453399623096089 
+-2.17362169704660290748 +-2.17355884706280555463 +-2.17349598404667920803 +-2.17343310800006284111 +-2.17337021892479231866 +-2.17330731682269906457 +-2.17324440169561183822 +-2.17318147354535629034 +-2.17311853237375363079 +-2.17305557818262373715 +-2.17299261097378249019 +-2.17292963074904177390 +-2.17286663751021080770 +-2.17280363125909481425 +-2.17274061199749768392 +-2.17267757972721708981 +-2.17261453445005114915 +-2.17255147616779131781 +-2.17248840488222727529 +-2.17242532059514603660 +-2.17236222330832973171 +-2.17229911302355871427 +-2.17223598974261022931 +-2.17217285346725708095 +-2.17210970419926852060 +-2.17204654194041246740 +-2.17198336669245284369 +-2.17192017845714824276 +-2.17185697723625814604 +-2.17179376303153448546 +-2.17173053584472919297 +-2.17166729567758798325 +-2.17160404253185745915 +-2.17154077640927667403 +-2.17147749731158468123 +-2.17141420524051387275 +-2.17135090019779886106 +-2.17128758218516360046 +-2.17122425120433648615 +-2.17116090725703614339 +-2.17109755034498341786 +-2.17103418046989204981 +-2.17097079763347355907 +-2.17090740183743768910 +-2.17084399308348885427 +-2.17078057137333013671 +-2.17071713670865928947 +-2.17065368909117317742 +-2.17059022852256422453 +-2.17052675500452130208 +-2.17046326853873106089 +-2.17039976912687659905 +-2.17033625677063657378 +-2.17027273147168742184 +-2.17020919323170424775 +-2.17014564205235505057 +-2.17008207793530782936 +-2.17001850088222525414 +-2.16995491089476910673 +-2.16989130797459539579 +-2.16982769212335835363 +-2.16976406334270954801 +-2.16970042163429566173 +-2.16963676699976204532 +-2.16957309944074916430 +-2.16950941895889481970 +-2.16944572555583459206 +-2.16938201923319962106 +-2.16931829999261882591 +-2.16925456783571757313 +-2.16919082276411590016 +-2.16912706477943473260 +-2.16906329388328966701 +-2.16899951007729230312 +-2.16893571336305246433 +-2.16887190374217508904 +-2.16880808121626555973 +-2.16874424578692082122 +-2.16868039745573915056 +-2.16861653622431305166 
+-2.16855266209423369617 +-2.16848877506708692664 +-2.16842487514445769747 +-2.16836096232792563399 +-2.16829703661906814105 +-2.16823309801946040309 +-2.16816914653067316365 +-2.16810518215427405764 +-2.16804120489182805542 +-2.16797721474489701876 +-2.16791321171503925669 +-2.16784919580380952553 +-2.16778516701276036116 +-2.16772112534344119084 +-2.16765707079739611274 +-2.16759300337616922505 +-2.16752892308130018506 +-2.16746482991432465326 +-2.16740072387677518151 +-2.16733660497018254532 +-2.16727247319607352338 +-2.16720832855597178579 +-2.16714417105139656172 +-2.16708000068386663628 +-2.16701581745489679776 +-2.16695162136599561720 +-2.16688741241867344201 +-2.16682319061443307007 +-2.16675895595477818745 +-2.16669470844120493069 +-2.16663044807521032453 +-2.16656617485828562053 +-2.16650188879192029390 +-2.16643758987760026713 +-2.16637327811680791001 +-2.16630895351102292778 +-2.16624461606172102890 +-2.16618026577037658953 +-2.16611590263846043314 +-2.16605152666743716594 +-2.16598713785877317051 +-2.16592273621392772398 +-2.16585832173435877124 +-2.16579389442152203671 +-2.16572945427686702757 +-2.16566500130184325101 +-2.16560053549789577332 +-2.16553605686646477579 +-2.16547156540899177202 +-2.16540706112691072605 +-2.16534254402165604603 +-2.16527801409465503468 +-2.16521347134733499473 +-2.16514891578112056436 +-2.16508434739743016451 +-2.16501976619768088383 +-2.16495517218328759057 +-2.16489056535566071204 +-2.16482594571620801105 +-2.16476131326633369767 +-2.16469666800744020563 +-2.16463200994092463958 +-2.16456733906818321600 +-2.16450265539060771047 +-2.16443795890958723405 +-2.16437324962650823323 +-2.16430852754275315775 +-2.16424379265970090458 +-2.16417904497872948255 +-2.16411428450121201550 +-2.16404951122851896272 +-2.16398472516201723082 +-2.16391992630307195000 +-2.16385511465304292145 +-2.16379029021328994631 +-2.16372545298516705259 +-2.16366060297002604784 +-2.16359574016921563100 +-2.16353086458408361281 +-2.16346597621596936634 
+-2.16340107506621404099 +-2.16333616113615523346 +-2.16327123442712387913 +-2.16320629494045224561 +-2.16314134267746638329 +-2.16307637763949101029 +-2.16301139982784773608 +-2.16294640924385328518 +-2.16288140588882349391 +-2.16281638976407064590 +-2.16275136087090169568 +-2.16268631921062448598 +-2.16262126478453975409 +-2.16255619759394823731 +-2.16249111764014712023 +-2.16242602492442781426 +-2.16236091944808128673 +-2.16229580121239628454 +-2.16223067021865578141 +-2.16216552646814186289 +-2.16210036996213039728 +-2.16203520070189902924 +-2.16197001868871874208 +-2.16190482392385874277 +-2.16183961640858468556 +-2.16177439614415911606 +-2.16170916313184147128 +-2.16164391737288941187 +-2.16157865886855660165 +-2.16151338762009270766 +-2.16144810362874739695 +-2.16138280689576278704 +-2.16131749742238188361 +-2.16125217520984147512 +-2.16118684025937879412 +-2.16112149257222485588 +-2.16105613214961067570 +-2.16099075899276149570 +-2.16092537310289900532 +-2.16085997448124578213 +-2.16079456312901818649 +-2.16072913904743035829 +-2.16066370223769377290 +-2.16059825270101502070 +-2.16053279043860024800 +-2.16046731545165204835 +-2.16040182774136813038 +-2.16033632730894575857 +-2.16027081415557686839 +-2.16020528828245117481 +-2.16013974969075572830 +-2.16007419838167535886 +-2.16000863435639001153 +-2.15994305761607829908 +-2.15987746816191394927 +-2.15981186599506891355 +-2.15974625111671247879 +-2.15968062352801082326 +-2.15961498323012524025 +-2.15954933022421702304 +-2.15948366451144124767 +-2.15941798609295299016 +-2.15935229496990332976 +-2.15928659114343757253 +-2.15922087461470280090 +-2.15915514538483943596 +-2.15908940345498701063 +-2.15902364882627972875 +-2.15895788149985268234 +-2.15889210147683385799 +-2.15882630875835035411 +-2.15876050334552482823 +-2.15869468523947993788 +-2.15862885444133167923 +-2.15856301095219604846 +-2.15849715477318415680 +-2.15843128590540356271 +-2.15836540434996182469 +-2.15829951010796117217 +-2.15823360318050072593 
+-2.15816768356867916268 +-2.15810175127358716551 +-2.15803580629631763799 +-2.15796984863795815457 +-2.15790387829959318111 +-2.15783789528230540711 +-2.15777189958717308116 +-2.15770589121527178733 +-2.15763987016767444516 +-2.15757383644545219781 +-2.15750779004966997121 +-2.15744173098139491174 +-2.15737565924168483988 +-2.15730957483159935251 +-2.15724347775219404966 +-2.15717736800451964641 +-2.15711124558962685782 +-2.15704511050856018173 +-2.15697896276236544821 +-2.15691280235208049376 +-2.15684662927874359895 +-2.15678044354338949162 +-2.15671424514704845876 +-2.15664803409075034324 +-2.15658181037551965886 +-2.15651557400237958717 +-2.15644932497234975699 +-2.15638306328644624443 +-2.15631678894568290517 +-2.15625050195107137441 +-2.15618420230361795831 +-2.15611789000432807484 +-2.15605156505420447743 +-2.15598522745424636682 +-2.15591887720544850282 +-2.15585251430880431300 +-2.15578613876530411630 +-2.15571975057593467895 +-2.15565334974168143489 +-2.15558693626352537720 +-2.15552051014244394622 +-2.15545407137941325004 +-2.15538761997540673221 +-2.15532115593139250720 +-2.15525467924833824540 +-2.15518818992720717631 +-2.15512168796896030898 +-2.15505517337455643201 +-2.15498864614494944902 +-2.15492210628109237547 +-2.15485555378393245363 +-2.15478898865441736987 +-2.15472241089349036969 +-2.15465582050209247811 +-2.15458921748116027928 +-2.15452260183162769280 +-2.15445597355442819421 +-2.15438933265048948584 +-2.15432267912073704963 +-2.15425601296609414703 +-2.15418933418748137498 +-2.15412264278581488952 +-2.15405593876200995851 +-2.15398922211697652074 +-2.15392249285162451500 +-2.15385575096685855101 +-2.15378899646358146214 +-2.15372222934269297312 +-2.15365544960508925598 +-2.15358865725166515048 +-2.15352185228331105549 +-2.15345503470091514941 +-2.15338820450536294615 +-2.15332136169753729504 +-2.15325450627831571637 +-2.15318763824857750677 +-2.15312075760919396927 +-2.15305386436103773917 +-2.15298695850497479043 +-2.15292004004187109700 
+-2.15285310897258952423 +-2.15278616529798894064 +-2.15271920901892466205 +-2.15265224013625244837 +-2.15258525865081962181 +-2.15251826456347705729 +-2.15245125787506852433 +-2.15238423858643557196 +-2.15231720669841752880 +-2.15225016221185061482 +-2.15218310512756882957 +-2.15211603544640261987 +-2.15204895316917932391 +-2.15198185829672317126 +-2.15191475082985750333 +-2.15184763076940033244 +-2.15178049811616833864 +-2.15171335287097509337 +-2.15164619503463061534 +-2.15157902460794225874 +-2.15151184159171515731 +-2.15144464598675178024 +-2.15137743779385015586 +-2.15131021701380742428 +-2.15124298364741761702 +-2.15117573769546943652 +-2.15110847915875202929 +-2.15104120803804965689 +-2.15097392433414391633 +-2.15090662804781507234 +-2.15083931917983894877 +-2.15077199773098870494 +-2.15070466370203527973 +-2.15063731709374694745 +-2.15056995790688709747 +-2.15050258614221911913 +-2.15043520180050284907 +-2.15036780488249368304 +-2.15030039538894568452 +-2.15023297332060936427 +-2.15016553867823301260 +-2.15009809146256269941 +-2.15003063167433872138 +-2.14996315931430181934 +-2.14989567438318918136 +-2.14982817688173311055 +-2.14976066681066635411 +-2.14969314417071544199 +-2.14962560896260779231 +-2.14955806118706460595 +-2.14949050084480441924 +-2.14942292793654798899 +-2.14935534246300585792 +-2.14928774442489078922 +-2.14922013382291243744 +-2.14915251065777468398 +-2.14908487493018096615 +-2.14901722664083161263 +-2.14894956579042428757 +-2.14888189237965177014 +-2.14881420640920861587 +-2.14874650787978183075 +-2.14867879679205797672 +-2.14861107314672095114 +-2.14854333694445021052 +-2.14847558818592432317 +-2.14840782687181741650 +-2.14834005300280184159 +-2.14827226657954772904 +-2.14820446760272076858 +-2.14813665607298531768 +-2.14806883199100218107 +-2.14800099535742905488 +-2.14793314617292230295 +-2.14786528443813384825 +-2.14779741015371383739 +-2.14772952332030930833 +-2.14766162393856507862 +-2.14759371200912196898 +-2.14752578753261946787 
+-2.14745785050969217878 +-2.14738990094097470518 +-2.14732193882709676558 +-2.14725396416868674621 +-2.14718597696636859240 +-2.14711797722076536132 +-2.14704996493249522516 +-2.14698194010217635608 +-2.14691390273042115311 +-2.14684585281784201527 +-2.14677779036504645660 +-2.14670971537263888251 +-2.14664162784122503069 +-2.14657352777140264521 +-2.14650541516377035833 +-2.14643729001892147323 +-2.14636915233744884901 +-2.14630100211994090387 +-2.14623283936698472374 +-2.14616466407916162140 +-2.14609647625705513008 +-2.14602827590124256574 +-2.14596006301229813573 +-2.14589183759079560332 +-2.14582359963730429087 +-2.14575534915239085620 +-2.14568708613662062490 +-2.14561881059055536980 +-2.14555052251475197878 +-2.14548222190976956014 +-2.14541390877615922861 +-2.14534558311447298706 +-2.14527724492525750932 +-2.14520889420905902512 +-2.14514053096642021146 +-2.14507215519788019265 +-2.14500376690397676072 +-2.14493536608524326681 +-2.14486695274221128571 +-2.14479852687541150402 +-2.14473008848536794702 +-2.14466163757260508405 +-2.14459317413764338767 +-2.14452469818100199817 +-2.14445620970319472676 +-2.14438770870473538466 +-2.14431919518613200992 +-2.14425066914789397288 +-2.14418213059052487068 +-2.14411357951452608006 +-2.14404501592039764546 +-2.14397643980863472635 +-2.14390785117973159402 +-2.14383925003418029931 +-2.14377063637246667582 +-2.14370201019507833351 +-2.14363337150249755325 +-2.14356472029520439548 +-2.14349605657367670020 +-2.14342738033838831058 +-2.14335869158981218163 +-2.14328999032841815975 +-2.14322127655467253859 +-2.14315255026903850322 +-2.14308381147197879457 +-2.14301506016395126863 +-2.14294629634541244911 +-2.14287752001681441882 +-2.14280873117860970467 +-2.14273992983124506040 +-2.14267111597516590749 +-2.14260228961081544696 +-2.14253345073863243897 +-2.14246459935905519956 +-2.14239573547251760388 +-2.14232685907945308301 +-2.14225797018028929486 +-2.14218906877545345324 +-2.14212015486536877518 +-2.14205122845045758950 
+-2.14198228953113778417 +-2.14191333810782591485 +-2.14184437418093454042 +-2.14177539775087533158 +-2.14170640881805507405 +-2.14163740738288010945 +-2.14156839344575233852 +-2.14149936700707321791 +-2.14143032806723754291 +-2.14136127662664188520 +-2.14129221268567793146 +-2.14122313624473425975 +-2.14115404730420033630 +-2.14108494586445718966 +-2.14101583192588851290 +-2.14094670548887133776 +-2.14087756655378269599 +-2.14080841512099739887 +-2.14073925119088404045 +-2.14067007476381210296 +-2.14060088584014840407 +-2.14053168442025354423 +-2.14046247050448945615 +-2.14039324409321451981 +-2.14032400518678223023 +-2.14025475378554652650 +-2.14018548988985646275 +-2.14011621350005931674 +-2.14004692461650103397 +-2.13997762323952178676 +-2.13990830936946263563 +-2.13983898300665886794 +-2.13976964415144532694 +-2.13970029280415419137 +-2.13963092896511453134 +-2.13956155263465142014 +-2.13949216381308904289 +-2.13942276250074936428 +-2.13935334869794946400 +-2.13928392240500686583 +-2.13921448362223376449 +-2.13914503234994102243 +-2.13907556858843772574 +-2.13900609233802718734 +-2.13893660359901449652 +-2.13886710237169985760 +-2.13879758865637947807 +-2.13872806245335000952 +-2.13865852376290277448 +-2.13858897258532820729 +-2.13851940892091407775 +-2.13844983276994415888 +-2.13838024413270177959 +-2.13831064300946538381 +-2.13824102940051297139 +-2.13817140330611854537 +-2.13810176472655388835 +-2.13803211366208811839 +-2.13796245011298857719 +-2.13789277407951860965 +-2.13782308556194022842 +-2.13775338456051100522 +-2.13768367107549028816 +-2.13761394510712898764 +-2.13754420665567979043 +-2.13747445572139138648 +-2.13740469230450891303 +-2.13733491640527617506 +-2.13726512802393564527 +-2.13719532716072446732 +-2.13712551381587756438 +-2.13705568798962985966 +-2.13698584968221139135 +-2.13691599889385086541 +-2.13684613562477343507 +-2.13677625987520247719 +-2.13670637164535737185 +-2.13663647093545838729 +-2.13656655774571824224 +-2.13649663207635187590 
+-2.13642669392756845426 +-2.13635674329957625517 +-2.13628678019258044785 +-2.13621680460678353697 +-2.13614681654238580677 +-2.13607681599958532104 +-2.13600680297857659085 +-2.13593677747955190682 +-2.13586673950270222733 +-2.13579668904821406983 +-2.13572662611627306362 +-2.13565655070706172936 +-2.13558646282075903500 +-2.13551636245754261623 +-2.13544624961758788828 +-2.13537612430106715777 +-2.13530598650814917860 +-2.13523583623900270467 +-2.13516567349379160490 +-2.13509549827267752775 +-2.13502531057582123353 +-2.13495511040337948572 +-2.13488489775550727146 +-2.13481467263235646925 +-2.13474443503407629308 +-2.13467418496081462465 +-2.13460392241271579294 +-2.13453364738992190652 +-2.13446335989257329757 +-2.13439305992080541330 +-2.13432274747475503318 +-2.13425242255455227536 +-2.13418208516032814615 +-2.13411173529220965506 +-2.13404137295032025889 +-2.13397099813478385855 +-2.13390061084571813765 +-2.13383021108324122395 +-2.13375979884746813653 +-2.13368937413851034179 +-2.13361893695647841795 +-2.13354848730147894642 +-2.13347802517361673225 +-2.13340755057299436004 +-2.13333706349971130578 +-2.13326656395386393683 +-2.13319605193554773237 +-2.13312552744485595113 +-2.13305499048187652278 +-2.13298444104669959742 +-2.13291387913940688748 +-2.13284330476008276989 +-2.13277271790880718072 +-2.13270211858565650331 +-2.13263150679070712101 +-2.13256088252403053218 +-2.13249024578569690291 +-2.13241959657577506704 +-2.13234893489432852931 +-2.13227826074142123858 +-2.13220757411711270279 +-2.13213687502146109765 +-2.13206616345452149019 +-2.13199543941634761524 +-2.13192470290698787849 +-2.13185395392649335022 +-2.13178319247490666299 +-2.13171241855227266981 +-2.13164163215863133871 +-2.13157083329402086136 +-2.13150002195847720898 +-2.13142919815203368827 +-2.13135836187472138548 +-2.13128751312656872230 +-2.13121665190760056774 +-2.13114577821784223488 +-2.13107489205731370774 +-2.13100399342603408215 +-2.13093308232401978941 +-2.13086215875128415220 
+-2.13079122270783916093 +-2.13072027419369369738 +-2.13064931320885397881 +-2.13057833975332444609 +-2.13050735382710776378 +-2.13043635543020171141 +-2.13036534456260273629 +-2.13029432122430728569 +-2.13022328541530692192 +-2.13015223713559009866 +-2.13008117638514526959 +-2.13001010316395644750 +-2.12993901747200720109 +-2.12986791930927576999 +-2.12979680867574128200 +-2.12972568557137842404 +-2.12965454999615966258 +-2.12958340195005613182 +-2.12951224143303541325 +-2.12944106844506242382 +-2.12936988298610208048 +-2.12929868505611352703 +-2.12922747465505590725 +-2.12915625178288481223 +-2.12908501643955494487 +-2.12901376862501656717 +-2.12894250833921816479 +-2.12887123558210733520 +-2.12879995035362723499 +-2.12872865265372057664 +-2.12865734248232563175 +-2.12858601983937978375 +-2.12851468472481775152 +-2.12844333713857114532 +-2.12837197708057068724 +-2.12830060455074310255 +-2.12822921954901378427 +-2.12815782207530501680 +-2.12808641212953775224 +-2.12801498971162850182 +-2.12794355482149510905 +-2.12787210745904964426 +-2.12780064762420284552 +-2.12772917531686278636 +-2.12765769053693665214 +-2.12758619328432674322 +-2.12751468355893669226 +-2.12744316136066302647 +-2.12737162668940449350 +-2.12730007954505495604 +-2.12722851992750605632 +-2.12715694783664677203 +-2.12708536327236608088 +-2.12701376623454718739 +-2.12694215672307418430 +-2.12687053473782627933 +-2.12679890027868268021 +-2.12672725334551726561 +-2.12665559393820480238 +-2.12658392205661606056 +-2.12651223770061781337 +-2.12644054087007861042 +-2.12636883156486122814 +-2.12629710978482844297 +-2.12622537552983725817 +-2.12615362879974600929 +-2.12608186959440947916 +-2.12601009791367934199 +-2.12593831375740682788 +-2.12586651712543739379 +-2.12579470801761738485 +-2.12572288643379003759 +-2.12565105237379770031 +-2.12557920583747472776 +-2.12550734682466124781 +-2.12543547533518761838 +-2.12536359136888775012 +-2.12529169492558978050 +-2.12521978600512007063 +-2.12514786460730409345 
+-2.12507593073196288103 +-2.12500398437891835357 +-2.12493202554798576998 +-2.12486005423898127731 +-2.12478807045171835810 +-2.12471607418600783035 +-2.12464406544165740343 +-2.12457204421847301035 +-2.12450001051625880777 +-2.12442796433481717600 +-2.12435590567394516626 +-2.12428383453344293841 +-2.12421175091310221461 +-2.12413965481271693747 +-2.12406754623207660870 +-2.12399542517096939775 +-2.12392329162918036545 +-2.12385114560649324034 +-2.12377898710268908644 +-2.12370681611754585916 +-2.12363463265084106979 +-2.12356243670234734466 +-2.12349022827183864237 +-2.12341800735908181608 +-2.12334577396384727166 +-2.12327352808589875366 +-2.12320126972499911844 +-2.12312899888090811373 +-2.12305671555338504319 +-2.12298441974218610184 +-2.12291211144706437608 +-2.12283979066777117595 +-2.12276745740405692331 +-2.12269511165566804323 +-2.12262275342234918440 +-2.12255038270384233101 +-2.12247799949988857904 +-2.12240560381022635994 +-2.12233319563459055246 +-2.12226077497271514716 +-2.12218834182433102598 +-2.12211589618916729449 +-2.12204343806695128194 +-2.12197096745740676482 +-2.12189848436025751965 +-2.12182598877522199388 +-2.12175348070201907902 +-2.12168096014036322572 +-2.12160842708996932870 +-2.12153588155054784181 +-2.12146332352180833070 +-2.12139075300345680830 +-2.12131816999519839939 +-2.12124557449673423193 +-2.12117296650776587796 +-2.12110034602799046866 +-2.12102771305710335881 +-2.12095506759479812686 +-2.12088240964076613082 +-2.12080973919469606415 +-2.12073705625627528804 +-2.12066436082518894324 +-2.12059165290111728552 +-2.12051893248374279111 +-2.12044619957274127486 +-2.12037345416779032803 +-2.12030069626856265685 +-2.12022792587472963532 +-2.12015514298596086107 +-2.12008234760192193491 +-2.12000953972227934585 +-2.11993671934669514201 +-2.11986388647483003922 +-2.11979104110633986835 +-2.11971818324088401297 +-2.11964531287811386306 +-2.11957243001768302904 +-2.11949953465923845997 +-2.11942662680242976947 +-2.11935370644690124209 
+-2.11928077359229449783 +-2.11920782823825204488 +-2.11913487038441195054 +-2.11906190003041006165 +-2.11898891717588133687 +-2.11891592182045762627 +-2.11884291396376811534 +-2.11876989360544154550 +-2.11869686074510221729 +-2.11862381538237443124 +-2.11855075751687982333 +-2.11847768714823558867 +-2.11840460427606069871 +-2.11833150889996835176 +-2.11825840101957174610 +-2.11818528063448185961 +-2.11811214774430611740 +-2.11803900234865105645 +-2.11796584444711966100 +-2.11789267403931535938 +-2.11781949112483669495 +-2.11774629570328176698 +-2.11767308777424601018 +-2.11759986733732175068 +-2.11752663439210042640 +-2.11745338893817214299 +-2.11738013097512167704 +-2.11730686050253602559 +-2.11723357751999552434 +-2.11716028202708228534 +-2.11708697402337353566 +-2.11701365350844605828 +-2.11694032048187485984 +-2.11686697494322917379 +-2.11679361689208045405 +-2.11672024632799571364 +-2.11664686325054196558 +-2.11657346765928133792 +-2.11650005955377507050 +-2.11642663893358262683 +-2.11635320579826080589 +-2.11627976014736551846 +-2.11620630198044823445 +-2.11613283129706042374 +-2.11605934809675133579 +-2.11598585237906666734 +-2.11591234414355122695 +-2.11583882338974627046 +-2.11576529011719349782 +-2.11569174432543016806 +-2.11561818601399176387 +-2.11554461518241287976 +-2.11547103183022633388 +-2.11539743595696005940 +-2.11532382756214287767 +-2.11525020664530005732 +-2.11517657320595420245 +-2.11510292724362747308 +-2.11502926875783936467 +-2.11495559774810759635 +-2.11488191421394500225 +-2.11480821815486663695 +-2.11473450957038311415 +-2.11466078846000282709 +-2.11458705482323239266 +-2.11451330865957709548 +-2.11443954996853999972 +-2.11436577874962017276 +-2.11429199500231757014 +-2.11421819872612815061 +-2.11414438992054565247 +-2.11407056858506425812 +-2.11399673471917148859 +-2.11392288832235797358 +-2.11384902939410856959 +-2.11377515793390768906 +-2.11370127394123752396 +-2.11362737741557671356 +-2.11355346835640522940 +-2.11347954676319682576 
+-2.11340561263542747739 +-2.11333166597256694175 +-2.11325770677408630860 +-2.11318373503945267089 +-2.11310975076813267748 +-2.11303575395958764815 +-2.11296174461328112315 +-2.11288772272867131363 +-2.11281368830521643076 +-2.11273964134237157708 +-2.11266558183959096695 +-2.11259150979632481793 +-2.11251742521202334757 +-2.11244332808613322072 +-2.11236921841809976996 +-2.11229509620736610742 +-2.11222096145337401296 +-2.11214681415556215782 +-2.11207265431336832506 +-2.11199848192622718912 +-2.11192429699357253625 +-2.11185009951483415591 +-2.11177588948944183755 +-2.11170166691682359428 +-2.11162743179640344238 +-2.11155318412760495406 +-2.11147892390984859290 +-2.11140465114255349022 +-2.11133036582513788915 +-2.11125606795701603602 +-2.11118175753760084490 +-2.11110743456630300940 +-2.11103309904253277907 +-2.11095875096569596252 +-2.11088439033519836840 +-2.11081001715044269673 +-2.11073563141083075934 +-2.11066123311576081534 +-2.11058682226463067977 +-2.11051239885683417086 +-2.11043796289176599501 +-2.11036351436881641774 +-2.11028905328737526048 +-2.11021457964682879194 +-2.11014009344656328082 +-2.11006559468596011087 +-2.10999108336440333034 +-2.10991655948127032616 +-2.10984202303593848526 +-2.10976747402778386231 +-2.10969291245617940334 +-2.10961833832049627802 +-2.10954375162010521194 +-2.10946915235437160163 +-2.10939454052266306405 +-2.10931991612434144301 +-2.10924527915876991457 +-2.10917062962530721393 +-2.10909596752331029990 +-2.10902129285213701948 +-2.10894660561113944652 +-2.10887190579966965487 +-2.10879719341707794200 +-2.10872246846271194087 +-2.10864773093591839626 +-2.10857298083604050021 +-2.10849821816242055661 +-2.10842344291439864890 +-2.10834865509131352823 +-2.10827385469250083716 +-2.10819904171729621822 +-2.10812421616502998489 +-2.10804937803503422700 +-2.10797452732663703756 +-2.10789966403916562143 +-2.10782478817194318665 +-2.10774989972429427354 +-2.10767499869553942560 +-2.10760008508499607771 +-2.10752515889198255294 
+-2.10745022011581450982 +-2.10737526875580449826 +-2.10730030481126240360 +-2.10722532828150033168 +-2.10715033916582328288 +-2.10707533746353758985 +-2.10700032317394780890 +-2.10692529629635449950 +-2.10685025683005777708 +1.64032663064655026552 +1.63919755783538723115 +1.63806880064274751341 +1.63694035948710014949 +1.63581223478635995328 +1.63468442695788840346 +1.63355693641848720432 +1.63242976358440672335 +1.63130290887133977407 +1.63017637269442339232 +1.62905015546824016859 +1.62792425760681602753 +1.62679867952362022798 +1.62567342163156713930 +1.62454848434301624138 +1.62342386806976723967 +1.62229957322306694856 +1.62117560021360529454 +1.62005194945151442809 +1.61892862134637094407 +1.61780561630719565969 +1.61668293474245117203 +1.61556057706004385643 +1.61443854366732586492 +1.61331683497108735459 +1.61219545137756692377 +1.61107439329244339632 +1.60995366112083848620 +1.60883325526731724153 +1.60771317613588871076 +1.60659342413000283400 +1.60547399965255332965 +1.60435490310587702822 +1.60323613489175098579 +1.60211769541139759099 +1.60099958506548056825 +1.59988180425410453367 +1.59876435337681876980 +1.59764723283261433906 +1.59653044301992275145 +1.59541398433661929523 +1.59429785718002170469 +1.59318206194688749555 +1.59206659903341862794 +1.59095146883525795367 +1.58983667174748966033 +1.58872220816463993742 +1.58760807848067808656 +1.58649428308901274676 +1.58538082238249589118 +1.58426769675342127286 +1.58315490659352242631 +1.58204245229397666428 +1.58093033424539997078 +1.57981855283785299626 +1.57870710846083572854 +1.57759600150328838097 +1.57648523235359516725 +1.57537480139958030456 +1.57426470902850779154 +1.57315495562708473898 +1.57204554158145937137 +1.57093646727721880652 +1.56982773309939260820 +1.56871933943245300824 +1.56761128666030891132 +1.56650357516631366650 +1.56539620533326151453 +1.56428917754338425716 +1.56318249217835725240 +1.56207614961929719399 +1.56097015024675855877 +1.55986449444073893567 
+1.55875918258067636124 +1.55765421504544776532 +1.55654959221337163555 +1.55544531446220846149 +1.55434138216915718189 +1.55323779571085784923 +1.55213455546339140767 +1.55103166180227902693 +1.54992911510248165818 +1.54882691573840247656 +1.54772506408388266230 +1.54662356051220473141 +1.54552240539609275771 +1.54442159910770904219 +1.54332114201865677749 +1.54222103449998138025 +1.54112127692216493990 +1.54002186965513288008 +1.53892281306824996179 +1.53782410753031961725 +1.53672575340958661450 +1.53562775107373794548 +1.53453010088989616477 +1.53343280322462804932 +1.53233585844393882525 +1.53123926691327327809 +1.53014302899751730713 +1.52904714506099725924 +1.52795161546747726433 +1.52685644058016389835 +1.52576162076170351867 +1.52466715637418137597 +1.52357304777912294647 +1.52247929533749526421 +1.52138589940970336833 +1.52029286035559318968 +1.51920017853445199485 +1.51810785430500372328 +1.51701588802541653678 +1.51592428005329526997 +1.51483303074568653734 +1.51374214045907562465 +1.51265160954938981952 +1.51156143837199463675 +1.51047162728169626078 +1.50938217663274110159 +1.50829308677881512857 +1.50720435807304453668 +1.50611599086799619052 +1.50502798551567584795 +1.50394034236752993650 +1.50285306177444599740 +1.50176614408674935497 +1.50067958965420711337 +1.49959339882602660232 +1.49850757195085382278 +1.49742210937677588944 +1.49633701145132103072 +1.49525227852145503604 +1.49416791093358614084 +1.49308390903356236201 +1.49200027316667038768 +1.49091700367763912993 +1.48983410091063772640 +1.48875156520927265369 +1.48766939691659372258 +1.48658759637509074736 +1.48550616392669176946 +1.48442509991276794246 +1.48334440467412775888 +1.48226407855102326749 +1.48118412188314407807 +1.48010453500962291251 +1.47902531826902960965 +1.47794647199937889681 +1.47686799653812106392 +1.47578989222215062327 +1.47471215938780120247 +1.47363479837084798696 +1.47255780950650505545 +1.47148119312942871062 +1.47040494957371481455 +1.46932907917290078714 
+1.46825358225996449590 +1.46717845916732536615 +1.46610371022684149445 +1.46502933576981453356 +1.46395533612698436343 +1.46288171162853419816 +1.46180846260408725534 +1.46073558938270720020 +1.45966309229289925575 +1.45859097166260975875 +1.45751922781922638173 +1.45644786108957768889 +1.45537687179993313613 +1.45430626027600373718 +1.45323602684294206355 +1.45216617182534246666 +1.45109669554723841323 +1.45002759833210759233 +1.44895888050286836268 +1.44789054238187886448 +1.44682258429094123819 +1.44575500655129873806 +1.44468780948363373362 +1.44362099340807414904 +1.44255455864418813405 +1.44148850551098361983 +1.44042283432691364808 +1.43935754540987148609 +1.43829263907719262505 +1.43722811564565433606 +1.43616397543147744642 +1.43510021875032234284 +1.43403684591729385644 +1.43297385724693926434 +1.43191125305324562511 +1.43084903364964466377 +1.42978719934901055133 +1.42872575046365835050 +1.42766468730534734632 +1.42660401018527926986 +1.42554371941409674385 +1.42448381530188750155 +1.42342429815818105610 +1.42236516829194981071 +1.42130642601160883665 +1.42024807162501764957 +1.41919010543947599068 +1.41813252776173071013 +1.41707533889796888360 +1.41601853915382136506 +1.41496212883436323082 +1.41390610824411289137 +1.41285047768703098114 +1.41179523746652413330 +1.41074038788544053880 +1.40968592924607283301 +1.40863186185015720753 +1.40757818599887496447 +1.40652490199284896377 +1.40547201013214873022 +1.40441951071628667869 +1.40336740404421900230 +1.40231569041434678269 +1.40126437012451598996 +1.40021344347201548430 +1.39916291075358012463 +1.39811277226538899221 +1.39706302830306472451 +1.39601367916167706795 +1.39496472513573865903 +1.39391616651920702274 +1.39286800360548634892 +1.39182023668742460565 +1.39077286605731531566 +1.38972589200689733424 +1.38867931482735462723 +1.38763313480931693711 +1.38658735224285956100 +1.38554196741750401678 +1.38449698062221582262 +1.38345239214540827177 +1.38240820227493976802 +1.38136441129811382567 
+1.38032101950168129001 +1.37927802717184011527 +1.37823543459423181190 +1.37719324205394699767 +1.37615144983552029068 +1.37511005822293541634 +1.37406906749962054448 +1.37302847794845339635 +1.37198828985175502737 +1.37094850349129715461 +1.36990911914829505136 +1.36887013710341398642 +1.36783155763676500527 +1.36679338102790826071 +1.36575560755584857198 +1.36471823749904119794 +1.36368127113538739614 +1.36264470874223686536 +1.36160855059638663533 +1.36057279697408373131 +1.35953744815102051113 +1.35850250440234043836 +1.35746796600263230914 +1.35643383322593558127 +1.35540010634573859782 +1.35436678563497614469 +1.35333387136603455758 +1.35230136381074683705 +1.35126926324039642324 +1.35023756992571475344 +1.34920628413688437064 +1.34817540614353492678 +1.34714493621474740159 +1.34611487461905210417 +1.34508522162442756276 +1.34405597749830407750 +1.34302714250756149994 +1.34199871691852878897 +1.34097070099698623125 +1.33994309500816366487 +1.33891589921674203367 +1.33788911388685205495 +1.33686273928207688400 +1.33583677566544811732 +1.33481122329944978944 +1.33378608244601792876 +1.33276135336653656083 +1.33173703632184481371 +1.33071313157223092283 +1.32968963937743533954 +1.32866655999664984300 +1.32764389368852020468 +1.32662164071114041519 +1.32559980132205956771 +1.32457837577827830522 +1.32355736433624882054 +1.32253676725187663266 +1.32151658478052058676 +1.32049681717699041172 +1.31947746469554982873 +1.31845852758991610720 +1.31744000611325917660 +1.31642190051820118235 +1.31540421105682048264 +1.31438693798064565321 +1.31337008154066192667 +1.31235364198730763974 +1.31133761957047334512 +1.31032201453950625236 +1.30930682714320667515 +1.30829205762982847538 +1.30727770624708239389 +1.30626377324213227560 +1.30525025886159662392 +1.30423716335155015500 +1.30322448695752246550 +1.30221222992449670031 +1.30120039249691465955 +1.30018897491867102545 +1.29917797743311691505 +1.29816740028306032428 +1.29715724371076546184 +1.29614750795794919647 
+1.29513819326579038282 +1.29412929987492009154 +1.29312082802542738236 +1.29211277795685819392 +1.29110514990821623194 +1.29009794411796052671 +1.28909116082400876380 +1.28808480026373595173 +1.28707886267397419999 +1.28607334829101338514 +1.28506825735060159488 +1.28406359008794423993 +1.28305934673770627441 +1.28205552753401108568 +1.28105213271043805179 +1.28004916250002853673 +1.27904661713527989519 +1.27804449684815080168 +1.27704280187005791980 +1.27604153243187790068 +1.27504068876394516252 +1.27404027109605699764 +1.27304027965746757722 +1.27204071467689172614 +1.27104157638250558904 +1.27004286500194574216 +1.26904458076230697294 +1.26804672389014694289 +1.26704929461148307901 +1.26605229315179457217 +1.26505571973602104485 +1.26405957458856499365 +1.26306385793328734835 +1.26206856999351479942 +1.26107371099203180442 +1.26007928115108813749 +1.25908528069239289415 +1.25809170983712159675 +1.25709856880590797878 +1.25610585781885131240 +1.25511357709551196749 +1.25412172685491474233 +1.25313030731554708730 +1.25213931869536132524 +1.25114876121177043267 +1.25015863508165425699 +1.24916894052135396542 +1.24817967774667715197 +1.24719084697289472885 +1.24620244841474181463 +1.24521448228641862244 +1.24422694880159134811 +1.24323984817338861752 +1.24225318061440659356 +1.24126694633670697776 +1.24028114555181501188 +1.23929577847072303065 +1.23831084530389068377 +1.23732634626124160526 +1.23634228155216652212 +1.23535865138552347631 +1.23437545596963627048 +1.23339269551229624433 +1.23241037022076160845 +1.23142848030175833252 +1.23044702596147903506 +1.22946600740558542597 +1.22848542483920497581 +1.22750527846693513467 +1.22652556849284199991 +1.22554629512045742956 +1.22456745855278481550 +1.22358905899229575276 +1.22261109664092915139 +1.22163357170009545527 +1.22065648437067375554 +1.21967983485301134650 +1.21870362334692794448 +1.21772785005171235717 +1.21675251516612181746 +1.21577761888838709048 +1.21480316141620781067 +1.21382914294675403610 
+1.21285556367666780275 +1.21188242380206290250 +1.21090972351852332878 +1.20993746302110460888 +1.20896564250433669052 +1.20799426216221772457 +1.20702332218822117049 +1.20605282277529290980 +1.20508276411584880350 +1.20411314640178046531 +1.20314396982445170892 +1.20217523457469832593 +1.20120694084283186065 +1.20023908881863672349 +1.19927167869137019096 +1.19830471064976573636 +1.19733818488202969910 +1.19637210157584261694 +1.19540646091836189058 +1.19444126309621778681 +1.19347650829551699125 +1.19251219670184083199 +1.19154832850024772206 +1.19058490387526960674 +1.18962192301091729263 +1.18865938609067578469 +1.18769729329750672875 +1.18673564481385040992 +1.18577444082162131167 +1.18481368150221300084 +1.18385336703649568513 +1.18289349760481865559 +1.18193407338700651188 +1.18097509456236382519 +1.18001656130967158553 +1.17905847380719142059 +1.17810083223266315322 +1.17714363676330457942 +1.17618688757581302262 +1.17523058484636599985 +1.17427472875061900126 +1.17331931946370859876 +1.17236435716025177989 +1.17140984201434505962 +1.17045577419956492449 +1.16950215388897094115 +1.16854898125509976126 +1.16759625646997267090 +1.16664397970509114977 +1.16569215113143864748 +1.16474077091947947338 +1.16378983923916168308 +1.16283935625991352580 +1.16188932215064810727 +1.16093973707976005905 +1.15999060121512775900 +1.15904191472411177699 +1.15809367777355709528 +1.15714589052979222039 +1.15619855315862918310 +1.15525166582536531479 +1.15430522869478147108 +1.15335924193114380820 +1.15241370569820289482 +1.15146862015919415612 +1.15052398547683898400 +1.14957980181334540326 +1.14863606933040496294 +1.14769278818919695517 +1.14674995855038752701 +1.14580758057412768203 +1.14486565442005616688 +1.14392418024729969339 +1.14298315821447005192 +1.14204258847966899637 +1.14110247120048535763 +1.14016280653399482148 +1.13922359463676259317 +1.13828483566484273126 +1.13734652977377659333 +1.13640867711859550049 +1.13547127785382095944 +1.13453433213346221997 
+1.13359784011101938361 +1.13266180193948273747 +1.13172621777133142196 +1.13079108775853698354 +1.12985641205256115427 +1.12892219080435607381 +1.12798842416436451153 +1.12705511228252364120 +1.12612225530825926789 +1.12518985339049049088 +1.12425790667762925956 +1.12332641531757926323 +1.12239537945773704131 +1.12146479924499331560 +1.12053467482572921554 +1.11960500634582271751 +1.11867579395064442593 +1.11774703778505801743 +1.11681873799342246123 +1.11589089471959179711 +1.11496350810691335909 +1.11403657829823066194 +1.11311010543588340127 +1.11218408966170412278 +1.11125853111702421749 +1.11033342994266970294 +1.10940878627896277742 +1.10848460026572315229 +1.10756087204226760790 +1.10663760174740821718 +1.10571478951945656455 +1.10479243549622219156 +1.10387053981500971034 +1.10294910261262502083 +1.10202812402537175807 +1.10110760418905062608 +1.10018754323896361669 +1.09926794130991134502 +1.09834879853619260537 +1.09743011505160792396 +1.09651189098945711642 +1.09559412648253995393 +1.09467682166315727343 +1.09375997666311142176 +1.09284359161370425717 +1.09192766664574092417 +1.09101220188952696688 +1.09009719747487010544 +1.08918265353108134619 +1.08826857018697342738 +1.08735494757086104123 +1.08644178581056372046 +1.08552908503340250768 +1.08461684536620395214 +1.08370506693529655706 +1.08279374986651499846 +1.08188289428519635038 +1.08097250031618452581 +1.08006256808382583579 +1.07915309771197409638 +1.07824408932398796424 +1.07733554304273204671 +1.07642745899057579173 +1.07551983728939704044 +1.07461267806057958474 +1.07370598142501250116 +1.07279974750309436970 +1.07189397641473060929 +1.07098866827933347778 +1.07008382321582451446 +1.06917944134263254163 +1.06827552277769544098 +1.06737206763846037560 +1.06646907604188379004 +1.06556654810443030001 +1.06466448394207602313 +1.06376288367030547022 +1.06286174740411420991 +1.06196107525800864657 +1.06106086734600646437 +1.06016112378163551710 +1.05926184467793538246 +1.05836303014745714002 
+1.05746468030226492552 +1.05656679525393415453 +1.05566937511355418700 +1.05477241999172499654 +1.05387592999856249953 +1.05297990524369344811 +1.05208434583626031511 +1.05118925188491885159 +1.05029462349783941910 +1.04940046078270676766 +1.04850676384672092389 +1.04761353279659608084 +1.04672076773856326248 +1.04582846877836876942 +1.04493663602127528911 +1.04404526957206167381 +1.04315436953502294060 +1.04226393601397182564 +1.04137396911223834017 +1.04048446893266977042 +1.03959543557763156585 +1.03870686914900622888 +1.03781876974819753379 +1.03693113747612386533 +1.03604397243322665645 +1.03515727471946483718 +1.03427104443431572278 +1.03338528167677945468 +1.03249998654537522569 +1.03161515913814105794 +1.03073079955263868790 +1.02984690788594934752 +1.02896348423467576261 +1.02808052869494237491 +1.02719804136239734049 +1.02631602233220875497 +1.02543447169906842831 +1.02455338955719166272 +1.02367277600031658658 +1.02279263112170459848 +1.02191295501414169955 +1.02103374776993716111 +1.02015500948092552314 +1.01927674023846592810 +1.01839894013344256507 +1.01752160925626466970 +1.01664474769686785649 +1.01576835554471323064 +1.01489243288878872029 +1.01401697981760885447 +1.01314199641921454109 +1.01226748278117417712 +1.01139343899058387066 +1.01051986513406744095 +1.00964676129777797264 +1.00877412756739515132 +1.00790196402812881615 +1.00703027076471784973 +1.00615904786142928984 +1.00528829540206188220 +1.00441801346994274979 +1.00354820214793094557 +1.00267886151841434383 +1.00180999166331363703 +1.00094159266407922715 +1.00007366460169477840 +0.99920620755667410862 +0.99833922160906529708 +0.99747270683844702077 +0.99660666332393210709 +0.99574109114416597954 +0.99487599037732776797 +0.99401136110113075262 +0.99314720339282236417 +0.99228351732918373962 +0.99142030298653172071 +0.99055756044071741062 +0.98969528976712739521 +0.98883349104068529734 +0.98797216433584977846 +0.98711130972661531580 +0.98625092728651475582 +0.98539101708861609463 
+0.98453157920552569760 +0.98367261370938774423 +0.98281412067188456128 +0.98195610016423540145 +0.98109855225719988514 +0.98024147702107533586 +0.97938487452569866765 +0.97852874484044705117 +0.97767308803423802477 +0.97681790417552649686 +0.97596319333231162929 +0.97510895557213028706 +0.97425519096206347758 +0.97340189956873135468 +0.97254908145829799260 +0.97169673669646761116 +0.97084486534848879469 +0.96999346747915171640 +0.96914254315279013685 +0.96829209243328162593 +0.96744211538404800699 +0.96659261206805369149 +0.96574358254780945376 +0.96489502688536921138 +0.96404694514233335578 +0.96319933737984697597 +0.96235220365860230096 +0.96150554403883525811 +0.96065935858033091321 +0.95981364734241947367 +0.95896841038397850898 +0.95812364776343372785 +0.95727935953875853414 +0.95643554576747369378 +0.95559220650664977725 +0.95474934181290405100 +0.95390695174240547338 +0.95306503635087136406 +0.95222359569356784803 +0.95138262982531274226 +0.95054213880047400131 +0.94970212267296949538 +0.94886258149626934166 +0.94802351532339523832 +0.94718492420691902112 +0.94634680819896677129 +0.94550916735121648404 +0.94467200171489784655 +0.94383531134079490243 +0.94299909627924560773 +0.94216335658014016552 +0.94132809229292435660 +0.94049330346659865132 +0.93965899014971676628 +0.93882515239038932808 +0.93799179023628209695 +0.93715890373461585572 +0.93632649293216874131 +0.93549455787527502348 +0.93466309860982554891 +0.93383211518126907347 +0.93300160763461248425 +0.93217157601441835713 +0.93134202036481039677 +0.93051294072946832969 +0.92968433715163256714 +0.92885620967410276183 +0.92802855833923836304 +0.92720138318895706231 +0.92637468426473901228 +0.92554846160762394014 +0.92472271525821270188 +0.92389744525666861463 +0.92307265164271534719 +0.92224833445563947354 +0.92142449373428947368 +0.92060112951707673279 +0.91977824184197642943 +0.91895583074652620326 +0.91813389626782893060 +0.91731243844254928277 +0.91649145730691961020 +0.91567095289673394731 
+0.91485092524735389663 +0.91403137439370518713 +0.91321230037028100490 +0.91239370321113910656 +0.91157558294990492787 +0.91075793961977025148 +0.90994077325349465024 +0.90912408388340515408 +0.90830787154139758233 +0.90749213625893399016 +0.90667687806704810871 +0.90586209699633990500 +0.90504779307698102198 +0.90423396633871155892 +0.90342061681084262492 +0.90260774452225478459 +0.90179534950140005645 +0.90098343177630191292 +0.90017199137455483626 +0.89936102832332542878 +0.89855054264935330099 +0.89774053437894907326 +0.89693100353799770641 +0.89612195015195672543 +0.89531337424585766271 +0.89450527584430628014 +0.89369765497148268008 +0.89289051165114097230 +0.89208384590661193858 +0.89127765776079981297 +0.89047194723618605661 +0.88966671435482791441 +0.88886195913835974736 +0.88805768160799181121 +0.88725388178451236598 +0.88645055968828623261 +0.88564771533925790159 +0.88484534875694886846 +0.88404345996046018730 +0.88324204896847124946 +0.88244111579924133792 +0.88164066047060896114 +0.88084068299999307428 +0.88004118340439374535 +0.87924216170039148910 +0.87844361790414704494 +0.87764555203140492967 +0.87684796409748877455 +0.87605085411730665435 +0.87525422210534931100 +0.87445806807568970953 +0.87366239204198370416 +0.87286719401747225877 +0.87207247401497933748 +0.87127823204691479120 +0.87048446812527191518 +0.86969118226162922536 +0.86889837446715234570 +0.86810604475259123269 +0.86731419312828239576 +0.86652281960415045159 +0.86573192418970545958 +0.86494150689404514232 +0.86415156772585599576 +0.86336210669341162394 +0.86257312380457429324 +0.86178461906679559856 +0.86099659248711624127 +0.86020904407216580712 +0.85942197382816531981 +0.85863538176092535359 +0.85784926787584603325 +0.85706363217792080889 +0.85627847467173368035 +0.85549379536145897518 +0.85470959425086567851 +0.85392587134331332521 +0.85314262664175610773 +0.85235986014873998950 +0.85157757186640592462 +0.85079576179648730427 +0.85001442994031362055 +0.84923357629880824593 
+0.84845320087248965457 +0.84767330366147242149 +0.84689388466546666745 +0.84611494388377828102 +0.84533648131531080594 +0.84455849695856410886 +0.84378099081163515649 +0.84300396287221979197 +0.84222741313761151360 +0.84145134160470169693 +0.84067574826998114901 +0.83990063312953933128 +0.83912599617906680205 +0.83835183741385221889 +0.83757815682878633545 +0.83680495441835933690 +0.83603223017666350447 +0.83525998409739132811 +0.83448821617383894811 +0.83371692639890304655 +0.83294611476508495507 +0.83217578126448632503 +0.83140592588881445657 +0.83063654862937874590 +0.82986764947709423801 +0.82909922842247940622 +0.82833128545565859469 +0.82756382056635990896 +0.82679683374391876871 +0.82603032497727468808 +0.82526429425497582759 +0.82449874156517510837 +0.82373366689563465304 +0.82296907023372145584 +0.82220495156641282275 +0.82144131088029270771 +0.82067814816155493229 +0.81991546339600140936 +0.81915325656904469653 +0.81839152766570544273 +0.81763027667061582981 +0.81686950356801790729 +0.81610920834176503558 +0.81534939097532121988 +0.81459005145176421880 +0.81383118975378021531 +0.81307280586367169928 +0.81231489976335147229 +0.81155747143434631141 +0.81080052085779652504 +0.81004404801445772932 +0.80928805288469729540 +0.80853253544849934542 +0.80777749568546219905 +0.80702293357480048286 +0.80626884909534335399 +0.80551524222553860799 +0.80476211294344812686 +0.80400946122675209793 +0.80325728705274834773 +0.80250559039835134278 +0.80175437124009529821 +0.80100362955413240140 +0.80025336531623292302 +0.79950357850178765950 +0.79875426908580660079 +0.79800543704291948544 +0.79725708234737779900 +0.79650920497305222057 +0.79576180489343606439 +0.79501488208164383664 +0.79426843651041179051 +0.79352246815209859232 +0.79277697697868698690 +0.79203196296178113300 +0.79128742607260937891 +0.79054336628202481752 +0.78979978356050350996 +0.78905667787814759428 +0.78831404920468373110 +0.78757189750946365869 +0.78683022276146585838 +0.78608902492929499939 
+0.78534830398118138373 +0.78460805988498349972 +0.78386829260818724485 +0.78312900211790537064 +0.78239018838087970309 +0.78165185136348080963 +0.78091399103170744400 +0.78017660735118832260 +0.77943970028718245757 +0.77870326980457726940 +0.77796731586789313884 +0.77723183844127918807 +0.77649683748851694443 +0.77576231297301978529 +0.77502826485783316013 +0.77429469310563392437 +0.77356159767873367006 +0.77282897853907572827 +0.77209683564823738955 +0.77136516896743079208 +0.77063397845750214454 +0.76990326407893183713 +0.76917302579183643996 +0.76844326355596748179 +0.76771397733071244929 +0.76698516707509589718 +0.76625683274777933729 +0.76552897430705979520 +0.76480159171087391812 +0.76407468491679486622 +0.76334825388203475516 +0.76262229856344476708 +0.76189681891751492859 +0.76117181490037422176 +0.76044728646779280457 +0.75972323357517956843 +0.75899965617758569092 +0.75827655422970219323 +0.75755392768586293784 +0.75683177650004207493 +0.75611010062585792824 +0.75538890001656877615 +0.75466817462507906900 +0.75394792440393454402 +0.75322814930532655531 +0.75250884928108863203 +0.75179002428270036429 +0.75107167426128595977 +0.75035379916761535402 +0.74963639895210387731 +0.74891947356481358700 +0.74820302295545249027 +0.74748704707337676467 +0.74677154586758787147 +0.74605651928673744067 +0.74534196727912416236 +0.74462788979269556311 +0.74391428677504767286 +0.74320115817342657927 +0.74248850393472765052 +0.74177632400549653457 +0.74106461833192938116 +0.74035338685987439611 +0.73964262953482895480 +0.73893234630194470913 +0.73822253710602281362 +0.73751320189151914342 +0.73680434060254174078 +0.73609595318285248045 +0.73538803957586484916 +0.73468059972464994090 +0.73397363357193035061 +0.73326714106008517025 +0.73256112213114832343 +0.73185557672680989771 +0.73115050478841514536 +0.73044590625696725894 +0.72974178107312503982 +0.72903812917720489661 +0.72833495050918151126 +0.72763224500868750599 +0.72693001261501322130 +0.72622825326710915839 
+0.72552696690358386977 +0.72482615346270629075 +0.72412581288240607247 +0.72342594510027158350 +0.72272655005355368463 +0.72202762767916428555 +0.72132917791367590077 +0.72063120069332431417 +0.71993369595400813488 +0.71923666363128679890 +0.71854010366038501001 +0.71784401597619085234 +0.71714840051325534631 +0.71645325720579533524 +0.71575858598769226404 +0.71506438679249162416 +0.71437065955340595114 +0.71367740420331415852 +0.71298462067475965043 +0.71229230889995520659 +0.71160046881077954062 +0.71090910033877818819 +0.71021820341516661568 +0.70952777797082766664 +0.70883782393631300511 +0.70814834124184444786 +0.70745932981731252109 +0.70677078959227812582 +0.70608272049597331499 +0.70539512245729940609 +0.70470799540483064494 +0.70402133926681220721 +0.70333515397116208590 +0.70264943944546898180 +0.70196419561699685552 +0.70127942241268070855 +0.70059511975913058013 +0.69991128758263021492 +0.69922792580913850635 +0.69854503436428738716 +0.69786261317338660337 +0.69718066216141949543 +0.69649918125304621785 +0.69581817037260373926 +0.69513762944410584232 +0.69445755839124323483 +0.69377795713738454886 +0.69309882560557645181 +0.69242016371854420154 +0.69174197139869142426 +0.69106424856810233504 +0.69038699514853940631 +0.68971021106144636548 +0.68903389622794575242 +0.68835805056884336040 +0.68768267400462457228 +0.68700776645545780230 +0.68633332784119183145 +0.68565935808135980434 +0.68498585709517567643 +0.68431282480153898806 +0.68364026111903164473 +0.68296816596592069271 +0.68229653926015554344 +0.68162538091937363571 +0.68095469086089510657 +0.68028446900172723222 +0.67961471525856265163 +0.67894542954778192012 +0.67827661178545062270 +0.67760826188732314890 +0.67694037976884058327 +0.67627296534513325899 +0.67560601853101964753 +0.67493953924100669184 +0.67427352738929080544 +0.67360798288975920478 +0.67294290565598779974 +0.67227829560124408026 +0.67161415263848611712 +0.67095047668036367217 +0.67028726763921764320 +0.66962452542708184033 
+0.66896224995568209781 +0.66830044113643760628 +0.66763909888046091279 +0.66697822309855847589 +0.66631781370123033259 +0.66565787059867231878 +0.66499839370077329370 +0.66433938291711980284 +0.66368083815699274730 +0.66302275932936993730 +0.66236514634292498194 +0.66170799910603017580 +0.66105131752675261314 +0.66039510151285962802 +0.65973935097181590770 +0.65908406581078526898 +0.65842924593662888189 +0.65777489125590971053 +0.65712100167488840530 +0.65646757709952752169 +0.65581461743548996601 +0.65516212258813877334 +0.65451009246253932794 +0.65385852696345880819 +0.65320742599536607553 +0.65255678946243345084 +0.65190661726853615932 +0.65125690931725188637 +0.65060766551186399731 +0.64995888575535887277 +0.64931056995042790714 +0.64866271799946728649 +0.64801532980457987598 +0.64736840526757277736 +0.64672194428996088167 +0.64607594677296498187 +0.64543041261751266102 +0.64478534172424040172 +0.64414073399349147664 +0.64349658932531750288 +0.64285290761947944116 +0.64220968877544737374 +0.64156693269239983834 +0.64092463926922649264 +0.64028280840452733713 +0.63964143999661249307 +0.63900053394350397884 +0.63836009014293404462 +0.63772010849234828100 +0.63708058888890439775 +0.63644153122947244583 +0.63580293541063626073 +0.63516480132869279629 +0.63452712887965256883 +0.63388991795924132244 +0.63325316846289991801 +0.63261688028578300091 +0.63198105332276199864 +0.63134568746842456566 +0.63071078261707325119 +0.63007633866272882983 +0.62944235549912863625 +0.62880883301972856358 +0.62817577111770128706 +0.62754316968593870651 +0.62691102861705094718 +0.62627934780336802500 +0.62564812713693962465 +0.62501736650953509944 +0.62438706581264402651 +0.62375722493747798314 +0.62312784377496799326 +0.62249892221576874629 +0.62187046015025537748 +0.62124245746852702066 +0.62061491406040392160 +0.61998782981543132387 +0.61936120462287680422 +0.61873503837173293718 +0.61810933095071607379 +0.61748408224826889512 +0.61685929215255741465 +0.61623496055147497508 
+0.61561108733263947279 +0.61498767238339702157 +0.61436471559081939908 +0.61374221684170693347 +0.61312017602258594984 +0.61249859301971276704 +0.61187746771907025600 +0.61125680000637250266 +0.61063658976706103321 +0.61001683688630825575 +0.60939754124901568400 +0.60877870273981693483 +0.60816032124307528584 +0.60754239664288500755 +0.60692492882307391699 +0.60630791766720093516 +0.60569136305855675317 +0.60507526488016705191 +0.60445962301478850520 +0.60384443734491322076 +0.60322970775276751887 +0.60261543412031159939 +0.60200161632924076294 +0.60138825426098574400 +0.60077534779671304399 +0.60016289681732593042 +0.59955090120346332672 +0.59893936083550225469 +0.59832827559355650227 +0.59771764535747695657 +0.59710747000685415742 +0.59649774942101607689 +0.59588848347903078384 +0.59527967205970522269 +0.59467131504158576849 +0.59406341230295933720 +0.59345596372185349665 +0.59284896917603724376 +0.59224242854302033834 +0.59163634170005541257 +0.59103070852413575054 +0.59042552889199817479 +0.58982080268012293534 +0.58921652976473293251 +0.58861271002179571532 +0.58800934332702292640 +0.58740642955586974683 +0.58680396858353767175 +0.58620196028497317808 +0.58560040453486850165 +0.58499930120766285846 +0.58439865017754155652 +0.58379845131843643991 +0.58319870450402744311 +0.58259940960774236896 +0.58200056650275733272 +0.58140217506199687314 +0.58080423515813539570 +0.58020674666359539629 +0.57960970945054968162 +0.57901312339092181336 +0.57841698835638655218 +0.57782130421836819245 +0.57722607084804378186 +0.57663128811634167814 +0.57603695589394232623 +0.57544307405127947952 +0.57484964245853942266 +0.57425666098566230389 +0.57366412950234257906 +0.57307204787802767942 +0.57248041598192012103 +0.57188923368297861494 +0.57129850084991629089 +0.57070821735120236262 +0.57011838305506257196 +0.56952899782947941087 +0.56894006154219156635 +0.56835157406069658492 +0.56776353525224887431 +0.56717594498386181279 +0.56658880312230652798 +0.56600210953411456138 
+0.56541586408557531485 +0.56483006664273971431 +0.56424471707141754528 +0.56365981523718078350 +0.56307536100536115242 +0.56249135424105234371 +0.56190779480910979515 +0.56132468257415191193 +0.56074201740055884535 +0.56015979915247449128 +0.55957802769380560193 +0.55899670288822367326 +0.55841582459916327963 +0.55783539268982507142 +0.55725540702317355457 +0.55667586746193986613 +0.55609677386861955384 +0.55551812610547601778 +0.55493992403453773488 +0.55436216751760158949 +0.55378485641623131919 +0.55320799059175873591 +0.55263156990528339296 +0.55205559421767469441 +0.55148006338956923056 +0.55090497728137466371 +0.55033033575326795184 +0.54975613866519634776 +0.54918238587687739916 +0.54860907724780094696 +0.54803621263722590573 +0.54746379190418514860 +0.54689181490748306480 +0.54632028150569700298 +0.54574919155717627195 +0.54517854492004513833 +0.54460834145220038405 +0.54403858101131330471 +0.54346926345483059784 +0.54290038863997358565 +0.54233195642373843715 +0.54176396666289705628 +0.54119641921399863627 +0.54062931393336788322 +0.54006265067710745864 +0.53949642930109653616 +0.53893064966099257784 +0.53836531161223044606 +0.53780041501002484594 +0.53723595970936843802 +0.53667194556503383662 +0.53610837243157316578 +0.53554524016331894742 +0.53498254861438332419 +0.53442029763866027992 +0.53385848708982519550 +0.53329711682133462691 +0.53273618668642785945 +0.53217569653812646369 +0.53161564622923429546 +0.53105603561233916121 +0.53049686453981270695 +0.52993813286381064032 +0.52937984043627261954 +0.52882198710892380777 +0.52826457273327398489 +0.52770759716061910183 +0.52715106024204105850 +0.52659496182840814793 +0.52603930177037605542 +0.52548407991838708142 +0.52492929612267047457 +0.52437495023324542931 +0.52382104209991753319 +0.52326757157228298567 +0.52271453849972593364 +0.52216194273142080284 +0.52160978411633118768 +0.52105806250321218265 +0.52050677774060860603 +0.51995592967685722030 +0.51940551816008662112 +0.51885554303821590505 
+0.51830600415895755617 +0.51775690136981689093 +0.51720823451809172511 +0.51666000345087448320 +0.51611220801505042211 +0.51556484805729929644 +0.51501792342409569159 +0.51447143396170924579 +0.51392537951620520520 +0.51337975993344497905 +0.51283457505908536245 +0.51228982473858075686 +0.51174550881718128270 +0.51120162713993555492 +0.51065817955169034992 +0.51011516589708949532 +0.50957258602057564634 +0.50903043976639172907 +0.50848872697857838698 +0.50794744750097631236 +0.50740660117722757860 +0.50686618785077319771 +0.50632620736485633994 +0.50578665956252077951 +0.50524754428661211580 +0.50470886137977788444 +0.50417061068446911154 +0.50363279204293842639 +0.50309540529724194879 +0.50255845028923995521 +0.50202192686059632365 +0.50148583485277942184 +0.50095017410706299543 +0.50041494446452527978 +0.49988014576604977712 +0.49934577785232736602 +0.49881184056385363679 +0.49827833374093177810 +0.49774525722367202185 +0.49721261085199230934 +0.49668039446561806916 +0.49614860790408321645 +0.49561725100673009736 +0.49508632361271054378 +0.49455582556098548475 +0.49402575669032605665 +0.49349611683931282613 +0.49296690584633806598 +0.49243812354960392330 +0.49190976978712436241 +0.49138184439672571990 +0.49085434721604576103 +0.49032727808253451229 +0.48980063683345576031 +0.48927442330588555297 +0.48874863733671441990 +0.48822327876264720592 +0.48769834742020218288 +0.48717384314571332560 +0.48664976577532892410 +0.48612611514501358201 +0.48560289109054760592 +0.48508009344752800462 +0.48455772205136871111 +0.48403577673729930586 +0.48351425734036829196 +0.48299316369544154082 +0.48247249563720356891 +0.48195225300015714920 +0.48143243561862447688 +0.48091304332674650324 +0.48039407595848437893 +0.47987553334761950952 +0.47935741532775366647 +0.47883972173231020841 +0.47832245239453302643 +0.47780560714748776530 +0.47728918582406215654 +0.47677318825696674010 +0.47625761427873486431 +0.47574246372172290798 +0.47522773641811144607 +0.47471343219990391749 
+0.47419955089892840139 +0.47368609234683872744 +0.47317305637511247740 +0.47266044281505420477 +0.47214825149779321434 +0.47163648225428517202 +0.47112513491531260446 +0.47061420931148528757 +0.47010370527323980250 +0.46959362263084142297 +0.46908396121438250548 +0.46857472085378498727 +0.46806590137879822144 +0.46755750261900264064 +0.46704952440380692602 +0.46654196656245050523 +0.46603482892400344140 +0.46552811131736560046 +0.46502181357126892713 +0.46451593551427639017 +0.46401047697478298160 +0.46350543778101671588 +0.46300081776103724218 +0.46249661674273778722 +0.46199283455384437813 +0.46148947102191756331 +0.46098652597435224587 +0.46048399923837740610 +0.45998189064105649004 +0.45948020000928929685 +0.45897892716981031347 +0.45847807194919065754 +0.45797763417383807738 +0.45747761366999645238 +0.45697801026374701427 +0.45647882378100940182 +0.45598005404753949588 +0.45548170088893263907 +0.45498376413062285861 +0.45448624359788336591 +0.45398913911582539082 +0.45349245050940173440 +0.45299617760340410433 +0.45250032022246505781 +0.45200487819105866771 +0.45150985133349991196 +0.45101523947394422942 +0.45052104243639140568 +0.45002726004468113219 +0.44953389212249761364 +0.44904093849336779165 +0.44854839898066156678 +0.44805627340759318633 +0.44756456159722118882 +0.44707326337244801540 +0.44658237855602200828 +0.44609190697053641150 +0.44560184843843020364 +0.44511220278198815326 +0.44462296982334248430 +0.44413414938447076663 +0.44364574128719880264 +0.44315774535319990557 +0.44267016140399528812 +0.44218298926095395140 +0.44169622874529390621 +0.44120987967808228403 +0.44072394188023517048 +0.44023841517251915967 +0.43975329937555107662 +0.43926859430979658949 +0.43878429979557437290 +0.43830041565305288831 +0.43781694170225282647 +0.43733387776304694095 +0.43685122365515993703 +0.43636897919816919345 +0.43588714421150548395 +0.43540571851445281082 +0.43492470192614912650 +0.43444409426558633358 +0.43396389535161133955 +0.43348410500292539060 
+0.43300472303808479335 +0.43252574927550219153 +0.43204718353344589987 +0.43156902563004090334 +0.43109127538326813545 +0.43061393261096653218 +0.43013699713083125564 +0.42966046876041641411 +0.42918434731713384078 +0.42870863261825409296 +0.42823332448090634106 +0.42775842272207925676 +0.42728392715862056894 +0.42680983760723878451 +0.42633615388450230022 +0.42586287580684017984 +0.42539000319054320887 +0.42491753585176261776 +0.42444547360651141421 +0.42397381627066571541 +0.42350256365996369334 +0.42303171559000629642 +0.42256127187625763808 +0.42209123233404571840 +0.42162159677856153595 +0.42115236502486197434 +0.42068353688786730427 +0.42021511218236323737 +0.41974709072300137036 +0.41927947232429807478 +0.41881225680063627337 +0.41834544396626555107 +0.41787903363530193301 +0.41741302562172910573 +0.41694741973939813962 +0.41648221580202771097 +0.41601741362320426854 +0.41555301301638425393 +0.41508901379489193673 +0.41462541577192141284 +0.41416221876053638251 +0.41369942257367048333 +0.41323702702412734578 +0.41277503192458186998 +0.41231343708758028122 +0.41185224232553985235 +0.41139144745074929244 +0.41093105227537035651 +0.41047105661143640232 +0.41001146027085422219 +0.40955226306540387649 +0.40909346480673902668 +0.40863506530638643577 +0.40817706437574868827 +0.40771946182610097065 +0.40726225746859529009 +0.40680545111425825411 +0.40634904257399251382 +0.40589303165857570921 +0.40543741817866318922 +0.40498220194478634637 +0.40452738276735361600 +0.40407296045665158646 +0.40361893482284411094 +0.40316530567597275159 +0.40271207282595877786 +0.40225923608260116815 +0.40180679525557855269 +0.40135475015444926905 +0.40090310058865186171 +0.40045184636750408291 +0.40000098730020516857 +0.39955052319583528320 +0.39910045386335535333 +0.39865077911160828883 +0.39820149874931959344 +0.39775261258509553297 +0.39730412042742713208 +0.39685602208468673258 +0.39640831736513099104 +0.39596100607689993511 +0.39551408802801818476 +0.39506756302639400857 
+0.39462143087982148870 +0.39417569139597841144 +0.39373034438242943134 +0.39328538964662429489 +0.39284082699589911725 +0.39239665623747643775 +0.39195287717846610809 +0.39150948962586418212 +0.39106649338655508075 +0.39062388826731098135 +0.39018167407479265041 +0.38973985061554844433 +0.38929841769601636337 +0.38885737512252344095 +0.38841672270128685396 +0.38797646023841281249 +0.38753658753989905783 +0.38709710441163258654 +0.38665801065939209291 +0.38621930608884785796 +0.38578099050556130534 +0.38534306371498600052 +0.38490552552246865003 +0.38446837573324726955 +0.38403161415245351540 +0.38359524058511285105 +0.38315925483614421410 +0.38272365671036029378 +0.38228844601246880774 +0.38185362254707133634 +0.38141918611866515443 +0.38098513653164273185 +0.38055147359029239951 +0.38011819709879857143 +0.37968530686124257745 +0.37925280268160160846 +0.37882068436375004872 +0.37838895171146058605 +0.37795760452840299060 +0.37752664261814528057 +0.37709606578415449940 +0.37666587382979571652 +0.37623606655833313761 +0.37580664377293088174 +0.37537760527665303689 +0.37494895087246304932 +0.37452068036322605504 +0.37409279355170677039 +0.37366529024057160147 +0.37323817023238836654 +0.37281143332962696224 +0.37238507933465930799 +0.37195910804975962360 +0.37153351927710509539 +0.37110831281877615373 +0.37068348847675647306 +0.37025904605293380456 +0.36983498534910014266 +0.36941130616695155853 +0.36898800830808947682 +0.36856509157401984300 +0.36814255576615451115 +0.36772040068581146599 +0.36729862613421437878 +0.36687723191249310695 +0.36645621782168552594 +0.36603558366273486469 +0.36561532923649370241 +0.36519545434372102655 +0.36477595878508500826 +0.36435684236116122614 +0.36393810487243510865 +0.36351974611930026882 +0.36310176590206039160 +0.36268416402092917838 +0.36226694027602962533 +0.36185009446739563321 +0.36143362639497222943 +0.36101753585861451334 +0.36060182265809020974 +0.36018648659307794802 +0.35977152746316931609 +0.35935694506786680646 
+0.35894273920658753552 +0.35852890967866007932 +0.35811545628332719371 +0.35770237881974525918 +0.35728967708698500250 +0.35687735088403077510 +0.35646540000978221840 +0.35605382426305348664 +0.35564262344257457915 +0.35523179734699084076 +0.35482134577486418303 +0.35441126852467164099 +0.35400156539480859275 +0.35359223618358531782 +0.35318328068923110497 +0.35277469870989208722 +0.35236649004363240767 +0.35195865448843471901 +0.35155119184219985051 +0.35114410190274775170 +0.35073738446781727029 +0.35033103933506737349 +0.34992506630207687035 +0.34951946516634402329 +0.34911423572528826886 +0.34870937777624938514 +0.34830489111648865741 +0.34790077554318893371 +0.34749703085345440279 +0.34709365684431187082 +0.34669065331270970676 +0.34628802005552000720 +0.34588575686953743071 +0.34548386355148008597 +0.34508233989798992036 +0.34468118570563255343 +0.34428040077089799853 +0.34387998489020116244 +0.34347993785988195636 +0.34308025947620512941 +0.34268094953536160086 +0.34228200783346757197 +0.34188343416656608031 +0.34148522833062600057 +0.34108739012154387638 +0.34068991933514286563 +0.34029281576717407276 +0.33989607921331582707 +0.33949970946917479298 +0.33910370633028619203 +0.33870806959211391396 +0.33831279905005084974 +0.33791789449941989076 +0.33752335573547254111 +0.33712918255339058282 +0.33673537474828674210 +0.33634193211520374556 +0.33594885444911565253 +0.33555614154492752199 +0.33516379319747624521 +0.33477180920152971311 +0.33438018935178909219 +0.33398893344288738128 +0.33359804126939091029 +0.33320751262579867413 +0.33281734730654310983 +0.33242754510599031859 +0.33203810581844067640 +0.33164902923812888957 +0.33126031515922416126 +0.33087196337583091310 +0.33048397368198889623 +0.33009634587167235864 +0.32970907973879276520 +0.32932217507719757643 +0.32893563168067019298 +0.32854944934293078829 +0.32816362785763764087 +0.32777816701838508040 +0.32739306661870631876 +0.32700832645207184024 +0.32662394631189095584 +0.32623992599151124816 
+0.32585626528421962611 +0.32547296398324165878 +0.32509002188174312975 +0.32470743877282937095 +0.32432521444954587331 +0.32394334870487867528 +0.32356184133175436290 +0.32318069212304084692 +0.32279990087154736278 +0.32241946737002480372 +0.32203939141116610934 +0.32165967278760609904 +0.32128031129192285986 +0.32090130671663691375 +0.32052265885421193925 +0.32014436749705504903 +0.31976643243751790013 +0.31938885346789519515 +0.31901163038042662512 +0.31863476296729614790 +0.31825825102063309835 +0.31788209433251218838 +0.31750629269495350693 +0.31713084589992279749 +0.31675575373933240186 +0.31638101600504042743 +0.31600663248885302314 +0.31563260298252210356 +0.31525892727774784685 +0.31488560516617747354 +0.31451263643940685633 +0.31414002088897929887 +0.31376775830638709008 +0.31339584848307172615 +0.31302429121042346649 +0.31265308627978172229 +0.31228223348243633328 +0.31191173260962640201 +0.31154158345254184814 +0.31117178580232335294 +0.31080233945006241481 +0.31043324418680123822 +0.31006449980353378848 +0.30969610609120618028 +0.30932806284071640013 +0.30896036984291486149 +0.30859302688860440478 +0.30822603376854118551 +0.30785939027343411922 +0.30749309619394604720 +0.30712715132069362545 +0.30676155544424765775 +0.30639630835513359530 +0.30603140984383075951 +0.30566685970077417389 +0.30530265771635417549 +0.30493880368091613731 +0.30457529738476191161 +0.30421213861814966339 +0.30384932717129292667 +0.30348686283436260291 +0.30312474539748712754 +0.30276297465075147075 +0.30240155038419846978 +0.30204047238782899543 +0.30167974045160189656 +0.30131935436543438867 +0.30095931391920260900 +0.30059961890274167207 +0.30024026910584605821 +0.29988126431827016871 +0.29952260432972749316 +0.29916428892989194166 +0.29880631790839834450 +0.29844869105484195249 +0.29809140815877904762 +0.29773446900972727613 +0.29737787339716598156 +0.29702162111053553861 +0.29666571193923974015 +0.29631014567264424286 +0.29595492210007712242 +0.29560004101083020567 
+0.29524550219415790497 +0.29489130543927799533 +0.29453745053537294663 +0.29418393727158892448 +0.29383076543703656736 +0.29347793482079104210 +0.29312544521189276558 +0.29277329639934679406 +0.29242148817212443301 +0.29207002031916240448 +0.29171889262936395726 +0.29136810489159781223 +0.29101765689470016074 +0.29066754842747377641 +0.29031777927868845923 +0.28996834923708181275 +0.28961925809135941057 +0.28927050563019413021 +0.28892209164222765194 +0.28857401591606995916 +0.28822627824030006005 +0.28787887840346615409 +0.28753181619408602065 +0.28718509140064640839 +0.28683870381160486707 +0.28649265321538847084 +0.28614693940039526154 +0.28580156215499386008 +0.28545652126752407707 +0.28511181652629680183 +0.28476744771959516811 +0.28442341463567288873 +0.28407971706275703117 +0.28373635478904690732 +0.28339332760271412903 +0.28305063529190316318 +0.28270827764473221988 +0.28236625444929264184 +0.28202456549364962601 +0.28168321056584266771 +0.28134218945388522748 +0.28100150194576506424 +0.28066114782944584505 +0.28032112689286525775 +0.27998143892393689836 +0.27964208371054999347 +0.27930306104056984440 +0.27896437070183732754 +0.27862601248217044869 +0.27828798616936373245 +0.27795029155118866626 +0.27761292841539381149 +0.27727589654970569155 +0.27693919574182818133 +0.27660282577944372839 +0.27626678645021240932 +0.27593107754177353952 +0.27559569884174489607 +0.27526065013772382795 +0.27492593121728653438 +0.27459154186798900854 +0.27425748187736742612 +0.27392375103293786776 +0.27359034912219704072 +0.27325727593262244541 +0.27292453125167209782 +0.27259211486678558423 +0.27226002656538417224 +0.27192826613487047771 +0.27159683336262924191 +0.27126572803602783113 +0.27093494994241584806 +0.27060449886912529838 +0.27027437460347197851 +0.26994457693275480947 +0.26961510564425600345 +0.26928596052524206295 +0.26895714136296322572 +0.26862864794465390883 +0.26830048005753365237 +0.26797263748880684187 +0.26764512002566248627 +0.26731792745527566124 
+0.26699105956480650992 +0.26666451614140124216 +0.26633829697219235655 +0.26601240184429891800 +0.26568683054482639117 +0.26536158286086719560 +0.26503665857950120532 +0.26471205748779519373 +0.26438777937280422137 +0.26406382402157091427 +0.26374019122112640767 +0.26341688075849029049 +0.26309389242067054981 +0.26277122599466418151 +0.26244888126745763435 +0.26212685802602675444 +0.26180515605733734041 +0.26148377514834469926 +0.26116271508599486761 +0.26084197565722389012 +0.26052155664895859655 +0.26020145784811743450 +0.25988167904160941468 +0.25956222001633560970 +0.25924308055918826588 +0.25892426045705152493 +0.25860575949680253416 +0.25828757746531044726 +0.25796971414943725698 +0.25765216933603773963 +0.25733494281196023223 +0.25701803436404629943 +0.25670144377913128864 +0.25638517084404499613 +0.25606921534561077891 +0.25575357707064660939 +0.25543825580596551950 +0.25512325133837515656 +0.25480856345467861601 +0.25449419194167427483 +0.25418013658615673522 +0.25386639717491560342 +0.25355297349473765456 +0.25323986533240533392 +0.25292707247469820020 +0.25261459470839275898 +0.25230243182026279580 +0.25199058359707893207 +0.25167904982561034588 +0.25136783029262299571 +0.25105692478488206287 +0.25074633308915045271 +0.25043605499219012689 +0.25012609028076165929 +0.24981643874162490215 +0.24950710016153862525 +0.24919807432726176488 +0.24888936102555284102 +0.24858096004317081773 +0.24827287116687410395 +0.24796509418342266295 +0.24765762887957665228 +0.24735047504209733971 +0.24704363245774746405 +0.24673710091329145722 +0.24643088019549472256 +0.24612497009112552226 +0.24581937038695381159 +0.24551408086975221035 +0.24520910132629608613 +0.24490443154336402620 +0.24460007130773678274 +0.24429602040619985415 +0.24399227862554126456 +0.24368884575255356228 +0.24338572157403334795 +0.24308290587678155203 +0.24278039844760340715 +0.24247819907330925293 +0.24217630754071450827 +0.24187472363663967134 +0.24157344714791118001 +0.24127247786136069019 
+0.24097181556382638035 +0.24067146004215195232 +0.24037141108318840765 +0.24007166847379282637 +0.23977223200082944943 +0.23947310145116987301 +0.23917427661169274322 +0.23887575726928420017 +0.23857754321083846083 +0.23827963422325798559 +0.23798203009345328396 +0.23768473060834383048 +0.23738773555485709332 +0.23709104471992994978 +0.23679465789050860303 +0.23649857485354866538 +0.23620279539601529706 +0.23590731930488387236 +0.23561214636713950776 +0.23531727636977756157 +0.23502270909980457758 +0.23472844434423753568 +0.23443448189010443472 +0.23414082152444484763 +0.23384746303430972714 +0.23355440620676098940 +0.23326165082887365121 +0.23296919668773419243 +0.23267704357044186048 +0.23238519126410833726 +0.23209363955585773920 +0.23180238823282800498 +0.23151143708216995187 +0.23122078589104816393 +0.23093043444664035357 +0.23064038253613888818 +0.23035062994675006842 +0.23006117646569432256 +0.22977202188020698359 +0.22948316597753823376 +0.22919460854495332658 +0.22890634936973272562 +0.22861838823917221553 +0.22833072494058340163 +0.22804335926129440382 +0.22775629098864857980 +0.22746951991000666227 +0.22718304581274448295 +0.22689686848425683063 +0.22661098771195345436 +0.22632540328326256063 +0.22604011498562953664 +0.22575512260651747765 +0.22547042593340776984 +0.22518602475379881356 +0.22490191885520835480 +0.22461810802517237495 +0.22433459205124584024 +0.22405137072100239637 +0.22376844382203464612 +0.22348581114195575914 +0.22320347246839733479 +0.22292142758901103972 +0.22263967629146927396 +0.22235821836346383873 +0.22207705359270793477 +0.22179618176693469134 +0.22151560267389822090 +0.22123531610137422976 +0.22095532183715957397 +0.22067561966907273119 +0.22039620938495313451 +0.22011709077266394807 +0.21983826362008856981 +0.21955972771513390662 +0.21928148284572948623 +0.21900352879982729060 +0.21872586536540253310 +0.21844849233045346426 +0.21817140948300173253 +0.21789461661109313373 +0.21761811350279686161 +0.21734189994620659037 
+0.21706597572943950314 +0.21679034064063837373 +0.21651499446796970694 +0.21623993699962529291 +0.21596516802382223488 +0.21569068732880264383 +0.21541649470283447121 +0.21514258993421089827 +0.21486897281125136305 +0.21459564312230139382 +0.21432260065573313645 +0.21404984519994518788 +0.21377737654336209649 +0.21350519447443644383 +0.21323329878164742901 +0.21296168925350200674 +0.21269036567853416564 +0.21241932784530603850 +0.21214857554240784676 +0.21187810855845759517 +0.21160792668210162693 +0.21133802970201545635 +0.21106841740690288067 +0.21079908958549711806 +0.21053004602655975286 +0.21026128651888259524 +0.20999281085128657098 +0.20972461881262291494 +0.20945671019177228289 +0.20918908477764561193 +0.20892174235918478664 +0.20865468272536116801 +0.20838790566517811920 +0.20812141096766909043 +0.20785519842189909001 +0.20758926781696510067 +0.20732361894199408114 +0.20705825158614610260 +0.20679316553861301631 +0.20652836058861837043 +0.20626383652541868674 +0.20599959313830257246 +0.20573563021659169170 +0.20547194754964004382 +0.20520854492683557324 +0.20494542213759897598 +0.20468257897138503187 +0.20442001521768196626 +0.20415773066601136665 +0.20389572510592995913 +0.20363399832702791525 +0.20337255011893060064 +0.20311138027129763128 +0.20285048857382362297 +0.20258987481623838556 +0.20232953878830645111 +0.20206948027982862826 +0.20180969908064089191 +0.20155019498061507721 +0.20129096776965949012 +0.20103201723771776943 +0.20077334317477069092 +0.20051494537083555669 +0.20025682361596613967 +0.19999897770025390487 +0.19974140741382598319 +0.19948411254684919602 +0.19922709288952539231 +0.19897034823209600041 +0.19871387836483944689 +0.19845768307807257202 +0.19820176216215096288 +0.19794611540746731571 +0.19769074260445437807 +0.19743564354358325574 +0.19718081801536402331 +0.19692626581034633482 +0.19667198671911825802 +0.19641798053230907772 +0.19616424704058640915 +0.19591078603465841845 +0.19565759730527357285 +0.19540468064322027986 
+0.19515203583932830278 +0.19489966268446670683 +0.19464756096954630160 +0.19439573048551922474 +0.19414417102337852561 +0.19389288237415860938 +0.19364186432893498724 +0.19339111667882660783 +0.19314063921499249887 +0.19289043172863493125 +0.19264049401099828107 +0.19239082585336936271 +0.19214142704707806719 +0.19189229738349633525 +0.19164343665403985040 +0.19139484465016720627 +0.19114652116338085031 +0.19089846598522652865 +0.19065067890729309186 +0.19040315972121515942 +0.19015590821866926174 +0.18990892419137764269 +0.18966220743110681624 +0.18941575772966764979 +0.18916957487891622458 +0.18892365867075305852 +0.18867800889712435519 +0.18843262535002155977 +0.18818750782148166434 +0.18794265610358748542 +0.18769806998846719215 +0.18745374926829555529 +0.18720969373529344759 +0.18696590318172798262 +0.18672237739991331962 +0.18647911618220988639 +0.18623611932102523969 +0.18599338660881387097 +0.18575091783807759493 +0.18550871280136596586 +0.18526677129127605559 +0.18502509310045242574 +0.18478367802158768285 +0.18454252584742278365 +0.18430163637074678529 +0.18406100938439748371 +0.18382064468126099732 +0.18358054205427243311 +0.18334070129641605318 +0.18310112220072458089 +0.18286180456028069963 +0.18262274816821663648 +0.18238395281771380141 +0.18214541830200403627 +0.18190714441436833804 +0.18166913094813852414 +0.18143137769669653858 +0.18119388445347506256 +0.18095665101195687607 +0.18071967716567613471 +0.18048296270821795328 +0.18024650743321790625 +0.18001031113436369302 +0.17977437360539427758 +0.17953869464010011048 +0.17930327403232376726 +0.17906811157595925454 +0.17883320706495331454 +0.17859856029330450911 +0.17836417105506441327 +0.17813003914433686581 +0.17789616435527838556 +0.17766254648209911515 +0.17742918531906137769 +0.17719608066048131434 +0.17696323230072869004 +0.17673064003422639390 +0.17649830365545177147 +0.17626622295893495940 +0.17603439773926171652 +0.17580282779107050950 +0.17557151290905531615 +0.17534045288796440420 
+0.17510964752260010924 +0.17487909660782099963 +0.17464879993853921203 +0.17441875730972308811 +0.17418896851639611989 +0.17395943335363692195 +0.17373015161658067473 +0.17350112310041701513 +0.17327234760039328387 +0.17304382491181130588 +0.17281555483003022133 +0.17258753715046587507 +0.17235977166858928999 +0.17213225817993019207 +0.17190499648007354083 +0.17167798636466252704 +0.17145122762939699057 +0.17122472007003444738 +0.17099846348238997851 +0.17077245766233578594 +0.17054670240580280249 +0.17032119750877935949 +0.17009594276731229701 +0.16987093797750679736 +0.16964618293552558015 +0.16942167743759184440 +0.16919742127998557701 +0.16897341425904696677 +0.16874965617117501648 +0.16852614681282712672 +0.16830288598052173255 +0.16807987347083527818 +0.16785710908040449296 +0.16763459260592600275 +0.16741232384415624668 +0.16719030259191214327 +0.16696852864606989697 +0.16674700180356763490 +0.16652572186140265909 +0.16630468861663391666 +0.16608390186638100072 +0.16586336140782365067 +0.16564306703820469435 +0.16542301855482624551 +0.16520321575505303446 +0.16498365843631124239 +0.16476434639608852906 +0.16454527943193480999 +0.16432645734146134053 +0.16410787992234274202 +0.16388954697231475355 +0.16367145828917636918 +0.16345361367078906079 +0.16323601291507652822 +0.16301865582002647570 +0.16280154218368847463 +0.16258467180417612852 +0.16236804447966601828 +0.16215166000839817406 +0.16193551818867685244 +0.16171961881886876000 +0.16150396169740627306 +0.16128854662278424570 +0.16107337339356267436 +0.16085844180836569861 +0.16064375166588137911 +0.16042930276486361274 +0.16021509490412966237 +0.16000112788256251606 +0.15978740149911010993 +0.15957391555278530038 +0.15936066984266694657 +0.15914766416789802306 +0.15893489832768870063 +0.15872237212131359851 +0.15851008534811392159 +0.15829803780749698849 +0.15808622929893506592 +0.15787465962196881031 +0.15766332857620315999 +0.15745223596131086019 +0.15724138157703110297 +0.15703076522316991581 
+0.15682038669960018940 +0.15661024580626156655 +0.15640034234316205208 +0.15619067611037570908 +0.15598124690804487935 +0.15577205453637962829 +0.15556309879565688448 +0.15535437948622304871 +0.15514589640849094088 +0.15493764936294257550 +0.15472963815012785727 +0.15452186257066549691 +0.15431432242524245613 +0.15410701751461414188 +0.15389994763960601620 +0.15369311260111084838 +0.15348651220009165708 +0.15328014623758071111 +0.15307401451467847475 +0.15286811683255668859 +0.15266245299245517764 +0.15245702279568429383 +0.15225182604362422212 +0.15204686253772484172 +0.15184213207950691960 +0.15163763447056022304 +0.15143336951254651734 +0.15122933700719659589 +0.15102553675631302799 +0.15082196856176866007 +0.15061863222550697650 +0.15041552754954357063 +0.15021265433596386885 +0.15001001238692573958 +0.14980760150465774472 +0.14960542149146000002 +0.14940347214970509104 +0.14920175328183646335 +0.14900026469037011556 +0.14879900617789371120 +0.14859797754706738360 +0.14839717860062359711 +0.14819660914136650876 +0.14799626897217435517 +0.14779615789599648279 +0.14759627571585579031 +0.14739662223484814585 +0.14719719725614197059 +0.14699800058297990413 +0.14679903201867650075 +0.14660029136662081073 +0.14640177843027479820 +0.14620349301317450696 +0.14600543491892967185 +0.14580760395122333017 +0.14560999991381379237 +0.14541262261053217175 +0.14521547184528477148 +0.14501854742205202986 +0.14482184914488838157 +0.14462537681792400623 +0.14442913024536249700 +0.14423310923148335849 +0.14403731358064061907 +0.14384174309726349694 +0.14364639758585656670 +0.14345127685099900994 +0.14325638069734680791 +0.14306170892963035457 +0.14286726135265609416 +0.14267303777130665998 +0.14247903799053956986 +0.14228526181539000173 +0.14209170905096774051 +0.14189837950245967613 +0.14170527297512894305 +0.14151238927431497583 +0.14131972820543431402 +0.14112728957397921437 +0.14093507318552031538 +0.14074307884570386173 +0.14055130636025359170 +0.14035975553497115342 
+0.14016842617573396779 +0.13997731808849867008 +0.13978643107929789036 +0.13959576495424286247 +0.13940531951952217504 +0.13921509458140235438 +0.13902508994622828076 +0.13883530542042177292 +0.13864574081048464116 +0.13845639592299521792 +0.13826727056461149412 +0.13807836454206959265 +0.13788967766218379607 +0.13770120973184837854 +0.13751296055803494123 +0.13732492994779532669 +0.13713711770825975922 +0.13694952364663798283 +0.13676214757021903923 +0.13657498928637082369 +0.13638804860254175044 +0.13620132532625897626 +0.13601481926512976051 +0.13582853022684154842 +0.13564245801916055556 +0.13545660244993493193 +0.13527096332709112603 +0.13508554045863682691 +0.13490033365266010379 +0.13471534271732929500 +0.13453056746089361861 +0.13434600769168195122 +0.13416166321810579776 +0.13397753384865584980 +0.13379361939190465014 +0.13360991965650589886 +0.13342643445119378720 +0.13324316358478510702 +0.13306010686617672500 +0.13287726410434783086 +0.13269463510835913245 +0.13251221968735285572 +0.13233001765055349419 +0.13214802880726672640 +0.13196625296688127560 +0.13178468993886705007 +0.13160333953277669750 +0.13142220155824552164 +0.13124127582499006683 +0.13106056214281125438 +0.13088006032159102410 +0.13069977017129474905 +0.13051969150197095804 +0.13033982412375011428 +0.13016016784684719676 +0.12998072248155892461 +0.12980148783826603309 +0.12962246372743252421 +0.12944364995960577769 +0.12926504634541674532 +0.12908665269557950683 +0.12890846882089304626 +0.12873049453223897598 +0.12855272964058345186 +0.12837517395697684019 +0.12819782729255291276 +0.12802068945853056769 +0.12784376026621219191 +0.12766703952698513214 +0.12749052705232100102 +0.12731422265377631553 +0.12713812614299199732 +0.12696223733169348380 +0.12678655603169206034 +0.12661108205488294520 +0.12643581521324706585 +0.12626075531885061487 +0.12608590218384418957 +0.12591125562046506792 +0.12573681544103476604 +0.12556258145796106440 +0.12538855348373709186 +0.12521473133094199182 
+0.12504111481224053359 +0.12486770374038300147 +0.12469449792820683220 +0.12452149718863426975 +0.12434870133467444686 +0.12417611017942298268 +0.12400372353606090026 +0.12383154121785712454 +0.12365956303816570683 +0.12348778881042822564 +0.12331621834817273198 +0.12314485146501426283 +0.12297368797465459134 +0.12280272769088229623 +0.12263197042757371935 +0.12246141599869160566 +0.12229106421828636608 +0.12212091490049604980 +0.12195096785954516461 +0.12178122290974720265 +0.12161167986550192044 +0.12144233854129750372 +0.12127319875170990138 +0.12110426031140261727 +0.12093552303512770940 +0.12076698673772422177 +0.12059865123412077947 +0.12043051633933304911 +0.12026258186846541798 +0.12009484763671104957 +0.11992731345935062071 +0.11975997915175459752 +0.11959284452938104271 +0.11942590940777732256 +0.11925917360257951017 +0.11909263692951255198 +0.11892629920439055924 +0.11876016024311593366 +0.11859421986168165730 +0.11842847787616843369 +0.11826293410274729689 +0.11809758835767832086 +0.11793244045731089698 +0.11776749021808446960 +0.11760273745652749522 +0.11743818198925864982 +0.11727382363298630152 +0.11710966220450860775 +0.11694569752071384827 +0.11678192939857974519 +0.11661835765517546137 +0.11645498210765881097 +0.11629180257327897952 +0.11612881886937530262 +0.11596603081337686358 +0.11580343822280457500 +0.11564104091526875018 +0.11547883870847085175 +0.11531683142020335286 +0.11515501886834898781 +0.11499340087088213980 +0.11483197724586721722 +0.11467074781146092965 +0.11450971238590992862 +0.11434887078755279211 +0.11418822283481934454 +0.11402776834623025437 +0.11386750714039867161 +0.11370743903602820168 +0.11354756385191466794 +0.11338788140694550099 +0.11322839152009936403 +0.11306909401044784591 +0.11290998869715335173 +0.11275107539947067103 +0.11259235393674689452 +0.11243382412842080342 +0.11227548579402392426 +0.11211733875317908549 +0.11195938282560283228 +0.11180161783110283136 +0.11164404358958006369 +0.11148665992102797795 
+0.11132946664553186600 +0.11117246358327122213 +0.11101565055451670383 +0.11085902737963296283 +0.11070259387907713244 +0.11054634987339932717 +0.11039029518324303125 +0.11023442962934409950 +0.11007875303253278343 +0.10992326521373146919 +0.10976796599395662046 +0.10961285519431784863 +0.10945793263601792666 +0.10930319814035421855 +0.10914865152871637555 +0.10899429262258872320 +0.10884012124354891515 +0.10868613721326857158 +0.10853234035351337627 +0.10837873048614225790 +0.10822530743310934676 +0.10807207101646176817 +0.10791902105834144665 +0.10776615738098450914 +0.10761347980672081315 +0.10746098815797561210 +0.10730868225726758469 +0.10715656192721050022 +0.10700462699051246918 +0.10685287726997638735 +0.10670131258849981093 +0.10654993276907465116 +0.10639873763478858992 +0.10624772700882310905 +0.10609690071445532222 +0.10594625857505740596 +0.10579580041409594737 +0.10564552605513374828 +0.10549543532182774352 +0.10534552803793094389 +0.10519580402729113155 +0.10504626311385192872 +0.10489690512165240899 +0.10474772987482643127 +0.10459873719760476307 +0.10444992691431254084 +0.10430129884937144880 +0.10415285282729860872 +0.10400458867270671870 +0.10385650621030530216 +0.10370860526489873721 +0.10356088566138803297 +0.10341334722477026065 +0.10326598978013841468 +0.10311881315268205117 +0.10297181716768628867 +0.10282500165053357066 +0.10267836642670177816 +0.10253191132176567302 +0.10238563616139681467 +0.10223954077136240826 +0.10209362497752734467 +0.10194788860585242418 +0.10180233148239546670 +0.10165695343331118683 +0.10151175428485087471 +0.10136673386336300662 +0.10122189199529255110 +0.10107722850718237062 +0.10093274322567140355 +0.10078843597749627403 +0.10064430658949091724 +0.10050035488858584387 +0.10035658070180983326 +0.10021298385628815697 +0.10006956417924385561 +0.09992632149799754449 +0.09978325563996721936 +0.09964036643266853399 +0.09949765370371441153 +0.09935511728081616867 +0.09921275699178219720 +0.09907057266451893551 
+0.09892856412703092406 +0.09878673120741986169 +0.09864507373388645139 +0.09850359153472845741 +0.09836228443834220403 +0.09822115227322228415 +0.09808019486796057396 +0.09793941205124842564 +0.09779880365187441915 +0.09765836949872594430 +0.09751810942078878441 +0.09737802324714681101 +0.09723811080698298304 +0.09709837192957791741 +0.09695880644431165152 +0.09681941418066236649 +0.09668019496820692837 +0.09654114863662138779 +0.09640227501567967539 +0.09626357393525568351 +0.09612504522532122619 +0.09598668871594759344 +0.09584850423730476021 +0.09571049161966196928 +0.09557265069338755081 +0.09543498128894824239 +0.09529748323691122902 +0.09516015636794179777 +0.09502300051280501703 +0.09488601550236522297 +0.09474920116758572819 +0.09461255733953018165 +0.09447608384936051484 +0.09433978052833902339 +0.09420364720782714585 +0.09406768371928607431 +0.09393188989427676827 +0.09379626556445923302 +0.09366081056159422658 +0.09352552471754130292 +0.09339040786426044960 +0.09325545983381143544 +0.09312068045835347752 +0.09298606957014660113 +0.09285162700155000226 +0.09271735258502333821 +0.09258324615312632511 +0.09244930753851880734 +0.09231553657396085466 +0.09218193309231234589 +0.09204849692653421789 +0.09191522790968685575 +0.09178212587493163321 +0.09164919065553002453 +0.09151642208484381258 +0.09138381999633579666 +0.09125138422356871004 +0.09111911460020603870 +0.09098701096001195199 +0.09085507313685131647 +0.09072330096468957106 +0.09059169427759258819 +0.09046025290972785349 +0.09032897669536277263 +0.09019786546886603140 +0.09006691906470729037 +0.08993613731745647710 +0.08980552006178563196 +0.08967506713246670147 +0.08954477836437331473 +0.08941465359248003397 +0.08928469265186241011 +0.08915489537769748229 +0.08902526160526294530 +0.08889579116993856500 +0.08876648390720440207 +0.08863733965264262993 +0.08850835824193646617 +0.08837953951087020033 +0.08825088329533049836 +0.08812238943130429325 +0.08799405775488099157 +0.08786588810225103019 
+0.08773788030970651464 +0.08761003421364142729 +0.08748234965055078083 +0.08735482645703224192 +0.08722746446978411894 +0.08710026352560730489 +0.08697322346140420879 +0.08684634411417872790 +0.08671962532103734411 +0.08659306691918775001 +0.08646666874593998686 +0.08634043063870593115 +0.08621435243499947498 +0.08608843397243659545 +0.08596267508873506324 +0.08583707562171535854 +0.08571163540929940816 +0.08558635428951168189 +0.08546123210047906760 +0.08533626868042987201 +0.08521146386769590242 +0.08508681750071005190 +0.08496232941800836713 +0.08483799945822899369 +0.08471382746011218989 +0.08458981326250139543 +0.08446595670434163539 +0.08434225762468096355 +0.08421871586266986565 +0.08409533125756137040 +0.08397210364871118826 +0.08384903287557710083 +0.08372611877772062616 +0.08360336119480482608 +0.08348075996659617970 +0.08335831493296369521 +0.08323602593387884052 +0.08311389280941662572 +0.08299191539975413201 +0.08287009354517173298 +0.08274842708605262276 +0.08262691586288271883 +0.08250555971625128659 +0.08238435848684978746 +0.08226331201547389116 +0.08214242014302101935 +0.08202168271049245507 +0.08190109955899242677 +0.08178067052972787243 +0.08166039546400968852 +0.08154027420325099529 +0.08142030658896869111 +0.08130049246278281405 +0.08118083166641644477 +0.08106132404169631711 +0.08094196943055181892 +0.08082276767501657411 +0.08070371861722658302 +0.08058482209942197105 +0.08046607796394592005 +0.08034748605324490422 +0.08022904620986941182 +0.08011075827647272385 +0.07999262209581206595 +0.07987463751074808105 +0.07975680436424492648 +0.07963912249937041277 +0.07952159175929569834 +0.07940421198729603891 +0.07928698302674978826 +0.07916990472113934196 +0.07905297691405080429 +0.07893619944917351638 +0.07881957217030141627 +0.07870309492133130413 +0.07858676754626429950 +0.07847058988920518896 +0.07835456179436255109 +0.07823868310604907561 +0.07812295366868057811 +0.07800737332677797065 +0.07789194192496487479 +0.07777665930796962002 
+0.07766152532062428615 +0.07754653980786459233 +0.07743170261473097948 +0.07731701358636720867 +0.07720247256802144353 +0.07708807940504579237 +0.07697383394289661340 +0.07685973602713420949 +0.07674578550342273098 +0.07663198221753117489 +0.07651832601533210820 +0.07640481674280238944 +0.07629145424602333525 +0.07617823837117997099 +0.07606516896456233523 +0.07595224587256384219 +0.07583946894168275277 +0.07572683801852146679 +0.07561435294978662014 +0.07550201358228951498 +0.07538981976294496790 +0.07527777133877333604 +0.07516586815689826895 +0.07505411006454842937 +0.07494249690905688266 +0.07483102853786059716 +0.07471970479850184588 +0.07460852553862645786 +0.07449749060598535866 +0.07438659984843379314 +0.07427585311393143652 +0.07416525025054265807 +0.07405479110643597984 +0.07394447552988511752 +0.07383430336926775917 +0.07372427447306650894 +0.07361438868986856787 +0.07350464586836541470 +0.07339504585735373565 +0.07328558850573427264 +0.07317627366251287790 +0.07306710117679986183 +0.07295807089781009003 +0.07284918267486363563 +0.07274043635738462743 +0.07263183179490234620 +0.07252336883705078063 +0.07241504733356851631 +0.07230686713429912427 +0.07219882808919056427 +0.07209093004829623952 +0.07198317286177358110 +0.07187555637988539414 +0.07176808045299924721 +0.07166074493158712533 +0.07155354966622658186 +0.07144649450759936460 +0.07133957930649242885 +0.07123280391379752108 +0.07112616818051144263 +0.07101967195773575825 +0.07091331509667671285 +0.07080709744864620292 +0.07070101886506023614 +0.07059507919744029136 +0.07048927829741265250 +0.07038361601670840850 +0.07027809220716409178 +0.07017270672072069282 +0.07006745940942449291 +0.06996235012542677267 +0.06985737872098372880 +0.06975254504845677939 +0.06964784896031193939 +0.06954329030912087539 +0.06943886894755976757 +0.06933458472841023956 +0.06923043750455880330 +0.06912642712899683128 +0.06902255345482133375 +0.06891881633523398720 +0.06881521562354162014 +0.06871175117315622694 
+0.06860842283759500948 +0.06850523047048023839 +0.06840217392553889220 +0.06829925305660398960 +0.06819646771761275761 +0.06809381776260810260 +0.06799130304573805517 +0.06788892342125536772 +0.06778667874351873568 +0.06768456886699125707 +0.06758259364624166765 +0.06748075293594367474 +0.06737904659087633197 +0.06727747446592374780 +0.06717603641607515497 +0.06707473229642546553 +0.06697356196217400803 +0.06687252526862610957 +0.06677162207119177739 +0.06667085222538628175 +0.06657021558683065554 +0.06646971201125038975 +0.06636934135447672412 +0.06626910347244596711 +0.06616899822119966246 +0.06606902545688476958 +0.06596918503575301129 +0.06586947681416205347 +0.06576990064857415885 +0.06567045639555725567 +0.06557114391178445190 +0.06547196305403378547 +0.06537291367918919571 +0.06527399564423928824 +0.06517520880627819535 +0.06507655302250522911 +0.06497802815022518663 +0.06487963404684786439 +0.06478137056988832188 +0.06468323757696720078 +0.06458523492581003111 +0.06448736247424791124 +0.06438962008021713312 +0.06429200760175890483 +0.06419452489702047460 +0.06409717182425365978 +0.06399994824181580444 +0.06390285400816962669 +0.06380588898188295499 +0.06370905302162915840 +0.06361234598618627223 +0.06351576773443845525 +0.06341931812537443536 +0.06332299701808853654 +0.06322680427178035967 +0.06313073974575450498 +0.06303480329942130755 +0.06293899479229590754 +0.06284331408399899954 +0.06274776103425647178 +0.06265233550289921183 +0.06255703734986389764 +0.06246186643519190812 +0.06236682261903024604 +0.06227190576163110780 +0.06217711572335179321 +0.06208245236465500388 +0.06198791554610838528 +0.06189350512838542179 +0.06179922097226416000 +0.06170506293862821484 +0.06161103088846661691 +0.06151712468287299368 +0.06142334418304706140 +0.06132968925029300827 +0.06123615974602044515 +0.06114275553174453731 +0.06104947646908515796 +0.06095632241976779025 +0.06086329324562267384 +0.06077038880858577630 +0.06067760897069777309 +0.06058495359410476927 
+0.06049242254105798022 +0.06040001567391369008 +0.06030773285513376519 +0.06021557394728462714 +0.06012353881303823810 +0.06003162731517158041 +0.05993983931656686470 +0.05984817468021143977 +0.05975663326919743168 +0.05966521494672279158 +0.05957391957608983157 +0.05948274702070651537 +0.05939169714408568113 +0.05930076980984501367 +0.05920996488170803679 +0.05911928222350245482 +0.05902872169916167228 +0.05893828317272387096 +0.05884796650833239856 +0.05875777157023564373 +0.05866769822278681407 +0.05857774633044470630 +0.05848791575777247120 +0.05839820636943868909 +0.05830861803021694656 +0.05821915060498540628 +0.05812980395872795886 +0.05804057795653280033 +0.05795147246359334814 +0.05786248734520803988 +0.05777362246678022922 +0.05768487769381833163 +0.05759625289193517211 +0.05750774792684939379 +0.05741936266438368852 +0.05733109697046606668 +0.05724295071112959349 +0.05715492375251165352 +0.05706701596085524825 +0.05697922720250754580 +0.05689155734392099817 +0.05680400625165274442 +0.05671657379236477725 +0.05662925983282403319 +0.05654206423990183056 +0.05645498688057502823 +0.05636802762192444360 +0.05628118633113619873 +0.05619446287550111663 +0.05610785712241452006 +0.05602136893937682827 +0.05593499819399277290 +0.05584874475397225840 +0.05576260848712942531 +0.05567658926138350367 +0.05559068694475824413 +0.05550490140538177214 +0.05541923251148752477 +0.05533368013141289066 +0.05524824413360027164 +0.05516292438659670117 +0.05507772075905343484 +0.05499263311972682478 +0.05490766133747743832 +0.05482280528127052299 +0.05473806482017577746 +0.05465343982336761525 +0.05456893016012484554 +0.05448453569983058298 +0.05440025631197297623 +0.05431609186614395901 +0.05423204223204049906 +0.05414810727946366836 +0.05406428687831889290 +0.05398058089861657027 +0.05389698921047077901 +0.05381351168410041658 +0.05373014818982873447 +0.05364689859808298433 +0.05356376277939529917 +0.05348074060440164562 +0.05339783194384247617 +0.05331503666856259732 
+0.05323235464951107249 +0.05314978575774117336 +0.05306732986441022037 +0.05298498684078027654 +0.05290275655821700951 +0.05282063888819057973 +0.05273863370227548780 +0.05265674087214990140 +0.05257496026959668223 +0.05249329176650233131 +0.05241173523485774532 +0.05233029054675780722 +0.05224895757440151112 +0.05216773619009196228 +0.05208662626623598163 +0.05200562767534494535 +0.05192474029003360525 +0.05184396398302114350 +0.05176329862713067997 +0.05168274409528885588 +0.05160230026052688851 +0.05152196699597923202 +0.05144174417488472928 +0.05136163167058584167 +0.05128162935652895438 +0.05120173710626434171 +0.05112195479344574384 +0.05104228229183133125 +0.05096271947528242113 +0.05088326621776436548 +0.05080392239334631521 +0.05072468787620081770 +0.05064556254060461477 +0.05056654626093758098 +0.05048763891168363271 +0.05040884036743013136 +0.05033015050286809844 +0.05025156919279218093 +0.05017309631210038756 +0.05009473173579473415 +0.05001647533898021664 +0.04993832699686569232 +0.04986028658476348435 +0.04978235397808904866 +0.04970452905236187602 +0.04962681168320428465 +0.04954920174634235008 +0.04947169911760544714 +0.04939430367292623608 +0.04931701528834090548 +0.04923983383998847829 +0.04916275920411204703 +0.04908579125705717777 +0.04900892987527320777 +0.04893217493531259316 +0.04885552631383063837 +0.04877898388758643983 +0.04870254753344178261 +0.04862621712836165400 +0.04854999254941439607 +0.04847387367377112982 +0.04839786037870631719 +0.04832195254159712267 +0.04824615003992412110 +0.04817045275127061071 +0.04809486055332289067 +0.04801937332387044843 +0.04794399094080514095 +0.04786871328212252003 +0.04779354022592022244 +0.04771847165039927452 +0.04764350743386329412 +0.04756864745471876127 +0.04749389159147500428 +0.04741923972274376953 +0.04734469172724012354 +0.04727024748378119701 +0.04719590687128728812 +0.04712166976878120334 +0.04704753605538810474 +0.04697350561033627331 +0.04689957831295609586 +0.04682575404268070340 
+0.04675203267904584625 +0.04667841410168958871 +0.04660489819035268377 +0.04653148482487811516 +0.04645817388521140262 +0.04638496525140046317 +0.04631185880359553475 +0.04623885442204943991 +0.04616595198711675313 +0.04609315137925508454 +0.04602045247902367825 +0.04594785516708431439 +0.04587535932420114954 +0.04580296483123996731 +0.04573067156916942044 +0.04565847941905974011 +0.04558638826208345762 +0.04551439797951525174 +0.04544250845273172662 +0.04537071956321171023 +0.04529903119253560206 +0.04522744322238634457 +0.04515595553454834071 +0.04508456801090814092 +0.04501328053345418634 +0.04494209298427650351 +0.04487100524556745379 +0.04480001719962067169 +0.04472912872883185592 +0.04465833971569839467 +0.04458765004281940725 +0.04451705959289567471 +0.04444656824872943862 +0.04437617589322508799 +0.04430588240938798661 +0.04423568768032550697 +0.04416559158924641265 +0.04409559401946083063 +0.04402569485438075775 +0.04395589397751922811 +0.04388619127249094448 +0.04381658662301190360 +0.04374707991289951414 +0.04367767102607257590 +0.04360835984655088426 +0.04353914625845603514 +0.04347003014601038412 +0.04340101139353770565 +0.04333208988546302654 +0.04326326550631227202 +0.04319453814071288339 +0.04312590767339299219 +0.04305737398918198927 +0.04298893697301033046 +0.04292059650990938391 +0.04285235248501154809 +0.04278420478354996725 +0.04271615329085912127 +0.04264819789237381947 +0.04258033847363017904 +0.04251257492026486173 +0.04244490711801530980 +0.04237733495271997497 +0.04230985831031765926 +0.04224247707684809783 +0.04217519113845161205 +0.04210800038136918583 +0.04204090469194235458 +0.04197390395661304563 +0.04190699806192415416 +0.04184018689451850931 +0.04177347034113984559 +0.04170684828863215066 +0.04164032062393970002 +0.04157388723410739007 +0.04150754800628019686 +0.04144130282770355084 +0.04137515158572306617 +0.04130909416778467957 +0.04124313046143445599 +0.04117726035431843595 +0.04111148373418328783 +0.04104580048887526700 
+0.04098021050634095830 +0.04091471367462703318 +0.04084930988187979867 +0.04078399901634603697 +0.04071878096637202710 +0.04065365562040416936 +0.04058862286698865229 +0.04052368259477149431 +0.04045883469249864084 +0.04039407904901545082 +0.04032941555326750166 +0.04026484409429956918 +0.04020036456125640478 +0.04013597684338233301 +0.04007168083002105030 +0.04000747641061616622 +0.03994336347471044713 +0.03987934191194641292 +0.03981541161206584439 +0.03975157246491003299 +0.03968782436041973921 +0.03962416718863477627 +0.03956060083969446112 +0.03949712520383743403 +0.03943374017140145044 +0.03937044563282353360 +0.03930724147863974560 +0.03924412759948553431 +0.03918110388609512273 +0.03911817022930199478 +0.03905532652003866623 +0.03899257264933641415 +0.03892990850832581118 +0.03886733398823599694 +0.03880484898039522623 +0.03874245337623044572 +0.03868014706726748825 +0.03861792994513103122 +0.03855580190154409698 +0.03849376282832903118 +0.03843181261740622606 +0.03836995116079502943 +0.03830817835061334220 +0.03824649407907736165 +0.03818489823850220594 +0.03812339072130113693 +0.03806197141998592798 +0.03800064022716679452 +0.03793939703555228998 +0.03787824173794934746 +0.03781717422726296046 +0.03775619439649668951 +0.03769530213875204455 +0.03763449734722879025 +0.03757377991522487665 +0.03751314973613616849 +0.03745260670345688936 +0.03739215071077898328 +0.03733178165179252411 +0.03727149942028548657 +0.03721130391014374622 +0.03715119501535112806 +0.03709117262998899711 +0.03703123664823695232 +0.03697138696437192451 +0.03691162347276880085 +0.03685194606790007099 +0.03679235464433567443 +0.03673284909674356946 +0.03667342931988884502 +0.03661409520863429662 +0.03655484665794014182 +0.03649568356286411741 +0.03643660581856127817 +0.03637761332028396216 +0.03631870596338202667 +0.03625988364330241798 +0.03620114625558937954 +0.03614249369588445893 +0.03608392585992607066 +0.03602544264355018316 +0.03596704394268944449 +0.03590872965337370271 
+0.03585049967172981161 +0.03579235389398162381 +0.03573429221644981724 +0.03567631453555187432 +0.03561842074780240813 +0.03556061074981250320 +0.03550288443829015261 +0.03544524171004015400 +0.03538768246196365846 +0.03533020659105889222 +0.03527281399442029619 +0.03521550456923906031 +0.03515827821280283205 +0.03510113482249576505 +0.03504407429579840111 +0.03498709653028753142 +0.03493020142363668229 +0.03487338887361528250 +0.03481665877808917675 +0.03476001103502054934 +0.03470344554246748009 +0.03464696219858456189 +0.03459056090162214436 +0.03453424154992679185 +0.03447800404194109603 +0.03442184827620352328 +0.03436577415134867836 +0.03430978156610670765 +0.03425387041930396531 +0.03419804060986234712 +0.03414229203679965130 +0.03408662459922941201 +0.03403103819636062871 +0.03397553272749827963 +0.03392010809204257932 +0.03386476418948953376 +0.03380950091943060037 +0.03375431818155263247 +0.03369921587563805970 +0.03364419390156443701 +0.03358925215930484709 +0.03353439054892767140 +0.03347960897059645141 +0.03342490732457004815 +0.03337028551120223285 +0.03331574343094229063 +0.03326128098433427799 +0.03320689807201735594 +0.03315259459472578996 +0.03309837045328852673 +0.03304422554862982558 +0.03299015978176843278 +0.03293617305381805332 +0.03288226526598723304 +0.03282843631957911568 +0.03277468611599172049 +0.03272101455671737325 +0.03266742154334340703 +0.03261390697755140594 +0.03256047076111760058 +0.03250711279591267377 +0.03245383298390162874 +0.03240063122714405275 +0.03234750742779366611 +0.03229446148809856504 +0.03224149331040111754 +0.03218860279713779693 +0.03213578985083940381 +0.03208305437413051103 +0.03203039626973024079 +0.03197781544045133484 +0.03192531178920066104 +0.03187288521897909538 +0.03182053563288117504 +0.03176826293409563268 +0.03171606702590461235 +0.03166394781168425238 +0.03161190519490432449 +0.03155993907912825469 +0.03150804936801320649 +0.03145623596530962296 +0.03140449877486189978 +0.03135283770060747627 
+0.03130125264657748763 +0.03124974351689646310 +0.03119831021578207270 +0.03114695264754557477 +0.03109567071659122967 +0.03104446432741660503 +0.03099333338461248213 +0.03094227779286263386 +0.03089129745694410220 +0.03084039228172662239 +0.03078956217217332370 +0.03073880703333988981 +0.03068812677037507930 +0.03063752128852056250 +0.03058699049311055379 +0.03053653428957231464 +0.03048615258342551176 +0.03043584528028267164 +0.03038561228584886822 +0.03033545350592174034 +0.03028536884639145693 +0.03023535821324051243 +0.03018542151254411873 +0.03013555865046951485 +0.03008576953327653583 +0.03003605406731718955 +0.02998641215903562890 +0.02993684371496847102 +0.02988734864174414502 +0.02983792684608336729 +0.02978857823479883621 +0.02973930271479528764 +0.02969010019306943249 +0.02964097057670971383 +0.02959191377289667468 +0.02954292968890250698 +0.02949401823209118340 +0.02944517930991851634 +0.02939641282993170346 +0.02934771869976992090 +0.02929909682716363983 +0.02925054711993495607 +0.02920206948599748942 +0.02915366383335621023 +0.02910533007010752959 +0.02905706810443908425 +0.02900887784462997249 +0.02896075919905035517 +0.02891271207616167779 +0.02886473638451656637 +0.02881683203275854643 +0.02876899892962246977 +0.02872123698393392460 +0.02867354610460962419 +0.02862592620065711885 +0.02857837718117468145 +0.02853089895535175155 +0.02848349143246807491 +0.02843615452189442172 +0.02838888813309211820 +0.02834169217561301884 +0.02829456655909974583 +0.02824751119328511306 +0.02820052598799280621 +0.02815361085313655351 +0.02810676569872064268 +0.02805999043483969890 +0.02801328497167840034 +0.02796664921951200200 +0.02792008308870559327 +0.02787358648971452121 +0.02782715933308424483 +0.02778080152945015119 +0.02773451298953775665 +0.02768829362416218298 +0.02764214334422888941 +0.02759606206073273935 +0.02755004968475859711 +0.02750410612748103653 +0.02745823130016410846 +0.02741242511416188896 +0.02736668748091763276 +0.02732101831196434572 
+0.02727541751892445521 +0.02722988501350978932 +0.02718442070752164971 +0.02713902451285045425 +0.02709369634147626438 +0.02704843610546797325 +0.02700324371698391984 +0.02695811908827148995 +0.02691306213166706762 +0.02686807275959630578 +0.02682315088457350866 +0.02677829641920219730 +0.02673350927617461348 +0.02668878936827187923 +0.02664413660836395178 +0.02659955090940929051 +0.02655503218445540506 +0.02651058034663810944 +0.02646619530918190022 +0.02642187698539990096 +0.02637762528869348755 +0.02633344013255280169 +0.02628932143055597720 +0.02624526909636972979 +0.02620128304374900671 +0.02615736318653692424 +0.02611350943866481977 +0.02606972171415203668 +0.02602599992710625740 +0.02598234399172287198 +0.02593875382228542567 +0.02589522933316539335 +0.02585177043882191938 +0.02580837705380229982 +0.02576504909274129548 +0.02572178647036148583 +0.02567858910147315793 +0.02563545690097417465 +0.02559238978385003360 +0.02554938766517349247 +0.02550645046010516920 +0.02546357808389269547 +0.02542077045187132039 +0.02537802747946354617 +0.02533534908217894080 +0.02529273517561460294 +0.02525018567545450968 +0.02520770049746985306 +0.02516527955751887008 +0.02512292277154677331 +0.02508063005558576822 +0.02503840132575479993 +0.02499623649825990709 +0.02495413548939365983 +0.02491209821553553097 +0.02487012459315169483 +0.02482821453879472534 +0.02478636796910416856 +0.02474458480080570302 +0.02470286495071175731 +0.02466120833572107290 +0.02461961487281878400 +0.02457808447907637583 +0.02453661707165149045 +0.02449521256778822154 +0.02445387088481656285 +0.02441259194015277592 +0.02437137565129920275 +0.02433022193584402637 +0.02428913071146163866 +0.02424810189591208864 +0.02420713540704142255 +0.02416623116278153807 +0.02412538908114980271 +0.02408460908024970260 +0.02404389107826998204 +0.02400323499348513967 +0.02396264074425525498 +0.02392210824902578359 +0.02388163742632778974 +0.02384122819477739463 +0.02380088047307642174 +0.02376059418001172027 
+0.02372036923445542886 +0.02368020555536489574 +0.02364010306178248796 +0.02360006167283580300 +0.02356008130773723855 +0.02352016188578431866 +0.02348030332635936412 +0.02344050554892961394 +0.02340076847304714547 +0.02336109201834857263 +0.02332147610455551079 +0.02328192065147391063 +0.02324242557899452652 +0.02320299080709260081 +0.02316361625582768341 +0.02312430184534407587 +0.02308504749587013405 +0.02304585312771871217 +0.02300671866128688528 +0.02296764401705595274 +0.02292862911559140002 +0.02288967387754268709 +0.02285077822364353986 +0.02281194207471150606 +0.02277316535164813222 +0.02273444797543894974 +0.02269578986715309332 +0.02265719094794384214 +0.02261865113904785660 +0.02258017036178568138 +0.02254174853756140198 +0.02250338558786274187 +0.02246508143426092716 +0.02242683599841050618 +0.02238864920204973463 +0.02235052096699989208 +0.02231245121516577112 +0.02227443986853539989 +0.02223648684917984772 +0.02219859207925361372 +0.02216075548099395026 +0.02212297697672138685 +0.02208525648883931727 +0.02204759393983407936 +0.02200998925227495157 +0.02197244234881390312 +0.02193495315218580918 +0.02189752158520813161 +0.02186014757078104387 +0.02182283103188737555 +0.02178557189159231397 +0.02174837007304382744 +0.02171122549947207547 +0.02167413809418966900 +0.02163710778059164605 +0.02160013448215520118 +0.02156321812243985891 +0.02152635862508712680 +0.02148955591382086666 +0.02145280991244685395 +0.02141612054485284022 +0.02137948773500866415 +0.02134291140696590108 +0.02130639148485811984 +0.02126992789290054961 +0.02123352055539021524 +0.02119716939670582279 +0.02116087434130767275 +0.02112463531373768785 +0.02108845223861916668 +0.02105232504065711677 +0.02101625364463766479 +0.02098023797542847638 +0.02094427795797850628 +0.02090837351731776594 +0.02087252457855775373 +0.02083673106689084079 +0.02080099290759059374 +0.02076531002601155265 +0.02072968234758929343 +0.02069410979784027521 +0.02065859230236169117 +0.02062312978683178422 
+0.02058772217700931273 +0.02055236939873383850 +0.02051707137792559837 +0.02048182804058523709 +0.02044663931279422017 +0.02041150512071416429 +0.02037642539058730565 +0.02034140004873625021 +0.02030642902156370999 +0.02027151223555295409 +0.02023664961726719808 +0.02020184109334990583 +0.02016708659052462299 +0.02013238603559490761 +0.02009773935544433010 +0.02006314647703628939 +0.02002860732741423497 +0.01999412183370121582 +0.01995968992310016843 +0.01992531152289373983 +0.01989098656044406208 +0.01985671496319312349 +0.01982249665866222046 +0.01978833157445219343 +0.01975421963824337476 +0.01972016077779537371 +0.01968615492094722558 +0.01965220199561702741 +0.01961830192980234047 +0.01958445465157965595 +0.01955066008910466904 +0.01951691817061209505 +0.01948322882441557574 +0.01944959197890778688 +0.01941600756256015373 +0.01938247550392300025 +0.01934899573162539299 +0.01931556817437504736 +0.01928219276095847687 +0.01924886942024051778 +0.01921559808116489465 +0.01918237867275348132 +0.01914921112410677626 +0.01911609536440365270 +0.01908303132290111934 +0.01905001892893468801 +0.01901705811191788456 +0.01898414880134246391 +0.01895129092677826088 +0.01891848441787313123 +0.01888572920435300020 +0.01885302521602150519 +0.01882037238276043634 +0.01878777063452913634 +0.01875521990136487169 +0.01872272011338258976 +0.01869027120077471765 +0.01865787309381159581 +0.01862552572284076682 +0.01859322901828744726 +0.01856098291065421887 +0.01852878733052101129 +0.01849664220854515401 +0.01846454747546104683 +0.01843250306208053454 +0.01840050889929235878 +0.01836856491806253960 +0.01833667104943405637 +0.01830482722452676445 +0.01827303337453766927 +0.01824128943074039208 +0.01820959532448552728 +0.01817795098720033017 +0.01814635635038880021 +0.01811481134563159082 +0.01808331590458582896 +0.01805186995898533375 +0.01802047344064024864 +0.01798912628143720799 +0.01795782841333919480 +0.01792657976838542278 +0.01789538027869152365 +0.01786422987644909963 
+0.01783312849392607383 +0.01780207606346632596 +0.01777107251748979294 +0.01774011778849246199 +0.01770921180904601322 +0.01767835451179826031 +0.01764754582947257108 +0.01761678569486820056 +0.01758607404086004120 +0.01755541080039854998 +0.01752479590650992888 +0.01749422929229566687 +0.01746371089093288684 +0.01743324063567405768 +0.01740281845984699427 +0.01737244429685481928 +0.01734211808017576545 +0.01731183974336350864 +0.01728160922004657110 +0.01725142644392868574 +0.01722129134878861226 +0.01719120386847989429 +0.01716116393693122022 +0.01713117148814589583 +0.01710122645620215653 +0.01707132877525290374 +0.01704147837952565622 +0.01701167520332269589 +0.01698191918102071041 +0.01695221024707095625 +0.01692254833599915465 +0.01689293338240537706 +0.01686336532096406604 +0.01683384408642381314 +0.01680436961360766771 +0.01677494183741260955 +0.01674556069280983689 +0.01671622611484462062 +0.01668693803863608924 +0.01665769639937751331 +0.01662850113233584753 +0.01659935217285195969 +0.01657024945634052310 +0.01654119291828985705 +0.01651218249426196841 +0.01648321811989238855 +0.01645429973089033646 +0.01642542726303837172 +0.01639660065219255766 +0.01636781983428233986 +0.01633908474531034841 +0.01631039532135271711 +0.01628175149855853529 +0.01625315321315017741 +0.01622460040142307755 +0.01619609299974566693 +0.01616763094455942601 +0.01613921417237862768 +0.01611084261979056978 +0.01608251622345524545 +0.01605423492010541603 +0.01602599864654657286 +0.01599780733965674648 +0.01596966093638671133 +0.01594155937375961105 +0.01591350258887112498 +0.01588549051888933292 +0.01585752310105465607 +0.01582960027267987443 +0.01580172197114987701 +0.01577388813392189074 +0.01574609869852513361 +0.01571835360256097075 +0.01569065278370279995 +0.01566299617969585389 +0.01563538372835747772 +0.01560781536757662770 +0.01558029103531416784 +0.01555281066960269819 +0.01552537420854647500 +0.01549798159032138474 +0.01547063275317477234 +0.01544332763542567016 
+0.01541606617546439212 +0.01538884831175273311 +0.01536167398282381469 +0.01533454312728193755 +0.01530745568380281231 +0.01528041159113312231 +0.01525341078809075608 +0.01522645321356465298 +0.01519953880651469906 +0.01517266750597179130 +0.01514583925103758083 +0.01511905398088471408 +0.01509231163475645288 +0.01506561215196682881 +0.01503895547190058253 +0.01501234153401289836 +0.01498577027782969397 +0.01495924164294723351 +0.01493275556903222827 +0.01490631199582180544 +0.01487991086312339534 +0.01485355211081465857 +0.01482723567884341488 +0.01480096150722776983 +0.01477472953605575567 +0.01474853970548554126 +0.01472239195574523256 +0.01469628622713278591 +0.01467022246001620234 +0.01464420059483307300 +0.01461822057209084640 +0.01459228233236666700 +0.01456638581630726598 +0.01454053096462902886 +0.01451471771811771276 +0.01448894601762873782 +0.01446321580408675006 +0.01443752701848584168 +0.01441187960188939840 +0.01438627349542997633 +0.01436070864030943722 +0.01433518497779859291 +0.01430970244923746720 +0.01428426099603504265 +0.01425886055966917726 +0.01423350108168685429 +0.01420818250370361677 +0.01418290476740393345 +0.01415766781454103583 +0.01413247158693674808 +0.01410731602648156342 +0.01408220107513442898 +0.01405712667492296265 +0.01403209276794304371 +0.01400709929635908860 +0.01398214620240376474 +0.01395723342837796796 +0.01393236091665097168 +0.01390752860966004527 +0.01388273644991062580 +0.01385798437997622437 +0.01383327234249829250 +0.01380860028018627422 +0.01378396813581739440 +0.01375937585223684784 +0.01373482337235741937 +0.01371031063915973715 +0.01368583759569204882 +0.01366140418507009666 +0.01363701035047733615 +0.01361265603516457166 +0.01358834118245005884 +0.01356406573571945079 +0.01353982963842569227 +0.01351563283408899890 +0.01349147526629674264 +0.01346735687870353158 +0.01344327761503092897 +0.01341923741906759018 +0.01339523623466921942 +0.01337127400575825742 +0.01334735067632424055 +0.01332346619042324397 
+0.01329962049217831010 +0.01327581352577907395 +0.01325204523548178560 +0.01322831556560933632 +0.01320462446055102081 +0.01318097186476279922 +0.01315735772276684611 +0.01313378197915175856 +0.01311024457857247298 +0.01308674546575003435 +0.01306328458547185295 +0.01303986188259130194 +0.01301647730202791682 +0.01299313078876719596 +0.01296982228786063528 +0.01294655174442561028 +0.01292331910364528762 +0.01290012431076875687 +0.01287696731111066806 +0.01285384805005148311 +0.01283076647303720876 +0.01280772252557939479 +0.01278471615325522254 +0.01276174730170713881 +0.01273881591664311266 +0.01271592194383640642 +0.01269306532912557045 +0.01267024601841440154 +0.01264746395767175034 +0.01262471909293177465 +0.01260201137029349355 +0.01257934073592101819 +0.01255670713604343028 +0.01253411051695456704 +0.01251155082501330222 +0.01248902800664306208 +0.01246654200833210123 +0.01244409277663337948 +0.01242168025816435366 +0.01239930439960711291 +0.01237696514770813411 +0.01235466244927848831 +0.01233239625119348336 +0.01231016650039279230 +0.01228797314388041344 +0.01226581612872441884 +0.01224369540205722841 +0.01222161091107516750 +0.01219956260303873066 +0.01217755042527234044 +0.01215557432516436998 +0.01213363425016709615 +0.01211173014779650357 +0.01208986196563251007 +0.01206802965131854172 +0.01204623315256183116 +0.01202447241713315226 +0.01200274739286675933 +0.01198105802766049124 +0.01195940426947554591 +0.01193778606633647162 +0.01191620336633119129 +0.01189465611761082210 +0.01187314426838976568 +0.01185166776694547563 +0.01183022656161853044 +0.01180882060081257096 +0.01178744983299417383 +0.01176611420669286534 +0.01174481367050099131 +0.01172354817307378998 +0.01170231766312914909 +0.01168112208944772391 +0.01165996140087281578 +0.01163883554631022117 +0.01161774447472842080 +0.01159668813515822398 +0.01157566647669291265 +0.01155467944848814765 +0.01153372699976188895 +0.01151280907979434359 +0.01149192563792783735 +0.01147107662356700898 
+0.01145026198617838872 +0.01142948167529066542 +0.01140873564049444885 +0.01138802383144223163 +0.01136734619784847238 +0.01134670268948931651 +0.01132609325620270377 +0.01130551784788826758 +0.01128497641450730039 +0.01126446890608266867 +0.01124399527269868797 +0.01122355546450126002 +0.01120314943169761941 +0.01118277712455640997 +0.01116243849340753727 +0.01114213348864213915 +0.01112186206071264123 +0.01110162416013249498 +0.01108141973747629218 +0.01106124874337962270 +0.01104111112853907797 +0.01102100684371215382 +0.01100093583971714466 +0.01098089806743325798 +0.01096089347780033335 +0.01094092202181896900 +0.01092098365055042992 +0.01090107831511645184 +0.01088120596669943374 +0.01086136655654210137 +0.01084156003594772058 +0.01082178635627984234 +0.01080204546896233568 +0.01078233732547935995 +0.01076266187737516013 +0.01074301907625427842 +0.01072340887378120042 +0.01070383122168047825 +0.01068428607173669415 +0.01066477337579423319 +0.01064529308575749671 +0.01062584515359051365 +0.01060642953131720433 +0.01058704617102112361 +0.01056769502484549220 +0.01054837604499307170 +0.01052908918372617679 +0.01050983439336664743 +0.01049061162629565461 +0.01047142083495378878 +0.01045226197184094714 +0.01043313498951622953 +0.01041403984059803382 +0.01039497647776380790 +0.01037594485375012941 +0.01035694492135260519 +0.01033797663342582093 +0.01031903994288330302 +0.01030013480269736936 +0.01028126116589929591 +0.01026241898557894716 +0.01024360821488503291 +0.01022482880702483936 +0.01020608071526423084 +0.01018736389292769842 +0.01016867829339809098 +0.01015002387011681301 +0.01013140057658353660 +0.01011280836635635068 +0.01009424719305154240 +0.01007571701034357115 +0.01005721777196520381 +0.01003874943170711756 +0.01002031194341818088 +0.01000190526100518812 +0.00998352933843282483 +0.00996518412972380654 +0.00994686958895849536 +0.00992858567027516542 +0.00991033232786971835 +0.00989210951599574402 +0.00987391718896449798 +0.00985575530114467246 
+0.00983762380696256294 +0.00981952266090184607 +0.00980145181750363172 +0.00978341123136635543 +0.00976540085714564313 +0.00974742064955451577 +0.00972947056336300603 +0.00971155055339836121 +0.00969366057454487326 +0.00967580058174376083 +0.00965797052999333404 +0.00964017037434868225 +0.00962240006992180590 +0.00960465957188146036 +0.00958694883545315769 +0.00956926781591911461 +0.00955161646861805991 +0.00953399474894546343 +0.00951640261235317877 +0.00949884001434959935 +0.00948130691049949015 +0.00946380325642395305 +0.00944632900780047711 +0.00942888412036268532 +0.00941146854990048375 +0.00939408225225986208 +0.00937672518334292313 +0.00935939729910779082 +0.00934209855556852702 +0.00932482890879522686 +0.00930758831491370654 +0.00929037673010570976 +0.00927319411060867349 +0.00925604041271575577 +0.00923891559277582007 +0.00922181960719323235 +0.00920475241242797032 +0.00918771396499546734 +0.00917070422146662803 +0.00915372313846771028 +0.00913677067268028539 +0.00911984678084125366 +0.00910295141974267613 +0.00908608454623180233 +0.00906924611721101823 +0.00905243608963770575 +0.00903565442052433124 +0.00901890106693824939 +0.00900217598600172066 +0.00898547913489189387 +0.00896881047084066053 +0.00895216995113466349 +0.00893555753311520498 +0.00891897317417828482 +0.00890241683177438355 +0.00888588846340855956 +0.00886938802664035894 +0.00885291547908365409 +0.00883647077840679467 +0.00882005388233234218 +0.00880366474863714457 +0.00878730333515226336 +0.00877096959976287477 +0.00875466350040830787 +0.00873838499508181564 +0.00872213404183080389 +0.00870591059875644793 +0.00868971462401390594 +0.00867354607581211946 +0.00865740491241378043 +0.00864129109213538846 +0.00862520457334699751 +0.00860914531447234781 +0.00859311327398871311 +0.00857710841042688167 +0.00856113068237108858 +0.00854518004845894984 +0.00852925646738147969 +0.00851335989788293453 +0.00849749029876081807 +0.00848164762886585710 +0.00846583184710185226 +0.00845004291242574916 
+0.00843428078384746496 +0.00841854542042990914 +0.00840283678128894707 +0.00838715482559327166 +0.00837149951256441378 +0.00835587080147661213 +0.00834026865165691036 +0.00832469302248492644 +0.00830914387339291849 +0.00829362116386570675 +0.00827812485344053654 +0.00826265490170722920 +0.00824721126830784898 +0.00823179391293692164 +0.00821640279534118986 +0.00820103787531960803 +0.00818569911272343416 +0.00817038646745590728 +0.00815509989947243996 +0.00813983936878040840 +0.00812460483543920448 +0.00810939625956012648 +0.00809421360130627328 +0.00807905682089269696 +0.00806392587858604550 +0.00804882073470479173 +0.00803374134961903734 +0.00801868768375041223 +0.00800365969757221331 +0.00798865735160915469 +0.00797368060643741279 +0.00795872942268456215 +0.00794380376102952514 +0.00792890358220250252 +0.00791402884698490586 +0.00789917951620938176 +0.00788435555075965577 +0.00786955691157055666 +0.00785478355962795048 +0.00784003545596864172 +0.00782531256168040625 +0.00781061483790181695 +0.00779594224582234787 +0.00778129474668215991 +0.00776667230177219454 +0.00775207487243401421 +0.00773750242005976849 +0.00772295490609223225 +0.00770843229202461139 +0.00769393453940061828 +0.00767946160981434079 +0.00766501346491018504 +0.00765059006638294826 +0.00763619137597756036 +0.00762181735548920353 +0.00760746796676320652 +0.00759314317169494830 +0.00757884293222988413 +0.00756456721036340846 +0.00755031596814092869 +0.00753608916765762579 +0.00752188677105858957 +0.00750770874053866694 +0.00749355503834241155 +0.00747942562676409597 +0.00746532046814756510 +0.00745123952488625004 +0.00743718275942312473 +0.00742315013425059577 +0.00740914161191054670 +0.00739515715499410899 +0.00738119672614187799 +0.00736726028804358425 +0.00735334780343822967 +0.00733945923511397907 +0.00732559454590804744 +0.00731175369870679624 +0.00729793665644550352 +0.00728414338210845152 +0.00727037383872881308 +0.00725662798938859176 +0.00724290579721864353 +0.00722920722539847904 
+0.00721553223715642230 +0.00720188079576933229 +0.00718825286456273478 +0.00717464840691067491 +0.00716106738623566250 +0.00714750976600872067 +0.00713397550974916722 +0.00712046458102473263 +0.00710697694345140824 +0.00709351256069340550 +0.00708007139646315165 +0.00706665341452116999 +0.00705325857867611806 +0.00703988685278463498 +0.00702653820075136836 +0.00701321258652891528 +0.00699990997411765840 +0.00698663032756596369 +0.00697337361096982047 +0.00696013978847304093 +0.00694692882426706758 +0.00693374068259101049 +0.00692057532773152767 +0.00690743272402277644 +0.00689431283584642472 +0.00688121562763155652 +0.00686814106385460338 +0.00685508910903936692 +0.00684205972775684800 +0.00682905288462534732 +0.00681606854431025724 +0.00680310667152415633 +0.00679016723102666021 +0.00677725018762436165 +0.00676435550617092343 +0.00675148315156681375 +0.00673863308875943638 +0.00672580528274299878 +0.00671299969855845922 +0.00670021630129351460 +0.00668745505608247127 +0.00667471592810633604 +0.00666199888259260027 +0.00664930388481533088 +0.00663663090009502382 +0.00662397989379856587 +0.00661135083133929363 +0.00659874367817675153 +0.00658615839981681328 +0.00657359496181155866 +0.00656105332975920504 +0.00654853346930412035 +0.00653603534613667568 +0.00652355892599334932 +0.00651110417465648132 +0.00649867105795438361 +0.00648625954176122819 +0.00647386959199695074 +0.00646150117462734527 +0.00644915425566382206 +0.00643682880116350740 +0.00642452477722913612 +0.00641224215000898382 +0.00639998088569688345 +0.00638774095053206824 +0.00637552231079926805 +0.00636332493282850634 +0.00635114878299514705 +0.00633899382771984689 +0.00632686003346840786 +0.00631474736675190045 +0.00630265579412642336 +0.00629058528219316945 +0.00627853579759836584 +0.00626650730703320977 +0.00625449977723379399 +0.00624251317498107552 +0.00623054746710088696 +0.00621860262046375952 +0.00620667860198498809 +0.00619477537862454970 +0.00618289291738697254 +0.00617103118532147735 
+0.00615919014952169726 +0.00614736977712579229 +0.00613557003531634178 +0.00612379089132030884 +0.00611203231240897270 +0.00610029426589787840 +0.00608857671914683589 +0.00607687963955979692 +0.00606520299458486543 +0.00605354675171422899 +0.00604191087848407818 +0.00603029534247465429 +0.00601870011131004027 +0.00600712515265828563 +0.00599557043423122951 +0.00598403592378451976 +0.00597252158911754788 +0.00596102739807335361 +0.00594955331853870215 +0.00593809931844384818 +0.00592666536576266518 +0.00591525142851250050 +0.00590385747475411643 +0.00589248347259174743 +0.00588112939017290063 +0.00586979519568840962 +0.00585848085737237633 +0.00584718634350209644 +0.00583591162239800818 +0.00582465666242365076 +0.00581342143198567383 +0.00580220589953365541 +0.00579101003356019203 +0.00577983380260078863 +0.00576867717523375356 +0.00575754012008031918 +0.00574642260580437991 +0.00573532460111259718 +0.00572424607475430398 +0.00571318699552144683 +0.00570214733224854581 +0.00569112705381263906 +0.00568012612913325419 +0.00566914452717234233 +0.00565818221693423565 +0.00564723916746560656 +0.00563631534785537670 +0.00562541072723476976 +0.00561452527477712942 +0.00560365895969798349 +0.00559281175125493895 +0.00558198361874761868 +0.00557117453151771257 +0.00556038445894877636 +0.00554961337046632440 +0.00553886123553769087 +0.00552812802367203763 +0.00551741370442027871 +0.00550671824737499708 +0.00549604162217051315 +0.00548538379848268441 +0.00547474474602897398 +0.00546412443456838030 +0.00545352283390130102 +0.00544293991386964656 +0.00543237564435662852 +0.00542182999528682472 +0.00541130293662608723 +0.00540079443838148519 +0.00539030447060130213 +0.00537983300337489725 +0.00536938000683281121 +0.00535894545114653544 +0.00534852930652861529 +0.00533813154323252523 +0.00532775213155262019 +0.00531739104182414775 +0.00530704824442311887 +0.00529672370976633222 +0.00528641740831127700 +0.00527612931055611127 +0.00526585938703963505 +0.00525560760834115157 
+0.00524537394508056698 +0.00523515836791819955 +0.00522496084755480917 +0.00521478135473157391 +0.00520461986022993737 +0.00519447633487168936 +0.00518435074951881410 +0.00517424307507352403 +0.00516415328247815143 +0.00515408134271513451 +0.00514402722680696624 +0.00513399090581612150 +0.00512397235084507963 +0.00511397153303617350 +0.00510398842357163811 +0.00509402299367351253 +0.00508407521460357836 +0.00507414505766341437 +0.00506423249419418915 +0.00505433749557675314 +0.00504446003323152058 +0.00503460007861845742 +0.00502475760323699370 +0.00501493257862601748 +0.00500512497636382454 +0.00499533476806803250 +0.00498556192539557821 +0.00497580642004267441 +0.00496606822374467960 +0.00495634730827620648 +0.00494664364545090683 +0.00493695720712154524 +0.00492728796517989155 +0.00491763589155670873 +0.00490800095822168259 +0.00489838313718336370 +0.00488878240048919600 +0.00487919872022536151 +0.00486963206851680815 +0.00486008241752720980 +0.00485054973945882317 +0.00484103400655260755 +0.00483153519108800444 +0.00482205326538301393 +0.00481258820179409318 +0.00480313997271611740 +0.00479370855058236338 +0.00478429390786439326 +0.00477489601707210658 +0.00476551485075359799 +0.00475615038149518679 +0.00474680258192132325 +0.00473747142469455561 +0.00472815688251552145 +0.00471885892812281150 +0.00470957753429303034 +0.00470031267384067065 +0.00469106431961812016 +0.00468183244451558355 +0.00467261702146102957 +0.00466341802342018236 +0.00465423542339645467 +0.00464506919443089580 +0.00463591930960215864 +0.00462678574202644854 +0.00461766846485747381 +0.00460856745128640066 +0.00459948267454181846 +0.00459041410788969730 +0.00458136172463331250 +0.00457232549811322896 +0.00456330540170723966 +0.00455430140883033262 +0.00454531349293463196 +0.00453634162750937448 +0.00452738578608083073 +0.00451844594221230671 +0.00450952206950402858 +0.00450061414159316775 +0.00449172213215377498 +0.00448284601489669452 +0.00447398576356959100 +0.00446514135195684534 
+0.00445631275387950443 +0.00444749994319530717 +0.00443870289379856824 +0.00442992157962016769 +0.00442115597462747634 +0.00441240605282436360 +0.00440367178825107602 +0.00439495315498427287 +0.00438625012713693679 +0.00437756267885832263 +0.00436889078433393054 +0.00436023441778548432 +0.00435159355347080300 +0.00434296816568385726 +0.00433435822875466361 +0.00432576371704926008 +0.00431718460496964985 +0.00430862086695378392 +0.00430007247747545961 +0.00429153941104433011 +0.00428302164220585851 +0.00427451914554123104 +0.00426603189566735711 +0.00425755986723679904 +0.00424910303493771652 +0.00424066137349385971 +0.00423223485766450677 +0.00422382346224440229 +0.00421542716206375120 +0.00420704593198811642 +0.00419867974691843625 +0.00419032858179094105 +0.00418199241157712551 +0.00417367121128370093 +0.00416536495595254668 +0.00415707362066065206 +0.00414879718052011369 +0.00414053561067805834 +0.00413228888631659956 +0.00412405698265280209 +0.00411583987493865239 +0.00410763753846096150 +0.00409944994854139193 +0.00409127708053636224 +0.00408311890983702880 +0.00407497541186922514 +0.00406684656209343586 +0.00405873233600471776 +0.00405063270913270414 +0.00404254765704153453 +0.00403447715532979836 +0.00402642117963052104 +0.00401837970561110501 +0.00401035270897326986 +0.00400234016545303415 +0.00399434205082066507 +0.00398635834088062729 +0.00397838901147154912 +0.00397043403846616960 +0.00396249339777128042 +0.00395456706532772411 +0.00394665501711031867 +0.00393875722912782281 +0.00393087367742287613 +0.00392300433807199998 +0.00391514918718548991 +0.00390730820090742000 +0.00389948135541560168 +0.00389166862692149739 +0.00388386999167021759 +0.00387608542594047867 +0.00386831490604450578 +0.00386055840832805761 +0.00385281590917034046 +0.00384508738498398617 +0.00383737281221500220 +0.00382967216734270794 +0.00382198542687973335 +0.00381431256737192834 +0.00380665356539836020 +0.00379900839757124935 +0.00379137704053592690 +0.00378375947097080167 
+0.00377615566558729687 +0.00376856560112983057 +0.00376098925437576238 +0.00375342660213534183 +0.00374587762125168236 +0.00373834228860070580 +0.00373082058109109379 +0.00372331247566426741 +0.00371581794929432517 +0.00370833697898800309 +0.00370086954178463308 +0.00369341561475611604 +0.00368597517500683473 +0.00367854819967366282 +0.00367113466592588903 +0.00366373455096519847 +0.00365634783202559068 +0.00364897448637339699 +0.00364161449130715733 +0.00363426782415765797 +0.00362693446228784392 +0.00361961438309277902 +0.00361230756399961821 +0.00360501398246756239 +0.00359773361598778561 +0.00359046644208343502 +0.00358321243830957365 +0.00357597158225312186 +0.00356874385153284129 +0.00356152922379926462 +0.00355432767673466779 +0.00354713918805303575 +0.00353996373550000002 +0.00353280129685281307 +0.00352565184992029807 +0.00351851537254281414 +0.00351139184259218742 +0.00350428123797170976 +0.00349718353661607065 +0.00349009871649131080 +0.00348302675559480262 +0.00347596763195518518 +0.00346892132363232734 +0.00346188780871729569 +0.00345486706533230156 +0.00344785907163067205 +0.00344086380579679361 +0.00343388124604606000 +0.00342691137062486580 +0.00341995415781053526 +0.00341300958591128720 +0.00340607763326619814 +0.00339915827824515846 +0.00339225149924881481 +0.00338535727470855004 +0.00337847558308643952 +0.00337160640287518815 +0.00336474971259811398 +0.00335790549080909866 +0.00335107371609251730 +0.00334425436706324228 +0.00333744742236657954 +0.00333065286067821568 +0.00332387066070420319 +0.00331710080118088804 +0.00331034326087488841 +0.00330359801858304530 +0.00329686505313238733 +0.00329014434338008224 +0.00328343586821339779 +0.00327673960654966453 +0.00327005553733621546 +0.00326338363955036657 +0.00325672389219937297 +0.00325007627432036912 +0.00324344076498035011 +0.00323681734327611621 +0.00323020598833422424 +0.00322360667931096724 +0.00321701939539232023 +0.00321044411579389424 +0.00320388081976091333 +0.00319732948656814868 
+0.00319079009551989168 +0.00318426262594990453 +0.00317774705722140413 +0.00317124336872698061 +0.00316475153988857779 +0.00315827155015747279 +0.00315180337901417714 +0.00314534700596845159 +0.00313890241055924182 +0.00313246957235464168 +0.00312604847095183805 +0.00311963908597710001 +0.00311324139708569255 +0.00310685538396187916 +0.00310048102631885984 +0.00309411830389873114 +0.00308776719647244503 +0.00308142768383978544 +0.00307509974582927895 +0.00306878336229821167 +0.00306247851313255727 +0.00305618517824693837 +0.00304990333758459360 +0.00304363297111732945 +0.00303737405884546520 +0.00303112658079783418 +0.00302489051703169406 +0.00301866584763273160 +0.00301245255271497935 +0.00300625061242080875 +0.00300006000692084728 +0.00299388071641399755 +0.00298771272112734665 +0.00298155600131614533 +0.00297541053726376944 +0.00296927630928166915 +0.00296315329770932556 +0.00295704148291423257 +0.00295094084529182827 +0.00294485136526547895 +0.00293877302328641142 +0.00293270579983371258 +0.00292664967541423097 +0.00292060463056258808 +0.00291457064584111801 +0.00290854770183982978 +0.00290253577917634880 +0.00289653485849591594 +0.00289054492047129477 +0.00288456594580277980 +0.00287859791521813098 +0.00287264080947252816 +0.00286669460934855505 +0.00286075929565614111 +0.00285483484923250213 +0.00284892125094214719 +0.00284301848167680402 +0.00283712652235538262 +0.00283124535392394964 +0.00282537495735566292 +0.00281951531365075454 +0.00281366640383648708 +0.00280782820896709889 +0.00280200071012377729 +0.00279618388841461557 +0.00279037772497456749 +0.00278458220096540953 +0.00277879729757570968 +0.00277302299602077062 +0.00276725927754261150 +0.00276150612340990463 +0.00275576351491794123 +0.00275003143338859975 +0.00274430986017031421 +0.00273859877663800355 +0.00273289816419306246 +0.00272720800426330505 +0.00272152827830291410 +0.00271585896779243758 +0.00271020005423871103 +0.00270455151917484539 +0.00269891334416015767 +0.00269328551078017089 
+0.00268766800064651869 +0.00268206079539696704 +0.00267646387669533614 +0.00267087722623146184 +0.00266530082572117178 +0.00265973465690624291 +0.00265417870155433338 +0.00264863294145898558 +0.00264309735843956195 +0.00263757193434120945 +0.00263205665103481310 +0.00262655149041697523 +0.00262105643440994863 +0.00261557146496162098 +0.00261009656404546367 +0.00260463171366049490 +0.00259917689583123810 +0.00259373209260768895 +0.00258829728606525550 +0.00258287245830475130 +0.00257745759145232333 +0.00257205266765943991 +0.00256665766910283300 +0.00256127257798446223 +0.00255589737653147235 +0.00255053204699617073 +0.00254517657165596618 +0.00253983093281333989 +0.00253449511279581034 +0.00252916909395589249 +0.00252385285867102929 +0.00251854638934360551 +0.00251324966840087453 +0.00250796267829490969 +0.00250268540150259868 +0.00249741782052557851 +0.00249215991789019471 +0.00248691167614748226 +0.00248167307787310661 +0.00247644410566733942 +0.00247122474215500645 +0.00246601496998546509 +0.00246081477183253232 +0.00245562413039448427 +0.00245044302839400032 +0.00244527144857811579 +0.00244010937371819895 +0.00243495678660990466 +0.00242981367007311187 +0.00242468000695193883 +0.00241955578011465505 +0.00241444097245366090 +0.00240933556688545120 +0.00240423954635056924 +0.00239915289381356384 +0.00239407559226297115 +0.00238900762471124656 +0.00238394897419475817 +0.00237889962377371613 +0.00237385955653215443 +0.00236882875557787925 +0.00236380720404244689 +0.00235879488508110519 +0.00235379178187277100 +0.00234879787761997855 +0.00234381315554885606 +0.00233883759890906283 +0.00233387119097377017 +0.00232891391503962800 +0.00232396575442670155 +0.00231902669247845356 +0.00231409671256170265 +0.00230917579806656607 +0.00230426393240644976 +0.00229936109901799539 +0.00229446728136102791 +0.00228958246291854732 +0.00228470662719666489 +0.00227983975772457371 +0.00227498183805450917 +0.00227013285176171737 +0.00226529278244440432 +0.00226046161372370517 
+0.00225563932924364129 +0.00225082591267108901 +0.00224602134769573240 +0.00224122561803003330 +0.00223643870740918276 +0.00223166059959108458 +0.00222689127835627157 +0.00222213072750791947 +0.00221737893087178058 +0.00221263587229615253 +0.00220790153565183014 +0.00220317590483208679 +0.00219845896375261063 +0.00219375069635149118 +0.00218905108658916121 +0.00218436011844837762 +0.00217967777593416337 +0.00217500404307379141 +0.00217033890391671877 +0.00216568234253456792 +0.00216103434302109421 +0.00215639488949213214 +0.00215176396608555845 +0.00214714155696126962 +0.00214252764630111893 +0.00213792221830890567 +0.00213332525721032047 +0.00212873674725290929 +0.00212415667270603788 +0.00211958501786085407 +0.00211502176703024913 +0.00211046690454881917 +0.00210592041477282834 +0.00210138228208017405 +0.00209685249087034151 +0.00209233102556437853 +0.00208781787060483221 +0.00208331301045574374 +0.00207881642960259507 +0.00207432811255226250 +0.00206984804383299930 +0.00206537620799438848 +0.00206091258960728035 +0.00205645717326380850 +0.00205200994357730487 +0.00204757088518228193 +0.00204313998273440191 +0.00203871722091041822 +0.00203430258440815337 +0.00202989605794646122 +0.00202549762626518404 +0.00202110727412511655 +0.00201672498630797769 +0.00201235074761635683 +0.00200798454287368089 +0.00200362635692418953 +0.00199927617463289140 +0.00199493398088551599 +0.00199059976058849151 +0.00198627349866890585 +0.00198195518007444763 +0.00197764478977340355 +0.00197334231275460336 +0.00196904773402737212 +0.00196476103862151851 +0.00196048221158728019 +0.00195621123799527997 +0.00195194810293651435 +0.00194769279152229541 +0.00194344528888422234 +0.00193920558017413752 +0.00193497365056411320 +0.00193074948524636940 +0.00192653306943328054 +0.00192232438835732307 +0.00191812342727103072 +0.00191393017144697499 +0.00190974460617771295 +0.00190556671677575293 +0.00190139648857352741 +0.00189723390692334601 +0.00189307895719737353 +0.00188893162478757427 
+0.00188479189510568838 +0.00188065975358318459 +0.00187653518567124371 +0.00187241817684070250 +0.00186830871258202740 +0.00186420677840527381 +0.00186011235984005719 +0.00185602544243550108 +0.00185194601176021751 +0.00184787405340226720 +0.00184380955296911935 +0.00183975249608761722 +0.00183570286840393734 +0.00183166065558356523 +0.00182762584331124900 +0.00182359841729096963 +0.00181957836324590392 +0.00181556566691838759 +0.00181156031406987019 +0.00180756229048089994 +0.00180357158195107361 +0.00179958817429900272 +0.00179561205336228164 +0.00179164320499745357 +0.00178768161507995545 +0.00178372726950411925 +0.00177978015418310152 +0.00177584025504887145 +0.00177190755805215990 +0.00176798204916243560 +0.00176406371436785953 +0.00176015253967525876 +0.00175624851111009193 +0.00175235161471640373 +0.00174846183655679843 +0.00174457916271240974 +0.00174070357928284535 +0.00173683507238617816 +0.00173297362815889042 +0.00172911923275585657 +0.00172527187235029552 +0.00172143153313374011 +0.00171759820131599824 +0.00171377186312513191 +0.00170995250480740642 +0.00170614011262726719 +0.00170233467286730073 +0.00169853617182820515 +0.00169474459582874180 +0.00169095993120571639 +0.00168718216431394067 +0.00168341128152620003 +0.00167964726923321107 +0.00167589011384360422 +0.00167213980178385980 +0.00166839631949831078 +0.00166465965344908605 +0.00166092979011608325 +0.00165720671599693583 +0.00165349041760697664 +0.00164978088147920148 +0.00164607809416425044 +0.00164238204223035524 +0.00163869271226332121 +0.00163501009086648719 +0.00163133416466069645 +0.00162766492028424336 +0.00162400234439288296 +0.00162034642365975283 +0.00161669714477537555 +0.00161305449444759858 +0.00160941845940158236 +0.00160578902637974956 +0.00160216618214177294 +0.00159854991346452201 +0.00159494020714205220 +0.00159133704998555081 +0.00158774042882332475 +0.00158415033050075013 +0.00158056674188025150 +0.00157698964984127237 +0.00157341904128023504 +0.00156985490311051701 
+0.00156629722226240978 +0.00156274598568308869 +0.00155920118033659621 +0.00155566279320379277 +0.00155213081128233415 +0.00154860522158663683 +0.00154508601114785715 +0.00154157316701383535 +0.00153806667624909496 +0.00153456652593479573 +0.00153107270316870194 +0.00152758519506516013 +0.00152410398875506238 +0.00152062907138581253 +0.00151716043012130992 +0.00151369805214190381 +0.00151024192464437480 +0.00150679203484189658 +0.00150334836996401325 +0.00149991091725660088 +0.00149647966398184628 +0.00149305459741821381 +0.00148963570486042177 +0.00148622297361939836 +0.00148281639102227627 +0.00147941594441233196 +0.00147602162114898977 +0.00147263340860777077 +0.00146925129418027387 +0.00146587526527415198 +0.00146250530931306194 +0.00145914141373666099 +0.00145578356600057084 +0.00145243175357634316 +0.00144908596395143942 +0.00144574618462920139 +0.00144241240312881431 +0.00143908460698530033 +0.00143576278374947237 +0.00143244692098791460 +0.00142913700628295465 +0.00142583302723264219 +0.00142253497145070355 +0.00141924282656654094 +0.00141595658022519049 +0.00141267622008730176 +0.00140940173382910340 +0.00140613310914239321 +0.00140287033373449194 +0.00139961339532823636 +0.00139636228166194415 +0.00139311698048939130 +0.00138987747957978504 +0.00138664376671774887 +0.00138341582970327379 +0.00138019365635172562 +0.00137697723449379931 +0.00137376655197549722 +0.00137056159665811675 +0.00136736235641821374 +0.00136416881914758073 +0.00136098097275323031 +0.00135779880515736885 +0.00135462230429737678 +0.00135145145812577130 +0.00134828625461021071 +0.00134512668173343976 +0.00134197272749329096 +0.00133882437990265790 +0.00133568162698947121 +0.00133254445679667531 +0.00132941285738220959 +0.00132628681681898212 +0.00132316632319486082 +0.00132005136461264263 +0.00131694192919003619 +0.00131383800505964119 +0.00131073958036893347 +0.00130764664328022898 +0.00130455918197068836 +0.00130147718463227875 +0.00129840063947176581 +0.00129532953471068568 
+0.00129226385858534267 +0.00128920359934676259 +0.00128614874526070534 +0.00128309928460762848 +0.00128005520568268269 +0.00127701649679568051 +0.00127398314627108944 +0.00127095514244800634 +0.00126793247368015352 +0.00126491512833585770 +0.00126190309479802770 +0.00125889636146414314 +0.00125589491674624946 +0.00125289874907091956 +0.00124990784687926157 +0.00124692219862689740 +0.00124394179278394280 +0.00124096661783500172 +0.00123799666227915176 +0.00123503191462992078 +0.00123207236341528535 +0.00122911799717766038 +0.00122616880447387567 +0.00122322477387517550 +0.00122028589396719930 +0.00121735215334996629 +0.00121442354063788088 +0.00121150004445970969 +0.00120858165345857373 +0.00120566835629194063 +0.00120276014163161614 +0.00119985699816372465 +0.00119695891458871631 +0.00119406587962134756 +0.00119117788199068217 +0.00118829491044007132 +0.00118541695372716333 +0.00118254400062387183 +0.00117967603991639522 +0.00117681306040520088 +0.00117395505090501346 +0.00117110200024481766 +0.00116825389726785372 +0.00116541073083159552 +0.00116257248980778131 +0.00115973916308237434 +0.00115691073955558272 +0.00115408720814184950 +0.00115126855776984485 +0.00114845477738247082 +0.00114564585593686223 +0.00114284178240437991 +0.00114004254577061248 +0.00113724813503538304 +0.00113445853921272295 +0.00113167374733091301 +0.00112889374843245702 +0.00112611853157409394 +0.00112334808582679243 +0.00112058240027576475 +0.00111782146402045353 +0.00111506526617456041 +0.00111231379586602540 +0.00110956704223705077 +0.00110682499444408975 +0.00110408764165787474 +0.00110135497306338781 +0.00109862697785991122 +0.00109590364526100420 +0.00109318496449452144 +0.00109047092480261975 +0.00108776151544177782 +0.00108505672568277867 +0.00108235654481075275 +0.00107966096212516694 +0.00107696996693984991 +0.00107428354858299336 +0.00107160169639716730 +0.00106892439973933382 +0.00106625164798086780 +0.00106358343050756286 +0.00106091973671964992 +0.00105826055603181466 
+0.00105560587787320820 +0.00105295569168746897 +0.00105030998693273945 +0.00104766875308168628 +0.00104503197962151962 +0.00104239965605400415 +0.00103977177189549467 +0.00103714831667694194 +0.00103452927994392477 +0.00103191465125666653 +0.00102930442019006610 +0.00102669857633371463 +0.00102409710929192281 +0.00102150000868373846 +0.00101890726414299447 +0.00101631886531830615 +0.00101373480187312657 +0.00101115506348575606 +0.00100857963984937933 +0.00100600852067209254 +0.00100344169567693803 +0.00100087915460193118 +0.00099832088720009662 +0.00099576688323949861 +0.00099321713250327855 +0.00099067162478968097 +0.00098813034991210259 +0.00098559329769911893 +0.00098306045799452491 +0.00098053182065737298 +0.00097800737556201435 +0.00097548711259813105 +0.00097297102167078441 +0.00097045909270045735 +0.00096795131562309514 +0.00096544768039014226 +0.00096294817696860127 +0.00096045279534106296 +0.00095796152550576121 +0.00095547435747662349 +0.00095299128128330777 +0.00095051228697126395 +0.00094803736460178061 +0.00094556650425202447 +0.00094309969601510946 +0.00094063693000014602 +0.00093817819633228530 +0.00093572348515278824 +0.00093327278661907312 +0.00093082609090476976 +0.00092838338819978879 +0.00092594466871037738 +0.00092350992265917496 +0.00092107914028527829 +0.00091865231184430727 +0.00091622942760845847 +0.00091381047786658434 +0.00091139545292425058 +0.00090898434310380410 +0.00090657713874444637 +0.00090417383020229209 +0.00090177440785045332 +0.00089937886207910325 +0.00089698718329555205 +0.00089459936192432064 +0.00089221538840721906 +0.00088983525320341067 +0.00088745894678951011 +0.00088508645965964985 +0.00088271778232556347 +0.00088035290531666489 +0.00087799181918014099 +0.00087563451448101695 +0.00087328098180226561 +0.00087093121174488055 +0.00086858519492796380 +0.00086624292198881920 +0.00086390438358304985 +0.00086156957038463207 +0.00085923847308602870 +0.00085691108239827573 +0.00085458738905108114 +0.00085226738379291767 
+0.00084995105739112944 +0.00084763840063202252 +0.00084532940432098470 +0.00084302405928257207 +0.00084072235636062213 +0.00083842428641835870 +0.00083612984033850832 +0.00083383900902338757 +0.00083155178339504191 +0.00082926815439534176 +0.00082698811298610145 +0.00082471165014919435 +0.00082243875688667301 +0.00082016942422088025 +0.00081790364319458183 +0.00081564140487107599 +0.00081338270033433314 +0.00081112752068910232 +0.00080887585706105507 +0.00080662770059690251 +0.00080438304246453155 +0.00080214187385313864 +0.00079990418597335980 +0.00079766997005740906 +0.00079543921735921342 +0.00079321191915454733 +0.00079098806674118881 +0.00078876765143904464 +0.00078655066459030684 +0.00078433709755959394 +0.00078212694173409865 +0.00077992018852373376 +0.00077771682936129266 +0.00077551685570260054 +0.00077332025902666262 +0.00077112703083582861 +0.00076893716265594993 +0.00076675064603653467 +0.00076456747255091969 +0.00076238763379643353 +0.00076021112139455641 +0.00075803792699109438 +0.00075586804225635080 +0.00075370145888529029 +0.00075153816859772579 +0.00074937816313848632 +0.00074722143427759789 +0.00074506797381046551 +0.00074291777355805014 +0.00074077082536706113 +0.00073862712111013543 +0.00073648665268603365 +0.00073434941201982063 +0.00073221539106307367 +0.00073008458179406050 +0.00072795697621794511 +0.00072583256636698634 +0.00072371134430074128 +0.00072159330210626088 +0.00071947843189830646 +0.00071736672581954736 +0.00071525817604077395 +0.00071315277476111626 +0.00071105051420824854 +0.00070895138663861000 +0.00070685538433762764 +0.00070476249961993142 +0.00070267272482957516 +0.00070058605234026993 +0.00069850247455561078 +0.00069642198390929987 +0.00069434457286538410 +0.00069227023391849095 +0.00069019895959405552 +0.00068813074244857811 +0.00068606557506984572 +0.00068400345007718969 +0.00068194436012172755 +0.00067988829788660668 +0.00067783525608726457 +0.00067578522747167565 +0.00067373820482060894 +0.00067169418094788615 
+0.00066965314870064076 +0.00066761510095957944 +0.00066558003063925386 +0.00066354793068832232 +0.00066151879408982216 +0.00065949261386143859 +0.00065746938305578852 +0.00065544909476068184 +0.00065343174209942022 +0.00065141731823106829 +0.00064940581635073639 +0.00064739722968987657 +0.00064539155151656499 +0.00064338877513579100 +0.00064138889388976451 +0.00063939190115819939 +0.00063739779035862278 +0.00063540655494666873 +0.00063341818841638976 +0.00063143268430055364 +0.00062945003617096806 +0.00062747023763877832 +0.00062549328235478709 +0.00062351916400977538 +0.00062154787633481089 +0.00061957941310157897 +0.00061761376812270394 +0.00061565093525207558 +0.00061369090838517780 +0.00061173368145942041 +0.00060977924845447614 +0.00060782760339260554 +0.00060587874033901298 +0.00060393265340217672 +0.00060198933673419634 +0.00060004878453113369 +0.00059811099103337538 +0.00059617595052596241 +0.00059424365733896098 +0.00059231410584781763 +0.00059038729047370601 +0.00058846320568389614 +0.00058654184599211978 +0.00058462320595892314 +0.00058270728019205100 +0.00058079406334680711 +0.00057888355012642703 +0.00057697573528245382 +0.00057507061361511934 +0.00057316817997371593 +0.00057126842925698076 +0.00056937135641348493 +0.00056747695644201535 +0.00056558522439195712 +0.00056369615536369828 +0.00056180974450900526 +0.00055992598703143127 +0.00055804487818671069 +0.00055616641328315008 +0.00055429058768203838 +0.00055241739679804834 +0.00055054683609963415 +0.00054867890110945055 +0.00054681358740475369 +0.00054495089061781560 +0.00054309080643633544 +0.00054123333060385821 +0.00053937845892018340 +0.00053752618724179550 +0.00053567651148227343 +0.00053382942761272018 +0.00053198493166218414 +0.00053014301971808704 +0.00052830368792664137 +0.00052646693249329532 +0.00052463274968315326 +0.00052280113582140820 +0.00052097208729377700 +0.00051914560054694013 +0.00051732167208896473 +0.00051550029848976004 +0.00051368147638150259 +0.00051186520245908554 
+0.00051005147348055335 +0.00050824028626755304 +0.00050643163770576623 +0.00050462552474536660 +0.00050282194440145838 +0.00050102089375452853 +0.00049922236995088916 +0.00049742637020313164 +0.00049563289179056910 +0.00049384193205969621 +0.00049205348842463385 +0.00049026755836758282 +0.00048848413943927672 +0.00048670322925942910 +0.00048492482551719596 +0.00048314892597162276 +0.00048137552845209912 +0.00047960463085881409 +0.00047783623116321122 +0.00047607032740843423 +0.00047430691770979544 +0.00047254600025521660 +0.00047078757330569395 +0.00046903163519574015 +0.00046727818433384930 +0.00046552721920293584 +0.00046377873836080276 +0.00046203274044058056 +0.00046028922415118563 +0.00045854818827776797 +0.00045680963168216054 +0.00045507355330332250 +0.00045333995215779941 +0.00045160882734015791 +0.00044988017802343383 +0.00044815400345957971 +0.00044643030297990223 +0.00044470907599550015 +0.00044299032199771418 +0.00044127404055855333 +0.00043956023133113543 +0.00043784889405011819 +0.00043614002853213538 +0.00043443363467621568 +0.00043272971246421878 +0.00043102826196125691 +0.00042932928331611465 +0.00042763277676167137 +0.00042593874261531500 +0.00042424718127934992 +0.00042255809324141883 +0.00042087147907490178 +0.00041918733943932144 +0.00041750567508074384 +0.00041582648683217434 +0.00041414977561394926 +0.00041247554243412706 +0.00041080378838887659 +0.00040913451466284919 +0.00040746772252956407 +0.00040580341335177527 +0.00040414158858183974 +0.00040248224976208364 +0.00040082539852515678 +0.00039917103659438890 +0.00039751916578413597 +0.00039586978800012275 +0.00039422290523977562 +0.00039257851959256524 +0.00039093663324032084 +0.00038929724845755865 +0.00038766036761178635 +0.00038602599316382241 +0.00038439412766808290 +0.00038276477377288828 +0.00038113793422074420 +0.00037951361184862656 +0.00037789180958824805 +0.00037627253046633284 +0.00037465577760486645 +0.00037304155422135496 +0.00037142986362906572 +0.00036982070923725923 
+0.00036821409455142020 +0.00036661002317347655 +0.00036500849880200203 +0.00036340952523242979 +0.00036181310635723725 +0.00036021924616612947 +0.00035862794874621655 +0.00035703921828217865 +0.00035545305905641263 +0.00035386947544918998 +0.00035228847193878441 +0.00035071005310159621 +0.00034913422361227076 +0.00034756098824379947 +0.00034599035186761123 +0.00034442231945365867 +0.00034285689607048788 +0.00034129408688529482 +0.00033973389716397516 +0.00033817633227116288 +0.00033662139767024542 +0.00033506909892338583 +0.00033351944169151578 +0.00033197243173432536 +0.00033042807491023368 +0.00032888637717635359 +0.00032734734458843268 +0.00032581098330079636 +0.00032427729956626230 +0.00032274629973604832 +0.00032121799025966962 +0.00031969237768480558 +0.00031816946865717783 +0.00031664926992039150 +0.00031513178831577221 +0.00031361703078218456 +0.00031210500435583971 +0.00031059571617007576 +0.00030908917345514312 +0.00030758538353795398 +0.00030608435384182653 +0.00030458609188620771 +0.00030309060528638894 +0.00030159790175318646 +0.00030010798909262863 +0.00029862087520561033 +0.00029713656808753175 +0.00029565507582792616 +0.00029417640661006685 +0.00029270056871054753 +0.00029122757049886605 +0.00028975742043697015 +0.00028829012707879157 +0.00028682569906976366 +0.00028536414514632224 +0.00028390547413537548 +0.00028244969495377437 +0.00028099681660774707 +0.00027954684819232186 +0.00027809979889073121 +0.00027665567797379272 +0.00027521449479926663 +0.00027377625881121122 +0.00027234097953929504 +0.00027090866659810336 +0.00026947932968642111 +0.00026805297858649511 +0.00026662962316327104 +0.00026520927336362116 +0.00026379193921553980 +0.00026237763082732370 +0.00026096635838672802 +0.00025955813216010665 +0.00025815296249152213 +0.00025675085980184510 +0.00025535183458782448 +0.00025395589742113934 +0.00025256305894742499 +0.00025117332988528683 +0.00024978672102527632 +0.00024840324322886436 +0.00024702290742738042 +0.00024564572462092881 
+0.00024427170587728962 +0.00024290086233079398 +0.00024153320518117510 +0.00024016874569240254 +0.00023880749519149109 +0.00023744946506728458 +0.00023609466676922187 +0.00023474311180608201 +0.00023339481174469433 +0.00023204977820864901 +0.00023070802287696265 +0.00022936955748273475 +0.00022803439381177784 +0.00022670254370122682 +0.00022537401903812047 +0.00022404883175797266 +0.00022272699384330929 +0.00022140851732218987 +0.00022009341426670491 +0.00021878169679145334 +0.00021747337705199180 +0.00021616846724327452 +0.00021486697959805965 +0.00021356892638529803 +0.00021227431990850418 +0.00021098317250410086 +0.00020969549653974254 +0.00020841130441262543 +0.00020713060854776722 +0.00020585342139626986 +0.00020457975543356424 +0.00020330962315763114 +0.00020204303708719958 +0.00020078000975993625 +0.00019952055373060248 +0.00019826468156919718 +0.00019701240585908093 +0.00019576373919508234 +0.00019451869418157656 +0.00019327728343056024 +0.00019203951955969501 +0.00019080541519033841 +0.00018957498294555776 +0.00018834823544812394 +0.00018712518531848604 +0.00018590584517273840 +0.00018469022762055985 +0.00018347834526314424 +0.00018227021069111028 +0.00018106583648239621 +0.00017986523520014603 +0.00017866841939057139 +0.00017747540158080395 +0.00017628619427673283 +0.00017510080996082936 +0.00017391926108995291 +0.00017274156009315346 +0.00017156771936945386 +0.00017039775128562226 +0.00016923166817393216 +0.00016806948232991488 +0.00016691120601009183 +0.00016575685142971135 +0.00016460643076046178 +0.00016345995612818110 +0.00016231743961055842 +0.00016117889323482624 +0.00016004432897543912 +0.00015891375875175761 +0.00015778719442571256 +0.00015666464779947077 +0.00015554613061308982 +0.00015443165454217376 +0.00015332123119551395 +0.00015221487211273922 +0.00015111258876195096 +0.00015001439253736109 +0.00014892029475692444 +0.00014783030665997283 +0.00014674443940484002 +0.00014566270406650035 +0.00014458511163419142 +0.00014351167300904666 
+0.00014244239900172652 +0.00014137730033005207 +0.00014031638761663633 +0.00013925967138652860 +0.00013820716206485221 +0.00013715886997445079 +0.00013611480533354034 +0.00013507497825336579 +0.00013403939873586003 +0.00013300807667131781 +0.00013198102183607048 +0.00013095824389017051 +0.00012993975237508596 +0.00012892555671140286 +0.00012791566619653726 +0.00012691009000246207 +0.00012590883717344099 +0.00012491191662377510 +0.00012391933713556402 +0.00012293110735648102 +0.00012194723579755565 +0.00012096773083098366 +0.00011999260068793976 +0.00011902185345641399 +0.00011805549707906165 +0.00011709353935107408 +0.00011613598791806021 +0.00011518285027395829 +0.00011423413375895662 +0.00011328984555744068 +0.00011234999269595689 +0.00011141458204120259 +0.00011048362029803099 +0.00010955711400748957 +0.00010863506954487006 +0.00010771749311779088 +0.00010680439076430042 +0.00010589576835100762 +0.00010499163157123387 +0.00010409198594320032 +0.00010319683680823301 +0.00010230618932900173 +0.00010142004848778428 +0.00010053841908476233 +0.00009966130573634098 +0.00009878871287350876 +0.00009792064474021815 +0.00009705710539180302 +0.00009619809869342526 +0.00009534362831855720 +0.00009449369774749003 +0.00009364831026588584 +0.00009280746896335510 +0.00009197117673207145 +0.00009113943626542206 +0.00009031225005669428 +0.00008948962039779276 +0.00008867154937800208 +0.00008785803888277903 +0.00008704909059258394 +0.00008624470598175084 +0.00008544488631739589 +0.00008464963265836112 +0.00008385894585420282 +0.00008307282654421436 +0.00008229127515649126 +0.00008151429190703497 +0.00008074187679889607 +0.00007997402962136396 +0.00007921074994918931 +0.00007845203714185478 +0.00007769789034288226 +0.00007694830847918706 +0.00007620329026046763 +0.00007546283417864649 +0.00007472693850734542 +0.00007399560130140877 +0.00007326882039646755 +0.00007254659340854837 +0.00007182891773372154 +0.00007111579054780104 +0.00007040720880607996 +0.00006970316924311348 
+0.00006900366837254568 +0.00006830870248698106 +0.00006761826765789567 +0.00006693235973560079 +0.00006625097434924249 +0.00006557410690685042 +0.00006490175259542856 +0.00006423390638109204 +0.00006357056300924453 +0.00006291171700480662 +0.00006225736267248095 +0.00006160749409706569 +0.00006096210514381129 +0.00006032118945882083 +0.00005968474046949159 +0.00005905275138500751 +0.00005842521519686650 +0.00005780212467945706 +0.00005718347239067491 +0.00005656925067258489 +0.00005595945165212168 +0.00005535406724183958 +0.00005475308914069832 +0.00005415650883489387 +0.00005356431759873060 +0.00005297650649553491 +0.00005239306637860843 +0.00005181398789222634 +0.00005123926147267150 +0.00005066887734931090 +0.00005010282554571235 +0.00004954109588080025 +0.00004898367797004783 +0.00004843056122671436 +0.00004788173486311390 +0.00004733718789192601 +0.00004679690912754181 +0.00004626088718744920 +0.00004572911049365049 +0.00004520156727412210 +0.00004467824556430446 +0.00004415913320862963 +0.00004364421786208276 +0.00004313348699179797 +0.00004262692787868560 +0.00004212452761909734 +0.00004162627312651888 +0.00004113215113329622 +0.00004064214819239368 +0.00004015625067918300 +0.00003967444479325917 +0.00003919671656029207 +0.00003872305183390125 +0.00003825343629756171 +0.00003778785546653699 +0.00003732629468983973 +0.00003686873915221652 +0.00003641517387616299 +0.00003596558372395931 +0.00003551995339973329 +0.00003507826745154608 +0.00003464051027350154 +0.00003420666610787687 +0.00003377671904727831 +0.00003335065303681454 +0.00003292845187629214 +0.00003251009922243104 +0.00003209557859109932 +0.00003168487335956411 +0.00003127796676876486 +0.00003087484192559915 +0.00003047548180522744 +0.00003007986925339238 +0.00002968798698875439 +0.00002929981760523876 +0.00002891534357440082 +0.00002853454724779964 +0.00002815741085938625 +0.00002778391652790322 +0.00002741404625929296 +0.00002704778194912029 +0.00002668510538500004 +0.00002632599824903585 
+0.00002597044212026597 +0.00002561841847711761 +0.00002526990869986501 +0.00002492489407309813 +0.00002458335578819186 +0.00002424527494578295 +0.00002391063255824903 +0.00002357940955219246 +0.00002325158677092413 +0.00002292714497695307 +0.00002260606485447356 +0.00002228832701185480 +0.00002197391198413001 +0.00002166280023548498 +0.00002135497216174414 +0.00002105040809285716 +0.00002074908829538038 +0.00002045099297495611 +0.00002015610227878796 +0.00001986439629811220 +0.00001957585507066212 +0.00001929045858313014 +0.00001900818677362025 +0.00001872901953409570 +0.00001845293671281841 +0.00001817991811678108 +0.00001790994351412923 +0.00001764299263657701 +0.00001737904518181073 +0.00001711808081588327 +0.00001686007917559819 +0.00001660501987088228 +0.00001635288248714554 +0.00001610364658763157 +0.00001585729171575246 +0.00001561379739741215 +0.00001537314314331576 +0.00001513530845126507 +0.00001490027280843841 +0.00001466801569365813 +0.00001443851657963970 +0.00001421175493522709 +0.00001398771022761099 +0.00001376636192453095 +0.00001354768949645947 +0.00001333167241877061 +0.00001311829017388911 +0.00001290752225342203 +0.00001269934816027206 +0.00001249374741073204 +0.00001229069953655943 +0.00001209018408703322 +0.00001189218063098919 +0.00001169666875883610 +0.00001150362808455129 +0.00001131303824765573 +0.00001112487891516740 +0.00001093912978353534 +0.00001075577058055027 +0.00001057478106723465 +0.00001039614103971062 +0.00001021983033104593 +0.00001004582881307678 +0.00000987411639820984 +0.00000970467304119977 +0.00000953747874090482 +0.00000937251354201913 +0.00000920975753678193 +0.00000904919086666259 +0.00000889079372402380 +0.00000873454635375948 +0.00000858042905490958 +0.00000842842218225094 +0.00000827850614786440 +0.00000813066142267675 +0.00000798486853798033 +0.00000784110808692664 +0.00000769936072599646 +0.00000755960717644533 +0.00000742182822572486 +0.00000728600472887876 +0.00000715211760991561 +0.00000702014786315592 
+0.00000689007655455510 +0.00000676188482300175 +0.00000663555388159154 +0.00000651106501887576 +0.00000638839960008658 +0.00000626753906833648 +0.00000614846494579379 +0.00000603115883483353 +0.00000591560241916396 +0.00000580177746492804 +0.00000568966582178178 +0.00000557924942394700 +0.00000547051029124053 +0.00000536343053007917 +0.00000525799233445998 +0.00000515417798691779 +0.00000505196985945748 +0.00000495135041446324 +0.00000485230220558377 +0.00000475480787859418 +0.00000465885017223371 +0.00000456441191902126 +0.00000447147604604650 +0.00000438002557573850 +0.00000429004362661122 +0.00000420151341398622 +0.00000411441825069232 +0.00000402874154774348 +0.00000394446681499353 +0.00000386157766176898 +0.00000378005779747980 +0.00000369989103220823 +0.00000362106127727558 +0.00000354355254578819 +0.00000346734895316124 +0.00000339243471762174 +0.00000331879416069041 +0.00000324641170764281 +0.00000317527188794950 +0.00000310535933569641 +0.00000303665878998425 +0.00000296915509530826 +0.00000290283320191803 +0.00000283767816615768 +0.00000277367515078635 +0.00000271080942528005 +0.00000264906636611388 +0.00000258843145702569 +0.00000252889028926120 +0.00000247042856180067 +0.00000241303208156721 +0.00000235668676361770 +0.00000230137863131540 +0.00000224709381648532 +0.00000219381855955238 +0.00000214153920966238 +0.00000209024222478610 +0.00000203991417180713 +0.00000199054172659297 +0.00000194211167405008 +0.00000189461090816317 +0.00000184802643201864 +0.00000180234535781251 +0.00000175755490684345 +0.00000171364240949048 +0.00000167059530517602 +0.00000162840114231447 +0.00000158704757824644 +0.00000154652237915873 +0.00000150681341999093 +0.00000146790868432795 +0.00000142979626427932 +0.00000139246436034547 +0.00000135590128127092 +0.00000132009544388473 +0.00000128503537292880 +0.00000125070970087358 +0.00000121710716772192 +0.00000118421662080116 +0.00000115202701454361 +0.00000112052741025560 +0.00000108970697587568 +0.00000105955498572167 
+0.00000103006082022713 +0.00000100121396566739 +0.00000097300401387535 +0.00000094542066194718 +0.00000091845371193836 +0.00000089209307055004 +0.00000086632874880603 +0.00000084115086172062 +0.00000081654962795755 +0.00000079251536948002 +0.00000076903851119244 +0.00000074610958057368 +0.00000072371920730217 +0.00000070185812287317 +0.00000068051716020834 +0.00000065968725325755 +0.00000063935943659376 +0.00000061952484500041 +0.00000060017471305203 +0.00000058130037468812 +0.00000056289326278040 +0.00000054494490869372 +0.00000052744694184084 +0.00000051039108923112 +0.00000049376917501348 +0.00000047757312001364 +0.00000046179494126599 +0.00000044642675154007 +0.00000043146075886206 +0.00000041688926603128 +0.00000040270467013187 +0.00000038889946203999 +0.00000037546622592643 +0.00000036239763875514 +0.00000034968646977753 +0.00000033732558002280 +0.00000032530792178459 +0.00000031362653810391 +0.00000030227456224860 +0.00000029124521718949 +0.00000028053181507340 +0.00000027012775669300 +0.00000026002653095392 +0.00000025022171433899 +0.00000024070697036996 +0.00000023147604906671 +0.00000022252278640417 +0.00000021384110376696 +0.00000020542500740208 +0.00000019726858786956 +0.00000018936601949134 +0.00000018171155979855 +0.00000017429954897704 +0.00000016712440931172 +0.00000016018064462933 +0.00000015346283974026 +0.00000014696565987903 +0.00000014068385014403 +0.00000013461223493628 +0.00000012874571739742 +0.00000012307927884713 +0.00000011760797821998 +0.00000011232695150180 +0.00000010723141116583 +0.00000010231664560849 +0.00000009757801858513 +0.00000009301096864570 +0.00000008861100857040 +0.00000008437372480561 +0.00000008029477689993 +0.00000007636989694061 +0.00000007259488899033 +0.00000006896562852451 +0.00000006547806186922 +0.00000006212820563961 +0.00000005891214617924 +0.00000005582603900006 +0.00000005286610822341 +0.00000005002864602183 +0.00000004731001206203 +0.00000004470663294883 +0.00000004221500167036 +0.00000003983167704441 
+0.00000003755328316614 +0.00000003537650885708 +0.00000003329810711556 +0.00000003131489456865 +0.00000002942375092561 +0.00000002762161843286 +0.00000002590550133074 +0.00000002427246531179 +0.00000002271963698091 +0.00000002124420331725 +0.00000001984341113796 +0.00000001851456656381 +0.00000001725503448680 +0.00000001606223803967 +0.00000001493365806750 +0.00000001386683260132 +0.00000001285935633389 +0.00000001190888009753 +0.00000001101311034420 +0.00000001016980862774 +0.00000000937679108844 +0.00000000863192793977 +0.00000000793314295749 +0.00000000727841297114 +0.00000000666576735781 +0.00000000609328753836 +0.00000000555910647606 +0.00000000506140817769 +0.00000000459842719706 +0.00000000416844814110 +0.00000000376980517843 +0.00000000340088155045 +0.00000000306010908504 +0.00000000274596771278 +0.00000000245698498584 +0.00000000219173559937 +0.00000000194884091563 +0.00000000172696849069 +0.00000000152483160381 +0.00000000134118878948 +0.00000000117484337213 +0.00000000102464300354 +0.00000000088947920294 +0.00000000076828689983 +0.00000000066004397945 +0.00000000056377083107 +0.00000000047852989894 +0.00000000040342523597 +0.00000000033760206013 +0.00000000028024631366 +0.00000000023058422497 +0.00000000018788187322 +0.00000000015144475576 +0.00000000012061735824 +0.00000000009478272745 +0.00000000007336204696 +0.00000000005581421541 +0.00000000004163542763 +0.00000000003035875840 +0.00000000002155374902 +0.00000000001482599650 +0.00000000000981674556 +0.00000000000620248330 +0.00000000000369453654 +0.00000000000203867190 +0.00000000000101469855 +0.00000000000043607361 +0.00000000000014951028 +0.00000000000003458852 +0.00000000000000336852 +0.00000000000000000666 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +25.30325876473197865835 +50.33068723221295925896 +75.08452989996564497233 +99.56701517655753264080 +123.78035548904902896084 +147.72674738975246100381 +171.40837166230983257265 +194.82739342708941876481 +217.98596224590846759384 +240.88621222608529137688 +263.53026212382513904231 +285.92021544694233625705 +308.05816055692667987387 +329.94617077035269403495 +351.58630445964155342153 +372.98060515317575891459 +394.13110163477034575408 +415.03980804251074232525 +435.70872396695193629057 +456.13983454868861144860 +476.33511057530165544449 +496.29650857768081095855 +516.02597092572648307396 +535.52542592344275362848 +554.79678790341586136492 +573.84195732068417328264 +592.66282084601164115156 +611.26125145855405662587 +629.63910853793697697256 
+647.79823795573702227557 +665.74047216637734436517 +683.46763029743556216999 +700.98151823937803328590 +718.28392873470716040174 +735.37664146654287833371 +752.26142314663343313441 +768.94002760279340691341 +785.41419586578740563709 +801.68565625564951915294 +817.75612446744332828530 +833.62730365648098995734 +849.30088452298264201090 +864.77854539619420393137 +880.06195231796402822511 +895.15275912578044881229 +910.05260753526692951709 +924.76312722216368911177 +939.28593590375908206624 +953.62263941980756953853 +967.77483181292609515367 +981.74409540847000243957 +995.53200089388099058851 +1009.14010739755019585573 +1022.56996256714887749695 +1035.82310264745615313586 +1048.90105255769708492153 +1061.80532596836610537139 +1074.53742537756261299364 +1087.09884218683282597340 +1099.49105677651778023574 +1111.71553858061292885395 +1123.77374616115298522345 +1135.66712728210723071243 +1147.39711898279392698896 +1158.96514765083020392922 +1170.37262909460582704924 +1181.62096861527970759198 +1192.71156107832348425291 +1203.64579098459012129751 +1214.42503254092798670172 +1225.05064973034200193069 +1235.52399638168913043046 +1245.84641623892980533128 +1256.01924302993165838416 +1266.04380053481736467802 +1275.92140265388024999993 +1285.65335347505970275961 +1295.24094734096343017882 +1304.68546891547634913877 +1313.98819324992132351326 +1323.15038584879380323400 +1332.17330273507604943006 +1341.05819051512116857339 +1349.80628644311491370900 +1358.41881848512298347487 +1366.89700538271949881164 +1375.24205671620484281448 +1383.45517296740581514314 +1391.53754558207174341078 +1399.49035703187087165134 +1407.31478087597315607127 +1415.01198182223402000091 +1422.58311578798998198181 +1430.02932996044251012790 +1437.35176285666352669068 +1444.55154438320346343971 +1451.62979589530868906877 +1458.58763025575808569556 +1465.42615189331172587117 +1472.14645686078711150913 +1478.74963289274455746636 +1485.23675946280513926467 +1491.60890784059483848978 
+1497.86714114830760991026 +1504.01251441691556465230 +1510.04607464199739297328 +1515.96886083921026511234 +1521.78190409939566052344 +1527.48622764333299528516 +1533.08284687612376728794 +1538.57276944122941131354 +1543.95699527415149532317 +1549.23651665576403502200 +1554.41231826529792670044 +1559.48537723297181400994 +1564.45666319229280816216 +1569.32713833198931752122 +1574.09775744764328919700 +1578.76946799293909862172 +1583.34321013062162819551 +1587.81991678307872462028 +1592.20051368263148106053 +1596.48591942145731081837 +1600.67704550122221007769 +1604.77479638235899983556 +1608.78006953304065973498 +1612.69375547781714885787 +1616.51673784595186589286 +1620.24989341942409737385 +1623.89409218062860418286 +1627.45019735975347430212 +1630.91906548185124847805 +1634.30154641359854394977 +1637.59848340975872815761 +1640.81071315931603749050 +1643.93906583133525600715 +1646.98436512050216151692 +1649.94742829236633951950 +1652.82906622829091247695 +1655.63008347010691068135 +1658.35127826446046128694 +1660.99344260689667862607 +1663.55736228562614087423 +1666.04381692501033285225 +1668.45358002877378567064 +1670.78741902291585574858 +1673.04609529833646774932 +1675.23036425320833586738 +1677.34097533504177590657 +1679.37867208247644157382 +1681.34419216681885700382 +1683.23826743327208532719 +1685.06162394192392639525 +1686.81498200844930579478 +1688.49905624454936514667 +1690.11455559810906379425 +1691.66218339311876661668 +1693.14263736930934101110 +1694.55660972151827081689 +1695.90478713883021555375 +1697.18785084342448499228 +1698.40647662917444904451 +1699.56133490000684105325 +1700.65309070799366963911 +1701.68240379117764859984 +1702.64992861119321787555 +1703.55631439058652176755 +1704.40220514992029166024 +1705.18823974462111436878 +1705.91505190159159610630 +1706.58327025555581712979 +1707.19351838520697128843 +1707.74641484906987898285 +1708.24257322115522583772 +1708.68260212637187578366 +1709.06710527568202451221 +1709.39668150106058419624 
+1709.67192479019627171510 +1709.89342432096395896224 +1710.06176449568079078745 +1710.17752497512537956936 +1710.24128071233235459658 +1710.25360198615453555249 +1710.21505443462228868157 +1710.12619908806300372817 +1709.98759240200661224662 +1709.79978628986873445683 +1709.56332815542600656045 +1709.27876092505675842403 +1708.94662307978933313279 +1708.56744868712189600046 +1708.14176743263442403986 +1707.67010465138650943118 +1707.15298135911712051893 +1706.59091428321698913351 +1705.98441589351796210394 +1705.33399443285543384263 +1704.64015394744478726352 +1703.90339431703455375100 +1703.12421128487858368317 +1702.30309648750085216307 +1701.44053748425380945264 +1700.53701778668664701399 +1699.59301688772188754228 +1698.60901029062029010674 +1697.58546953777567978250 +1696.52286223929036168556 +1695.42165210138000475126 +1694.28229895458684950427 +1693.10525878178896164172 +1691.89098374603349839163 +1690.63992221818057259952 +1689.35251880437203908514 +1688.02921437329200671229 +1686.67044608327114474378 +1685.27664740919158248289 +1683.84824816921968704264 +1682.38567455134625561186 +1680.88934913977936957963 +1679.35969094112010679964 +1677.79711541039364419703 +1676.20203447689141285082 +1674.57485656983044464141 +1672.91598664387038297718 +1671.22582620442722145526 +1669.50477333283492953342 +1667.75322271132722562470 +1665.97156564786291710334 +1664.16019010077457096486 +1662.31948070324938271369 +1660.44981878765702276723 +1658.55158240970536098757 +1656.62514637242702519870 +1654.67088225002339640923 +1652.68915841152806933678 +1650.68034004431069661223 +1648.64478917744872887852 +1646.58286470490929787047 +1644.49492240859035518952 +1642.38131498120378637395 +1640.24239204900868571713 +1638.07850019437023547653 +1635.88998297820148764004 +1633.67718096222392887285 +1631.44043173108229893842 +1629.18006991432798713504 +1626.89642720822439514450 +1624.58983239742724435928 +1622.26061137651640819968 +1619.90908717137290295796 +1617.53557996040285615891 
+1615.14040709564869757742 +1612.72388312372822838370 +1610.28631980663089962036 +1607.82802614240245020483 +1605.34930838565878730151 +1602.85047006797003632528 +1600.33181201811134997115 +1597.79363238217570142297 +1595.23622664352933497867 +1592.65988764267262922658 +1590.06490559692565511796 +1587.45156811999868295970 +1584.82016024143285903847 +1582.17096442589559046610 +1579.50426059234678177745 +1576.82032613309365842724 +1574.11943593268961194553 +1571.40186238671935825550 +1568.66787542044562542287 +1565.91774250735034001991 +1563.15172868751142232213 +1560.37009658590022809221 +1557.57310643051164333883 +1554.76101607039981900016 +1551.93408099357861829048 +1549.09255434479587165697 +1546.23668694319212590926 +1543.36672729984093166422 +1540.48292163516975961102 +1537.58551389624426519731 +1534.67474577395819324011 +1531.75085672009436166263 +1528.81408396425945284136 +1525.86466253072217114095 +1522.90282525512679967505 +1519.92880280108306578768 +1516.94282367665846322780 +1513.94511425073937971320 +1510.93589876930241189257 +1507.91539937154902872862 +1504.88383610595292338985 +1501.84142694617867164197 +1498.78838780689216036990 +1495.72493255948347723461 +1492.65127304765155713540 +1489.56761910290379091748 +1486.47417855993671764736 +1483.37115727191871883406 +1480.25875912566175429674 +1477.13718605668805139430 +1474.00663806419834145345 +1470.86731322593254844833 +1467.71940771293157013133 +1464.56311580418991979968 +1461.39862990121059738158 +1458.22614054247037529422 +1455.04583641776048352767 +1451.85790438245408040530 +1448.66252947166117337474 +1445.45989491428599649225 +1442.25018214699025520531 +1439.03357082805382560764 +1435.81023885115496341314 +1432.58036235903250599222 +1429.34411575707326846896 +1426.10167172679393843282 +1422.85320123922451784892 +1419.59887356822218862362 +1416.33885630366444274841 +1413.07331536456695175730 +1409.80241501210502974573 +1406.52631786255415136111 +1403.24518490012133042910 +1399.95917548969669041981 
+1396.66844738953409432725 +1393.37315676381012963247 +1390.07345819511715490080 +1386.76950469687221811910 +1383.46144772562342950550 +1380.14943719327175131184 +1376.83362147923344309675 +1373.51414744249018440314 +1370.19116043355461442843 +1366.86480430637379868131 +1363.53522143013287859503 +1360.20255270097004540730 +1356.86693755363876334741 +1353.52851397305539649096 +1350.18741850578385310655 +1346.84378627143087214790 +1343.49775097397787249065 +1340.14944491300730078365 +1336.79899899487691072864 +1333.44654274379604430578 +1330.09220431283256402821 +1326.73611049484452450997 +1323.37838673333430961065 +1320.01915713320636314165 +1316.65854447149240513681 +1313.29667020796000542759 +1309.93365449566385905200 +1306.56961619142612107680 +1303.20467286623625113862 +1299.83894081556718447246 +1296.47253506965307678911 +1293.10556940365677291993 +1289.73815634778361527424 +1286.37040719732226534688 +1283.00243202261958685995 +1279.63433967895957721339 +1276.26623781641683308408 +1272.89823288960360514466 +1269.53043016735523451644 +1266.16293374235533519823 +1262.79584654069708449242 +1259.42927033135129022412 +1256.06330573560148877732 +1252.69805223638491042948 +1249.33360818758387722482 +1245.97007082323466420348 +1242.60753626669998084253 +1239.24609953973185838549 +1235.88585457151589253044 +1232.52689420762635563733 +1229.16931021891718955885 +1225.81319331035706454713 +1222.45863312980577575217 +1219.10571827670105449215 +1215.75453631073105498217 +1212.40517376040088493028 +1209.05771613155684462981 +1205.71224791584972990677 +1202.36885259913810841681 +1199.02761266982111010293 +1195.68860962712869877578 +1192.35192398933986623888 +1189.01763530194648410543 +1185.68582214575440048066 +1182.35656214493178595148 +1179.02993197500268252043 +1175.70600737077552366827 +1172.38486313421162776649 +1169.06657314225481059111 +1165.75121035458391816064 +1162.43884682132579655445 +1159.12955369069504740764 +1155.82340121660513432289 +1152.52045876620104536414 
+1149.22079482734284283652 +1145.92447701604783105722 +1142.63157208385655394522 +1139.34214592517150776985 +1136.05626358452582280734 +1132.77398926380078592047 +1129.49538632939925264509 +1126.22051731935607676860 +1122.94944395041261486767 +1119.68222712501665228046 +1116.41892693830027383228 +1113.15960268498520235880 +1109.90431286624675522035 +1106.65311519652459537610 +1103.40606661028823509696 +1100.16322326875592807482 +1096.92464056654762316612 +1093.69037313831859137281 +1090.46047486531551840017 +1087.23499888190576712077 +1084.01399758203888268326 +1080.79752262569263621117 +1077.58562494523607711017 +1074.37835475176962063415 +1071.17576154141374900064 +1067.97789410154859979230 +1064.78480051699830255529 +1061.59652817620326459291 +1058.41312377730946536758 +1055.23463333423205767758 +1052.06110218267417621973 +1048.89257498609981666959 +1045.72909574165146295854 +1042.57070778605293526198 +1039.41745380143697730091 +1036.26937582115192526544 +1033.12651523550880483526 +1029.98891279750819194305 +1026.85660862849726981949 +1023.72964222381494892034 +1020.60805245836627364042 +1017.49187759218114024407 +1014.38115527591321551881 +1011.27592255631441275909 +1008.17621588164320201031 +1005.08207110707110132353 +1001.99352350001606737351 +998.91060774545053391194 +995.83335795116818189854 +992.76180765301626252040 +989.69598982007153153972 +986.63593685980697500781 +983.58168062319566615770 +980.53325240978506371903 +977.49068297273527150537 +974.45400252382046346611 +971.42324073837960440869 +968.39842676025818946073 +965.37958920668620521610 +962.36675617313221664517 +959.35995523811527618818 +956.35921346799557341001 +953.36455742170255689416 +950.37601315546032765269 +947.39360622744504780712 +944.41736170243711967487 +941.44730415641697618412 +938.48345768113688336598 +935.52584588864783654572 +932.57449191581633840542 +929.62941842877773979126 +926.69064762737650653435 +923.75820124956146628392 +920.83210057576025064918 +917.91236643319848553801 
+914.99901920022057311144 +912.09207881054635436158 +909.19156475750946810876 +906.29749609826365031040 +903.40989145795936110517 +900.52876903387164020387 +897.65414659952659803821 +894.78604150877231404593 +891.92447069982790708309 +889.06945069929724922986 +886.22099762616346652067 +883.37912719573841968668 +880.54385472358933384385 +877.71519512944087182404 +874.89316294103889504186 +872.07777229798784901504 +869.26903695555154172325 +866.46697028844755550381 +863.67158529458515658916 +860.88289459879126752639 +858.10091045650233354536 +855.32564475742890408583 +852.55710902919747695705 +849.79531444094413927814 +847.04027180691559806291 +844.29199159001007046754 +841.55048390530748747551 +838.81575852356900213636 +836.08782487470170963206 +833.36669205121654613322 +830.65236881164071292005 +827.94486358390850000433 +825.24418446872857657581 +822.55033924292763458652 +819.86333536276242739405 +817.18317996720566043223 +814.50987988121164562472 +811.84344161895830893627 +809.18387138705395500438 +806.53117508772982091614 +803.88535832199306696566 +801.24642639278204114817 +798.61438430806583710364 +795.98923678394100988953 +793.37098824769498150999 +790.75964284085137023794 +788.15520442217302843346 +785.55767657068463449832 +782.96706258861365768098 +780.38336550435610661225 +777.80658807539771260053 +775.23673279120862389391 +772.67380187612980080303 +770.11779729222860169102 +767.56872074212333245669 +765.02657367180779601767 +762.49135727342888912972 +759.96307248806147072173 +757.44172000844616832183 +754.92730028172195488878 +752.41981351211802575563 +749.91925966364237865491 +747.42563846273878880311 +744.93894940091854550701 +742.45919173738900553872 +739.98636450163814970438 +737.52046649602061734186 +735.06149629830497360672 +732.60945226421324605326 +730.16433252993067526404 +727.72613501460705265345 +725.29485742282145110948 +722.87049724704661457508 +720.45305177007844577020 +718.04251806745321573544 +715.63889300983873908990 
+713.24217326541997863387 +710.85235530224736066884 +708.46943539058167971234 +706.09340960521240049275 +703.72427382775413207128 +701.36202374893707656156 +699.00665487086234861636 +696.65816250925161057239 +694.31654179567715345911 +691.98178767976503422688 +689.65389493139264232013 +687.33285814285420656233 +685.01867173102550623298 +682.71132993949356659868 +680.41082684068408070743 +678.11715633796040947345 +675.83031216771485105710 +673.55028790142705474864 +671.27707694773005187017 +669.01067255443297199236 +666.75106781054807925102 +664.49825564828961432795 +662.25222884505956244539 +660.01298002541170717450 +657.78050166301500212285 +655.55478608257863015751 +653.33582546178195116227 +651.12361183317113955127 +648.91813708605684496433 +646.71939296837695110298 +644.52737108856399572687 +642.34206291737962146726 +640.16345978974629815639 +637.99155290655710359715 +635.82633333647277140699 +633.66779201769759310992 +631.51591975975702553114 +629.37070724523550779850 +627.23214503152576071443 +625.10022355254477588460 +622.97493312043866353633 +620.85626392728352129780 +618.74420604676026869129 +616.63874943581606657972 +614.53988393632198494743 +612.44759927670781962661 +610.36188507357792332186 +608.28273083333010617935 +606.21012595374804732273 +604.14405972557528912148 +602.08452133409741691139 +600.03149986068206089840 +597.98498428433413209859 +595.94496348321160894557 +593.91142623615246520785 +591.88436122416510443145 +589.86375703192845776357 +587.84960214926218213805 +585.84188497258867300843 +583.84059380639041592076 +581.84571686464448703191 +579.85724227224602600472 +577.87515806642772986379 +575.89945219815831478627 +573.93011253352790390636 +571.96712685513284668559 +570.01048286343223026051 +568.06016817810746033501 +566.11617033940069632081 +564.17847680944464627828 +562.24707497357667307369 +560.32195214165324159694 +558.40309554933401159360 +556.49049235937422963616 +554.58412966289290579880 +552.68399448062814371951 
+550.79007376419781394361 +548.90235439732646227640 +547.02082319707790247776 +545.14546691507337072835 +543.27627223869308181747 +541.41322579227301048377 +539.55631413828461973026 +537.70552377851697656297 +535.86084115522817228339 +534.02225265230754303047 +532.18974459641185603687 +530.36330325809967689565 +528.54291485294754693314 +526.72856554267002593406 +524.92024143621006260219 +523.11792859083971052314 +521.32161301323503721505 +519.53128066055080580554 +517.74691744147617100680 +515.96850921729560468521 +514.19604180291969441896 +512.42950096792947078939 +510.66887243758685599460 +508.91414189386046018626 +507.16529497641829493659 +505.42231728363145748517 +503.68519437355178069993 +501.95391176489607687472 +500.22845493800735994228 +498.50880933581629506079 +496.79496036478553833149 +495.08689339585845345937 +493.38459376538042988614 +491.68804677603037589506 +489.99723769772930381805 +488.31215176854897208614 +486.63277419560262160303 +484.95909015594213542499 +483.29108479742876625096 +481.62874323961153777418 +479.97205057458558030703 +478.32099186785058009264 +476.67555215915183453035 +475.03571646332585487471 +473.40146977112294734980 +471.77279705003678600406 +470.14968324511221453577 +468.53211327975770927878 +466.92007205653652590627 +465.31354445796677055114 +463.71251534729424292891 +462.11696956927630708378 +460.52689195094478691317 +458.94226730236829325804 +457.36308041740142016351 +455.78931607443581697225 +454.22095903713102416077 +452.65799405515099351760 +451.10040586488412373001 +449.54817919016119276421 +448.00129874295885201718 +446.45974922410971430509 +444.92351532398777180788 +443.39258172320518269771 +441.86693309328632039978 +440.34655409734159547952 +438.83142939073843535880 +437.32154362176004269713 +435.81688143225352405352 +434.31742745828489660198 +432.82316633077130063612 +431.33408267612082909181 +429.85016111685303030754 +428.37138627222611830803 +426.89774275884258258884 +425.42921519126446128212 
+423.96578818261144760982 +422.50744634515126563201 +421.05417429089578718049 +419.60595663217986839300 +418.16277798223319450699 +416.72462295575815005577 +415.29147616949029497846 +413.86332224275145108550 +412.44014579801074660281 +411.02193146142496971152 +409.60866386337721678501 +408.20032763901593853006 +406.79690742878165110596 +405.39838787892557547821 +404.00475364203492745219 +402.61598937753467453149 +401.23207975220191201515 +399.85300944065852490894 +398.47876312587391112174 +397.10932549964428517342 +395.74468126308664750468 +394.38481512710706056168 +393.02971181288182833669 +391.67935605232082707516 +390.33373258853146126057 +388.99282617627119407189 +387.65662158240746748561 +386.32510358635539660099 +384.99825698052728739640 +383.67606657076328247058 +382.35851717676746375218 +381.04559363252718640069 +379.73728078674378139112 +378.43356350323983861017 +377.13442666137831338347 +375.83985515646469366402 +374.54983390015235045212 +373.26434782083327945656 +371.98338186403992722262 +370.70692099282291565032 +369.43495018814354580172 +368.16745444924367802741 +366.90441879402709446367 +365.64582825942073895931 +364.39166790174851939810 +363.14192279708237265368 +361.89657804160532350579 +360.65561875195953689399 +359.41903006559385858054 +358.18679714110396616888 +356.95890515857666969168 +355.73533931991659073901 +354.51608484918187969015 +353.30112699290737054980 +352.09045102042671260278 +350.88404222418563449537 +349.68188592006322323869 +348.48396744767148902611 +347.29027217066840194093 +346.10078547705433038573 +344.91549277947132168265 +343.73437951549186664124 +342.55743114791505377070 +341.38463316504419253761 +340.21597108097569162055 +339.05143043586872408923 +337.89099679622682970148 +336.73465575516019043789 +335.58239293265955893730 +334.43419397585086016988 +333.29004455926138916766 +332.14993038507009259774 +331.01383718336154515782 +329.88175071237077418118 +328.75365675873342752311 +327.62954113771866104798 
+326.50938969347487272898 +325.39318829925787213142 +324.28092285766462055108 +323.17257930085543193854 +322.06814359078441611928 +320.96760171941321004851 +319.87093970893266714484 +318.77814361197454218200 +317.68919951182209615581 +316.60409352261439153153 +315.52281178955627183313 +314.44534048911265244897 +313.37166582920821156222 +312.30177404942514840513 +311.23565142118729909271 +310.17328424795465480202 +309.11465886540617020728 +308.05976164161785391116 +307.00857897724824852048 +305.96109730571083673567 +304.91730309334406001653 +303.87718283958741949391 +302.84072307714541238965 +301.80791037214845573544 +300.77873132432006286763 +299.75317256713242386468 +298.73122076795897328338 +297.71286262823258539356 +296.69808488359331022366 +295.68687430403161897630 +294.67921769403932330533 +293.67510189274844378815 +292.67451377406655410596 +291.67744024682031067641 +290.68386825488192926059 +289.69378477730657550637 +288.70717682845469198583 +287.72403145812387492697 +286.74433575166580112636 +285.76807683011395511130 +284.79524185029447380657 +283.82581800494767776399 +282.85979252283891582920 +281.89715266886946665181 +280.93788574418306325242 +279.98197908627656715908 +279.02942006909705696671 +278.08019610314852343436 +277.13429463558856014060 +276.19170315032431517466 +275.25240916810491853539 +274.31640024661868437761 +273.38366398057547712597 +272.45418800180033258584 +271.52795997931497140598 +270.60496761942232524234 +269.68519866578344590380 +268.76864089950186098577 +267.85528213919383233588 +266.94511024106793684041 +266.03811309899128900724 +265.13427864456537008664 +264.23359484718770318068 +263.33604971412364648131 +262.44163129056391881022 +261.55032765969144747942 +260.66212694273787064958 +259.77701729904305238961 +258.89498692610629859701 +258.01602405964700892582 +257.14011697364861674941 +256.26725398041548942274 +255.39742343061507767743 +254.53061371332827889091 +253.66681325608746533362 +252.80601052492636426905 
+251.94819402441302713669 +251.09335229769334318917 +250.24147392652560029092 +249.39254753131575625957 +248.54656177114802062533 +247.70350534382188811833 +246.86336698587399496319 +246.02613547261344706385 +245.19179961814123203112 +244.36034827538017566440 +243.53177033609199497732 +242.70605473090446935203 +241.88319042932380398270 +241.06316643975830515956 +240.24597180953094266442 +239.43159562489515224115 +238.62002701104478319394 +237.81125513213055455708 +237.00526919126301095275 +236.20205843052656291547 +235.40161213098255643672 +234.60391961267538363245 +233.80897023463325012926 +233.01675339487513838321 +232.22725853040540755501 +231.44047511721728938028 +230.65639267028913650392 +229.87500074357691914884 +229.09628893001479355007 +228.32024686150165848630 +227.54686420889942155554 +226.77613068201691248760 +226.00803602960465354954 +225.24257003933652754313 +224.47972253780088180974 +223.71948339048350362646 +222.96184250174854923898 +222.20678981482527092339 +221.45431531178618911326 +220.70440901352424134529 +219.95706097973672399348 +219.21226130889800742807 +218.47000013823475228492 +217.73026764370345631505 +216.99305403996194741012 +216.25834958033902921670 +215.52614455680966898399 +214.79642929996151679006 +214.06919417896236268462 +213.34442960153049284600 +212.62212601389836663657 +211.90227390077617997122 +211.18486378531946456860 +210.46988622908617116991 +209.75733183200375719935 +209.04719123232399624612 +208.33945510658659827641 +207.63411416957305277720 +206.93115917426777627952 +206.23058091180917017482 +205.53237021144914820070 +204.83651794050351213627 +204.14301500430650548878 +203.45185234615919966927 +202.76302094728532665613 +202.07651182677500401041 +201.39231604153792432044 +200.71042468624887078477 +200.03082889329454019389 +199.35351983271743847581 +198.67848871216409634144 +198.00572677682458788695 +197.33522530937869987611 +196.66697562993590508995 +196.00096909597678518367 +195.33719710229064503437 
+194.67565108091901038279 +194.01632250108860944238 +193.35920286915356314239 +192.70428372852592247000 +192.05155665961723343571 +191.40101327976745437809 +190.75264524318353664967 +190.10644424086734716184 +189.46240200055200375573 +188.82051028663059355495 +188.18076090008770506756 +187.54314567842587280211 +186.90765649559963890169 +186.27428526193727975624 +185.64302392407338970770 +185.01386446487148873530 +184.38679890335225763920 +183.76181929461367303702 +183.13891772976165839282 +182.51808633582618313085 +181.89931727569040731396 +181.28260274800823026453 +180.66793498712462451294 +180.05530626300068774981 +179.44470888112692819050 +178.83613518244840179250 +178.22957754327657653448 +177.62502837521270748766 +177.02248012506134955402 +176.42192527474512075969 +175.82335634122497936005 +175.22676587640864909190 +174.63214646707086785682 +174.03949073476272246808 +173.44879133572680984798 +172.86004096080611702746 +172.27323233536077395911 +171.68835821917278394722 +171.10541140636203749636 +170.52438472529257751376 +169.94527103848324145474 +169.36806324251338651266 +168.79275426793600445308 +168.21933707917861511305 +167.64780467445584122288 +167.07815008567271775064 +166.51036637832874021115 +165.94444665142762573851 +165.38038403737576231833 +164.81817170189245302936 +164.25780284390739893752 +163.69927069546875486594 +163.14256852164223232649 +162.58768962041256145312 +162.03462732258864775758 +161.48337499169869602156 +160.93392602389582179967 +160.38627384785533536160 +159.84041192467279302036 +159.29633374776577170451 +158.75403284277101079169 +158.21350276743990548312 +157.67473711154016768887 +157.13772949675015411231 +156.60247357655424593759 +156.06896303614209386978 +155.53719159230243462844 +155.00715299331631058521 +154.47884101885563268297 +153.95224947987543373529 +153.42737221850575224380 +152.90420310794971214818 +152.38273605237100127852 +151.86296498679189426184 +151.34488387698038991402 +150.82848671934667095229 
+150.31376754082953084435 +149.80072039879212297819 +149.28933938091006439208 +148.77961860506036373408 +148.27155221921449879119 +147.76513440132623600221 +147.26035935921885311473 +146.75722133047818829255 +146.25571458233605426358 +145.75583341156234951086 +145.25757214435063247038 +144.76092513620648105643 +144.26588677183184472597 +143.77245146501616090973 +143.28061365851780806224 +142.79036782395434102000 +142.30170846168573461910 +141.81463010069910524180 +141.32912729849689981165 +140.84519464097684249282 +140.36282674232191425290 +139.88201824487904900707 +139.40276381904686786584 +138.92505816315843958364 +138.44889600336111357137 +137.97427209350550469935 +137.50118121502151780078 +137.02961817680636613659 +136.55957781510332438302 +136.09105499338525646635 +135.62404460223336855051 +135.15854155922318113880 +134.69454080880032620371 +134.23203732216575190250 +133.77102609715379344379 +133.31150215811371140262 +132.85346055578705204425 +132.39689636719182885827 +131.94180469549746703706 +131.48818066990801867178 +131.03601944553960834128 +130.58531620329719658002 +130.13606614975799402600 +129.68826451704447322300 +129.24190656270766908165 +128.79698756960033279029 +128.35350284575886803395 +127.91144772427934128700 +127.47081756319226997221 +127.03160774534519816825 +126.59381367827356257294 +126.15743079408274240905 +125.72245454932165387163 +125.28888042486030940381 +124.85670392576408005425 +124.42592058117324427258 +123.99652594417480599986 +123.56851559168180187953 +123.14188512430652622243 +122.71663016623710973363 +122.29274636511028973018 +121.87022939189068893029 +121.44907494074067244583 +121.02927872889925708932 +120.61083649655498106767 +120.19374400671821945252 +119.77799704509997980040 +119.36359141998079280711 +118.95052296208957898216 +118.53878752447276667681 +118.12838098237220663123 +117.71929923309598109427 +117.31153819589160036685 +116.90509381182205572713 +116.49996204363461060893 +116.09613887563828882321 
+115.69362031357397313514 +115.29240238448812760907 +114.89248113660387673463 +114.49385263919691624324 +114.09651298246323847252 +113.70045827739600952100 +113.30568465565558256003 +112.91218826944009379076 +112.51996529136044955521 +112.12901191431089387152 +111.73932435133929175208 +111.35089883552136313938 +110.96373161983125044117 +110.57781897701181605953 +110.19315719944684417442 +109.80974259903410938932 +109.42757150705313051731 +109.04664027403941872763 +108.66694526965460454448 +108.28848288255515797118 +107.91124952026721928178 +107.53524160905512019326 +107.16045559379080032159 +106.78688793782788479803 +106.41453512287047544760 +106.04339364884187091320 +105.67346003375872953711 +105.30473081359949105718 +104.93720254217338094804 +104.57087179099346485600 +104.20573514914565294021 +103.84178922315851423264 +103.47903063687319047403 +103.11745603131384996232 +102.75706206455868141347 +102.39784541160902620049 +102.03980276425792794726 +101.68293083096286011369 +101.32722633671245660025 +100.97268602289982197817 +100.61930664718798311696 +100.26708498338349784262 +99.91601782130517506175 +99.56610196665086220946 +99.21733424087156549831 +98.86971148103667417217 +98.52323053970729915818 +98.17788828480331630999 +97.83368159947411868416 +97.49060738196641295872 +97.14866254549615121050 +96.80784401811526151960 +96.46814874258357974668 +96.12957367623650384303 +95.79211579085547612067 +95.45577207253539597787 +95.12053952155697800208 +94.78641515225243097120 +94.45339599287862597521 +94.12147908548379859894 +93.79066148577663852848 +93.46094026299810764158 +93.13231249978707637638 +92.80477529205360553988 +92.47832574884434109208 +92.15296099221450276673 +91.82867815709688841253 +91.50547439116876091703 +91.18334685472470368950 +90.86229272054197281250 +90.54230917375367937439 +90.22339341171507953732 +89.90554264387496630206 +89.58875409164238590165 +89.27302498825906695856 +88.95835257866583845043 +88.64473411937406410743 +88.33216687833429148213 
+88.02064813480552629699 +87.71017517922331308000 +87.40074531307199379171 +87.09235584875112579084 +86.78500410944714360539 +86.47868742900185168310 +86.17340315178122978068 +85.86914863254547469751 +85.56592123631993729305 +85.26371833826128465716 +84.96253732353073928607 +84.66237558716133548842 +84.36323053392860060740 +84.06509957821872092154 +83.76798014389945024050 +83.47186966418983899985 +83.17676558153003441021 +82.88266534745005742479 +82.58956642244055501578 +82.29746627582154872016 +82.00636238561278901216 +81.71625223840464968816 +81.42713332922680535830 +81.13900316141860002972 +80.85185924649904620765 +80.56569910403634082741 +80.28052026151821962685 +79.99632025422278047699 +79.71309662508851090479 +79.43084692458268136761 +79.14956871057428600125 +78.86925954820154061053 +78.58991700974554817094 +78.31153867449641836629 +78.03412212862777153077 +77.75766496506514613429 +77.48216478335658052856 +77.20761918954302416296 +76.93402579602933144542 +76.66138222145539771191 +76.38968609056622938169 +76.11893503408272465549 +75.84912668857280948487 +75.58025869632162141443 +75.31232870520243238843 +75.04533436854916317316 +74.77927334502578560205 +74.51414329849802697936 +74.24994189790440657362 +73.98666681712714421337 +73.72431573486323941324 +73.46288633449709948309 +73.20237630397099337642 +72.94278333565559080398 +72.68410512622342878331 +72.42633937651791598000 +72.16948379142844771650 +71.91353607975752026960 +71.65849395409689748249 +71.40435513069675721454 +71.15111732933850419158 +70.89877827320584913195 +70.64733568875703895174 +70.39678730559826647095 +70.14713085635406741858 +69.89836407654090066899 +69.65048470443714734301 +69.40349048095842476869 +69.15737914952694609383 +68.91214845594690530106 +68.66779614827447630887 +68.42431997669261534156 +68.18171769338090371093 +67.93998705239147284374 +67.69912580951863390055 +67.45913172217474595982 +67.22000254926146567414 +66.98173605104189221038 +66.74432998901642122291 +66.50778212579284343065 
+66.27209022496090540244 +66.03725205096672823402 +65.80326536898419931276 +65.57012794479007311566 +65.33783754463529191980 +65.10639193512143663156 +64.87578888307153590631 +64.64602615540636065816 +64.41710151901621372872 +64.18901274063662754088 +63.96175758671982691794 +63.73533382331184782288 +63.50973921592348148124 +63.28497152940758496698 +63.06102852783090639832 +62.83790797435012365213 +62.61560763108433746993 +62.39412525899203387780 +62.17345861774303017455 +61.95360546559619052687 +61.73456355927048377907 +61.51633065382369380814 +61.29890450252383260477 +61.08228285672691981745 +60.86646346574967481047 +60.65144407674694093657 +60.43722243458505261060 +60.22379628171862009367 +60.01116335806393919938 +59.79932140087704084408 +59.58826814462648968629 +59.37800132087168236694 +59.16851865813670485750 +58.95981788178721672011 +58.75189671390509005278 +58.54475287316520848435 +58.33838407471186116027 +58.13278803003441908004 +57.92796244684359407984 +57.72390502894780439647 +57.52061347612891495373 +57.31808548401941294514 +57.11631874397912866925 +56.91531094297207005184 +56.71505976344127475386 +56.51556288318843712659 +56.31681797524785793030 +56.11882270776663972356 +55.92157474387813209660 +55.72507174158241838313 +55.52931135362169356995 +55.33429122735816463319 +55.14000900465069321399 +54.94646232173311517499 +54.75364880909176434898 +54.56156609134313839604 +54.37021178711137281425 +54.17958350890626206819 +53.98967886300078333761 +53.80049544930927396535 +53.61203086126625549923 +53.42428268570412797089 +53.23724850273107733756 +53.05092588561016242465 +52.86531240063676051477 +52.68040560701775376629 +52.49620305675043141491 +52.31270229450142750238 +52.12990085748371882346 +51.94779627533795718364 +51.76638607000909075850 +51.58566775562819373135 +51.40563883838826342298 +51.22629681642651888751 +51.04763917970213782382 +50.86966340987623169667 +50.69236698019049214281 +50.51574735534754267974 +50.33980199139088540505 +50.16452833558428636707 
+49.98992382629159436647 +49.81598589285688660766 +49.64271195548386828023 +49.47009942511645164132 +49.29814570331990353225 +49.12684818215959126064 +48.95620424408270565664 +48.78621126179712064186 +48.61686659815408262375 +48.44816760602657979007 +48.28011162819275625679 +48.11269599721354239819 +47.94591803531707796537 +47.77977505434974148102 +47.61426436030226483354 +47.44938326622619229056 +47.28512909616733139728 +47.12149918517034308252 +46.95849087924214160239 +46.79610153531388760939 +46.63432852120588734124 +46.47316921558874014409 +46.31262100794856451103 +46.15268129854887746433 +45.99334749839513136749 +45.83461702919673541601 +45.67648732333272931783 +45.51895582381340688016 +45.36201998424598258453 +45.20567726879712466825 +45.04992515215796800021 +44.89476111950691006314 +44.74018266647521357982 +44.58618729910980960085 +44.43277253383953251387 +44.27993589743691416061 +44.12767492698573335019 +43.97598716984266786767 +43.82487018360454555932 +43.67432153607089162506 +43.52433880521073916725 +43.37491957912624229721 +43.22606145601890403896 +43.07776204415264231784 +42.93001896182171606142 +42.78282983731357802526 +42.63619230887618272163 +42.49010402468241665019 +42.34456264279611303891 +42.19956583113673076468 +42.05511126744711702941 +41.91119663925665861370 +41.76781964384998957485 +41.62497798823090988662 +41.48266938908996337432 +41.34089157276900294846 +41.19964227522859090413 +41.05891924201452525267 +40.91872022822411025800 +40.77904299847148195113 +40.63988532685638688235 +40.50124499692867630074 +40.36311980165736912340 +40.22550754339489031963 +40.08840603384647494067 +39.95181309403624680954 +39.81572655427422802177 +39.68014425412317081054 +39.54506404236671102126 +39.41048377697643445572 +39.27640132507964665365 +39.14281456292645344774 +39.00972137585783627856 +38.87711965827271143326 +38.74500731359629668304 +38.61338225424835712829 +38.48224240161076181721 +38.35158568599567985302 +38.22141004661358465455 +38.09171343154149980137 
+37.96249379769139409291 +37.83374911077925872860 +37.70547734529222339006 +37.57767648445877028962 +37.45034452021639737040 +37.32347945318034021511 +37.19707929261260659359 +37.07114205639138759807 +36.94566577097954507281 +36.82064847139394458964 +36.69608820117456815524 +36.57198301235333559589 +36.44833096542399886175 +36.32513012931172369235 +36.20237858134226627271 +36.08007440721176806164 +35.95821570095625929753 +35.83680056492121224210 +35.71582710973162733126 +35.59529345426237512129 +35.47519772560780637605 +35.35553805905213664573 +35.23631259803831738964 +35.11751949414078666223 +34.99915690703301152098 +34.88122300446068635438 +34.76371596220937476573 +34.64663396407794238030 +34.52997520184670321441 +34.41373787525032668100 +34.29792019194658792003 +34.18252036748936717458 +34.06753662529808224235 +33.95296719662977125154 +33.83881032054888748917 +33.72506424390055457252 +33.61172722127966494554 +33.49879751500436952938 +33.38627339508597913209 +33.27415313920153039362 +33.16243503266429826226 +33.05111736839712932579 +32.94019844690222242889 +32.82967657623511570364 +32.71955007197500719940 +32.60981725719793899998 +32.50047646244758681178 +32.39152602570899119883 +32.28296429237932585465 +32.17478961524188463272 +32.06700035443648033606 +31.95959487743414584315 +31.85257155900787751079 +31.74592878120652272855 +31.63966493332641860547 +31.53377841188559571606 +31.42826762059530665283 +31.32313097033411963821 +31.21836687911967445075 +31.11397377208324499520 +31.00995008144147746521 +30.90629424647120870873 +30.80300471348151702955 +30.70007993578819593949 +30.59751837368570548392 +30.49531849442290720731 +30.39347877217439375386 +30.29199768801645475946 +30.19087372989909923149 +30.09010539262118300030 +29.98969117780305637666 +29.88962959386213213975 +29.78991915598573569923 +29.69055838610549713508 +29.59154581287248930721 +29.49287997163080632390 +29.39455940439161452105 +29.29658265980816267415 +29.19894829315043693896 +29.10165486627961684007 
+29.00470094762269113176 +28.90808511214726905791 +28.81180594133612871133 +28.71586202316242619759 +28.62025195206481598120 +28.52497432892248596659 +28.43002776103000073249 +28.33541086207270254249 +28.24112225210164695000 +28.14716055750911039013 +28.05352441100453830813 +27.96021245158949852794 +27.86722332453348371928 +27.77455568134860186547 +27.68220817976714087649 +27.59017948371532114038 +27.49846826329078908202 +27.40707319473711933711 +27.31599296042109870086 +27.22522624880804542613 +27.13477175443770761376 +27.04462817790082596048 +26.95479422581555795091 +26.86526861080366046508 +26.77605005146668304405 +26.68713727236277222232 +26.59852900398254504921 +26.51022398272636237948 +26.42222095088086319947 +26.33451865659567303624 +26.24711585386030066047 +26.16001130248066175454 +26.07320376805612482940 +25.98669202195664951205 +25.90047484130004917802 +25.81455100892899778842 +25.72891931338833515497 +25.64357854890126375835 +25.55852751534885669571 +25.47376501824525618645 +25.38928986871692572436 +25.30510088347852715174 +25.22119688481228294563 +25.13757670054450699126 +25.05423916402323314401 +24.97118311409653657051 +24.88840739509069877045 +24.80591085678731388953 +24.72369235440210388788 +24.64175074856195735151 +24.56008490528433085842 +24.47869369595421318309 +24.39757599730345560829 +24.31673069138841469794 +24.23615666556882430882 +24.15585281248532467657 +24.07581803003941445240 +23.99605122137037227503 +23.91655129483559960590 +23.83731716398797928491 +23.75834774755560019344 +23.67964196941952792486 +23.60119875859395577322 +23.52301704920383329522 +23.44509578046549691521 +23.36743389666375136926 +23.29003034713316822035 +23.21288408623530585828 +23.13599407333955682020 +23.05935927280120978367 +22.98297865394201622280 +22.90685119102856504014 +22.83097586325288830267 +22.75535165471064402709 +22.67997755438216245238 +22.60485255611097699102 +22.52997565858463246968 +22.45534586531354293015 +22.38096218461183894988 +22.30682362957593412034 
+22.23292921806610777935 +22.15927797268507504214 +22.08586892075944874136 +22.01270109431903421182 +21.93977353007683106512 +21.86708526941069408167 +21.79463535834159770843 +21.72242284751629526340 +21.65044679218575396362 +21.57870625218719240479 +21.50720029192404680884 +21.43592798034608648550 +21.36488839093130565061 +21.29408060166526439616 +21.22350369502323985671 +21.15315675795033101281 +21.08303888184257601779 +21.01314916252748332681 +20.94348670024567482528 +20.87405059963178999283 +20.80483996969570625879 +20.73585392380362080189 +20.66709157965911813903 +20.59855205928473864674 +20.53023448900308878251 +20.46213799941875421950 +20.39426172539945980589 +20.32660480605775532581 +20.25916638473176334401 +20.19194560896845658249 +20.12494163050359929912 +20.05815360524501400619 +19.99158069325298470176 +19.92522205872349871925 +19.85907686996920773481 +19.79314429940156827570 +19.72742352351281880374 +19.66191372285851812762 +19.59661408203928090188 +19.53152378968299984763 +19.46664203842717100201 +19.40196802490081040560 +19.33750094970717370302 +19.27324001740620573742 +19.20918443649675921847 +19.14533341939927524322 +19.08168618243819736335 +19.01824194582424709665 +18.95499993363764090759 +18.89195937381044032577 +18.82911949810976892650 +18.76647954212016600195 +18.70403874522585851992 +18.64179635059531037200 +18.57975160516241786013 +18.51790375961096302149 +18.45625206835622833523 +18.39479578952963123584 +18.33353418496087172684 +18.27246652016118133588 +18.21159206430680299604 +18.15091009022244605831 +18.09041987436447840309 +18.03012069680442763797 +17.97001184121180017428 +17.91009259483877258390 +17.85036224850227526417 +17.79082009656920604357 +17.73146543693870214042 +17.67229757102674980729 +17.61331580374914196341 +17.55451944350607718093 +17.49590780216510665923 +17.43748019504608848251 +17.37923594090425893910 +17.32117436191430215331 +17.26329478365510894378 +17.20559653509252484582 +17.14807894856511083503 +17.09074135976673858295 
+17.03358310773184314257 +16.97660353481965600508 +16.91980198669748602924 +16.86317781232654766654 +16.80673036394488306655 +16.75045899705314766948 +16.69436307039816114184 +16.63844194595804992787 +16.58269498892606819140 +16.52712156769623064179 +16.47172105384681017881 +16.41649282212636506983 +16.36143625043737515057 +16.30655071982189596724 +16.25183561444548985264 +16.19729032158305770395 +16.14291423160304006501 +16.08870673795299310882 +16.03466723714439368109 +15.98079512873708907250 +15.92708981532582690477 +15.87355070252360711436 +15.82017719894874829833 +15.76696871620847240081 +15.71392466888532801761 +15.66104447452220682635 +15.60832755360723389515 +15.55577332955996183728 +15.50338122871606394426 +15.45115068031363136924 +15.39908111647791599808 +15.34717197220775020128 +15.29542268536014759661 +15.24383269663702122898 +15.19240144956993177061 +15.14112839050673287034 +15.09001296859686291896 +15.03905463577716261625 +14.98825284675780267207 +14.93760705900810847879 +14.88711673274297631053 +14.83678133090830364438 +14.78660031916758654802 +14.73657316588691479353 +14.68669934212278604946 +14.63697832160647571698 +14.58740958073171611886 +14.53799259853974668033 +14.48872685670651883072 +14.43961183952866633717 +14.39064703390945254569 +14.34183192934566442034 +14.29316601791399499177 +14.24464879425748620179 +14.19627975557195043166 +14.14805840159260341693 +14.09998423458029748190 +14.05205675930868913781 +14.00427548305060199141 +13.95663991556495453494 +13.90914956908329891405 +13.86180395829665101814 +13.81460260034216958047 +13.76754501478999692665 +13.72063072363071967175 +13.67385925126164991639 +13.62723012447436232719 +13.58074287244071420844 +13.53439702670080535540 +13.48819212115028243204 +13.44212769202608726005 +13.39620327789511300409 +13.35041841964064701642 +13.30477266044970363623 +13.25926554580005856110 +13.21389662344788717974 +13.16866544341509026594 +13.12357155797656282914 +13.07861452164755000638 +13.03379389117123388075 
+12.98910922550588864510 +12.94456008581259354173 +12.90014603544306659444 +12.85586663992686418112 +12.81172146695925206927 +12.76771008638832682891 +12.72383207020356366002 +12.68008699252260385038 +12.63647442957995892243 +12.59299395971407875550 +12.54964516335525637203 +12.50642762301411003989 +12.46334092326842402088 +12.42038465075223818701 +12.37755839414279002142 +12.33486174414917968534 +12.29229429350034052959 +12.24985563693253709516 +12.20754537117821314496 +12.16536309495338663567 +12.12330840894665939800 +12.08138091580655881785 +12.03958022013054574018 +11.99790592845237569009 +11.95635764923133947946 +11.91493499283977897107 +11.87363757155208965344 +11.83246499953275154837 +11.79141689282494454005 +11.75049286933852954462 +11.70969254883932109124 +11.66901555293676828740 +11.62846150507332332324 +11.58803003051247593191 +11.54772075632717331928 +11.50753331138946222723 +11.46746732635771337527 +11.42752243366685860337 +11.38769826751585867441 +11.34799446385714816188 +11.30841066038559894480 +11.26894649652642499404 +11.22960161342528628836 +11.19037565393592714713 +11.15126826261032455534 +11.11227908568660183164 +11.07340777107880569474 +11.03465396836517165013 +10.99601732877788151654 +10.95749750519143361771 +10.91909415211253886469 +10.88080692566858154180 +10.84263548359723827730 +10.80457948523523192819 +10.76663859150771784812 +10.72881246491787621267 +10.69110076953577248560 +10.65350317098798882398 +10.61601933644642237198 +10.57864893461802680008 +10.54139163573473503277 +10.50424711154151324877 +10.46721503528724817045 +10.43029508171343167078 +10.39348692704386678543 +10.35679024897399536087 +10.32020472666075150414 +10.28373004071205620846 +10.24736587317652869444 +10.21111190753290109967 +10.17496782868001403699 +10.13893332292598969957 +10.10300807797845124014 +10.06719178293418970327 +10.03148412826892688088 +9.99588480582706040423 +9.96039350881153140449 +9.92500993177359802644 +9.88973377060275105066 +9.85456472251688531117 
+9.81950248605217801412 +9.78454676105256915264 +9.74969724866047648959 +9.71495365130618182548 +9.68031567269857617930 +9.64578301781432756457 +9.61135539288891571630 +9.57703250540628836518 +9.54281406408902377336 +9.50869977888848261216 +9.47468936097505576299 +9.44078252272860396488 +9.40697897772853508513 +9.37327844074409988195 +9.33968062772465934529 +9.30618525579000355208 +9.27279204322080907730 +9.23950070944900581082 +9.20631097504825568478 +9.17322256172422889620 +9.14023519230520520296 +9.10734859073229507942 +9.07456248205018667363 +9.04187659239774532693 +9.00929064899855625015 +8.97680438015105686134 +8.94441751521991434970 +8.91212978462575300398 +8.87994091983676625546 +8.84785065335858433855 +8.81585871872540671745 +8.78396485049084319030 +8.75216878421804267418 +8.72047025647126794468 +8.68886900480589652318 +8.65736476776005225986 +8.62595728484475010589 +8.59464629653532341536 +8.56343154426165931170 +8.53231277039987467958 +8.50128971826251955690 +8.47036213209016608516 +8.43952975704201513452 +8.40879233918707491569 +8.37814962549483333021 +8.34760136382694817314 +8.31714730292758197550 +8.28678719241522010464 +8.25652078277331114009 +8.22634782534128561338 +8.19626807230642562274 +8.16628127669416237211 +8.13638719235975393929 +8.10658557397975165770 +8.07687617704266003216 +8.04725875784081168263 +8.01773307346088515146 +7.98829888177617863931 +7.95895594143695994660 +7.92970401186277840111 +7.90054285323308214117 +7.87147222647914279747 +7.84249189327493834156 +7.81360161602920300083 +7.78480115787626214541 +7.75609028266817279729 +7.72746875496568552677 +7.69893634003021709589 +7.67049280381476883406 +7.64213791295626965194 +7.61387143476643579731 +7.58569313722407567724 +7.55760278896597537113 +7.52960015927931181068 +7.50168501809252852297 +7.47385713596797796043 +7.44611628409241887994 +7.41846223426991713268 +7.39089475891295855092 +7.36341363103434876081 +7.33601862423889272691 +7.30870951271533009219 +7.28148607122818969373 
+7.25434807510966450650 +7.22729530025127608894 +7.20032752309605861285 +7.17344452063001902786 +7.14664607037442234372 +7.11993195037771364753 +7.09330193920742946290 +7.06675581594214019532 +7.04029336016347695448 +7.01391435194803669617 +6.98761857185952628413 +6.96140580094091632191 +6.93527582070653814128 +6.90922841313373936600 +6.88326336065562927047 +6.85738044615259934034 +6.83157945294510060563 +6.80586016478512156880 +6.78022236584899218315 +6.75466584072930853466 +6.72919037442714085273 +6.70379575234422464547 +6.67848176027529039089 +6.65324818440049536861 +6.62809481127747890383 +6.60302142783385281888 +6.57802782135940677932 +6.55311377949839712898 +6.52827909024202757138 +6.50352354192092629859 +6.47884692319739841082 +6.45424902305781067469 +6.42972963080527737390 +6.40528853605165693352 +6.38092552871034968121 +6.35664039898878918677 +6.33243293738106771684 +6.30830293465987335111 +6.28425018186973005641 +6.26027447031907957609 +6.23637559157296639256 +6.21255333744590743095 +6.18880749999402812733 +6.16513787150815328886 +6.14154424450582325790 +6.11802641172475336617 +6.09458416611468045687 +6.07121730083077260076 +6.04792560922579891525 +6.02470888484323996437 +6.00156692140958547554 +5.97849951282755753823 +5.95550645316841542609 +5.93258753666522320458 +5.90974255770523093645 +5.88697131082305435967 +5.86427359069299036776 +5.84164919212249600378 +5.81909791004442844553 +5.79661953951059860657 +5.77421387568387167732 +5.75188071383186727559 +5.72961984931922962971 +5.70743107760103818293 +5.68531419421539041537 +5.66326899477674405858 +5.64129527496852567481 +5.61939283053656346567 +5.59756145728145337870 +5.57580095105238093822 +5.55411110773941363306 +5.53249172326721350146 +5.51094259358762350587 +5.48946351467313586880 +5.46805428250948999391 +5.44671469308945610521 +5.42544454240523421618 +5.40424362644235145581 +5.38311174117221646895 +5.36204868254571298536 +5.34105424648595494830 +5.32012822888201863947 +5.29927042558162586516 
+5.27848063238497111627 +5.25775864503717649256 +5.23710425922250077946 +5.21651727055678193778 +5.19599747458128735644 +5.17554466675557378608 +5.15515864245124966203 +5.13483919694497537023 +5.11458612541205592805 +5.09439922291944480293 +5.07427828441942985194 +5.05422310474293823290 +5.03423347859298964124 +5.01430920053793371949 +4.99445006500510757519 +4.97465586627397904351 +4.95492639846981841600 +4.93526145555716677649 +4.91566083133328746158 +4.89612431942163262022 +4.87665171326536128760 +4.85724280612072334407 +4.83789739105076854742 +4.81861526091889658119 +4.79939620838244085377 +4.78024002588592189511 +4.76114650565510100222 +4.74211543969009863275 +4.72314661975954841466 +4.70423983739356188494 +4.68539488387796154711 +4.66661155024767460020 +4.64788962728035315308 +4.62922890549002552518 +4.61062917514020753629 +4.59209022663705734857 +4.57361185114222568160 +4.55519384066701071845 +4.53683598806882315557 +4.51853808704783332928 +4.50029993214360324316 +4.48212131873169106200 +4.46400204302031600179 +4.44594190204681005696 +4.42794069367450848773 +4.40999821658904878063 +4.39211427029530998567 +4.37428865511413711431 +4.35652117217868184440 +4.33881162343158699457 +4.32115981162132278826 +4.30356554029891924529 +4.28602861381483890568 +4.26854883731575718286 +4.25112601674120060835 +4.23375995882040356832 +4.21645047106900605627 +4.19919736178581715080 +4.18200044004960247435 +4.16485951571620738321 +4.14777439941494918685 +4.13074490254580073412 +4.11377083727588388484 +4.09685201653668684685 +4.07998825402062337275 +4.06317936417819591810 +4.04642516221464010329 +4.02972546408708254262 +4.01308008650106629034 +3.99648884690794803376 +3.97995156350134271506 +3.96346805521462597355 +3.94703814171723355031 +3.93066164341228230228 +3.91433838143302592627 +3.89806817764023350037 +3.88185085461871937085 +3.86568623567496372218 +3.84957414483366155977 +3.83351440683490052308 +3.81750684713114818436 +3.80155129188450358058 +3.78564756796326129518 
+3.76979550293976650721 +3.75399492508676724256 +3.73824566337486929868 +3.72254754746950533573 +3.70690040772821305382 +3.69130407519738401589 +3.67575838160998769055 +3.66026315938213198109 +3.64481824161056877642 +3.62942346206962040966 +3.61407865520867588316 +3.59878365614883088952 +3.58353830068085166261 +3.56834242526148059937 +3.55319586701142142715 +3.53809846371210445781 +3.52305005380314728569 +3.50805047637920619508 +3.49309957118790581632 +3.47819717862642985295 +3.46334313973919227791 +3.44853729621486682078 +3.43377949038392538128 +3.41906956521554139528 +3.40440736431544843654 +3.38979273192279340066 +3.37522551290764560861 +3.36070555276831139935 +3.34623269762852126874 +3.33180679423504999548 +3.31742768995507253393 +3.30309523277318595191 +3.28880927128913658208 +3.27456965471505290211 +3.26037623287272149142 +3.24622885619139189828 +3.23212737570472352644 +3.21807164304855142234 +3.20406151045813736289 +3.19009683076566785687 +3.17617745739761048185 +3.16230324437229226575 +3.14847404629747007476 +3.13468971836748266924 +3.12095011636097785512 +3.10725509663837140550 +3.09360451613915499181 +3.07999823237970105083 +3.06643610345069017598 +3.05291798801451275125 +3.03944374530283978331 +3.02601323511433095703 +3.01262631781184930801 +2.99928285432041441538 +2.98598270612453697836 +2.97272573526585226489 +2.95951180434068339409 +2.94634077649766634721 +2.93321251543517336202 +2.92012688539923459530 +2.90708375118108364177 +2.89408297811452941417 +2.88112443207374191445 +2.86820797947108729886 +2.85533348725429103609 +2.84250082290470018620 +2.82970985443447808905 +2.81696045038452380638 +2.80425247982200964714 +2.79158581233818514633 +2.77896031804590348813 +2.76637586757753739519 +2.75383233208263566993 +2.74132958322552466868 +2.72886749318308741152 +2.71644593464259775928 +2.70406478079921042124 +2.69172390535415706481 +2.67942318251197519885 +2.66716248697868207884 +2.65494169395926915556 +2.64276067915576362566 +2.63061931876456966961 
+2.61851748947485685193 +2.60645506846588848049 +2.59443193340502542554 +2.58244796244547014652 +2.57050303422421055899 +2.55859702785956555360 +2.54672982294951033566 +2.53490129956898524455 +2.52311133826816647030 +2.51135982007002578342 +2.49964662646846402794 +2.48797163942581001095 +2.47633474137119069525 +2.46473581519807138918 +2.45317474426220405448 +2.44165141237957339371 +2.43016570382437402387 +2.41871750332662127647 +2.40730669607066571913 +2.39593316769243136477 +2.38459680427784492807 +2.37329749236053766381 +2.36203511891993533922 +2.35080957137897694764 +2.33962073760247912801 +2.32846850589471099369 +2.31735276499759867974 +2.30627340408855285858 +2.29523031277870881439 +2.28422338111049860743 +2.27325249955613850616 +2.26231755901529840713 +2.25141845081328284550 +2.24055506669896198346 +2.22972729884288023428 +2.21893503983507978106 +2.20817818268354981726 +2.19745662081191106552 +2.18677024805753283943 +2.17611895866978866110 +2.16550264730784869371 +2.15492120903892159234 +2.14437453933634047942 +2.13386253407759829415 +2.12338508954243465610 +2.11294210241095781200 +2.10253346976164179338 +2.09215908906957848146 +2.08181885820468215442 +2.07151267542953654299 +2.06124043939760737132 +2.05100204915159878283 +2.04079740412120314019 +2.03062640412165684722 +2.02048894935161715836 +2.01038494039141379943 +2.00031427820122376104 +1.99027686411919546572 +1.98027259985959358524 +1.97030138751108419015 +1.96036312953494529232 +1.95045772876311973576 +1.94058508839641286059 +1.93074511200292708857 +1.92093770351588966072 +1.91116276723226841128 +1.90142020781075071767 +1.89170993027001510534 +1.88203183998705636526 +1.87238584269527397197 +1.86277184448273191997 +1.85318975179055267510 +1.84363947141113526662 +1.83412091048631231693 +1.82463397650560144037 +1.81517857730469578392 +1.80575462106338546775 +1.79636201630420533348 +1.78700067189054823125 +1.77767049702493151742 +1.76837140124735503477 +1.75910329443359714219 +1.74986608679341504313 
+1.74065968886904598456 +1.73148401153352171633 +1.72233896598879043793 +1.71322446376413539681 +1.70414041671469851380 +1.69508673701949108548 +1.68606333717999401500 +1.67707013001846383382 +1.66810702867622473455 +1.65917394661199990580 +1.65027079760044181889 +1.64139749573032767138 +1.63255395540302039592 +1.62374009133083840872 +1.61495581853546421591 +1.60620105234627330582 +1.59747570839881780635 +1.58877970263314005628 +1.58011295129221873701 +1.57147537092039635276 +1.56286687836183113554 +1.55428739075876776177 +1.54573682555014113582 +1.53721510046991416409 +1.52872213354547925590 +1.52025784309623257506 +1.51182214773189538271 +1.50341496635100058121 +1.49503621813931708573 +1.48668582256841652622 +1.47836369939402945128 +1.47006976865454674908 +1.46180395066952550920 +1.45356616603808941335 +1.44535633563751497732 +1.43717438062162128354 +1.42902022241934067992 +1.42089378273317823442 +1.41279498353768517838 +1.40472374707803537852 +1.39667999586845836824 +1.38866365269085800804 +1.38067464059317535074 +1.37271288288808657185 +1.36477830315138271011 +1.35687082522062851808 +1.34899037319362280485 +1.34113687142691473397 +1.33331024453445112776 +1.32551041738603569975 +1.31773731510593083982 +1.30999086307141920926 +1.30227098691126830232 +1.29457761250450964496 +1.28691066597876235811 +1.27927007370901235639 +1.27165576231605936819 +1.26406765866518533414 +1.25650568986469868271 +1.24896978326451413288 +1.24145986645482642174 +1.23397586726463770468 +1.22651771376034535166 +1.21908533424447917959 +1.21167865725410606181 +1.20429761155973547027 +1.19694212616361972401 +1.18961213029863821511 +1.18230755342678017783 +1.17502832523783196095 +1.16777437564805741665 +1.16054563479871553078 +1.15334203305484495061 +1.14616350100384067900 +1.13900996945408339300 +1.13188136943371553400 +1.12477763218910653542 +1.11769868918377812683 +1.11064447209679428852 +1.10361491282167967221 +1.09660994346492679519 +1.08962949634477701544 +1.08267350398986650362 
+1.07574189913789197703 +1.06883461473434415723 +1.06195158393124877705 +1.05509274008572528913 +1.04825801675883778508 +1.04144734771417746266 +1.03466066691672908817 +1.02789790853141926874 +1.02115900692196315269 +1.01444389664950085361 +1.00775251247139419064 +1.00108478933990907578 +0.99444066240094930453 +0.98782006699283797513 +0.98122293864503296046 +0.97464921307683927143 +0.96809882619617182442 +0.96157171409840669352 +0.95506781306495758255 +0.94858705956219147026 +0.94212939024011499445 +0.93569474193113832960 +0.92928305164888735934 +0.92289425658694790311 +0.91652829411762581913 +0.91018510179079237243 +0.90386461733252509987 +0.89756677864414657897 +0.89129152380072285133 +0.88503879105007021710 +0.87880851881144506077 +0.87260064567438710981 +0.86641511039754504075 +0.86025185190739528185 +0.85411080929715310628 +0.84799192182553773112 +0.84189512891556694818 +0.83582037015346000164 +0.82976758528733707276 +0.82373671422620298177 +0.81772769703860670454 +0.81174047395161841312 +0.80577498534958547083 +0.79983117177300588896 +0.79390897391737536015 +0.78800833263197334055 +0.78212918891879468219 +0.77627148393139688842 +0.77043515897365399958 +0.76462015549876305442 +0.75882641510797066431 +0.75305387954955094187 +0.74730249071757604007 +0.74157219065090762555 +0.73586292153191912302 +0.73017462568551061430 +0.72450724557788859226 +0.71886072381555321531 +0.71323500314409893353 +0.70763002644711081590 +0.70204573674512216197 +0.69648207719443289143 +0.69093899108606715576 +0.68541642184467743704 +0.67991431302734151032 +0.67443260832266649363 +0.66897125154948278158 +0.66353018665595009384 +0.65810935771832990149 +0.65270870893999721751 +0.64732818465031349842 +0.64196772930356504894 +0.63662728747791175188 +0.63130680387430915257 +0.62600622331538857690 +0.62072549074449956397 +0.61546455122451737552 +0.61022334993696414340 +0.60500183218075576086 +0.59979994337128783588 +0.59461762903933801372 +0.58945483483001104297 +0.58431150650175722738 
+0.57918758992521701678 +0.57408303108230540612 +0.56899777606510193451 +0.56393177107482228561 +0.55888496242086282972 +0.55385729651964199505 +0.54884871989368855250 +0.54385917917059034554 +0.53888862108196466938 +0.53393699246242709577 +0.52900424024861358863 +0.52409031147815476892 +0.51919515328863630188 +0.51431871291666575452 +0.50946093769677991414 +0.50462177506054473053 +0.49980117253544098510 +0.49499907774399420912 +0.49021543840265607850 +0.48545020232094249213 +0.48070331740034394352 +0.47597473163337389312 +0.47126439310261208915 +0.46657224997968954616 +0.46189825052433541863 +0.45724234308338790322 +0.45260447608980386436 +0.44798459806177093334 +0.44338265760159528694 +0.43879860339490395216 +0.43423238420953458316 +0.42968394889468874975 +0.42515324637991480650 +0.42064022567412684417 +0.41614483586475065069 +0.41166702611668548606 +0.40720674567135267674 +0.40276394384584079944 +0.39833857003182343570 +0.39393057369480499741 +0.38953990437297092431 +0.38516651167641718922 +0.38081034528610835377 +0.37647135495306194297 +0.37214949049728890396 +0.36784470180693601415 +0.36355693883737844052 +0.35928615161027105396 +0.35503229021261462073 +0.35079530479585468994 +0.34657514557494945029 +0.34237176282754006040 +0.33818510689289132953 +0.33401512817111589371 +0.32986177712218645031 +0.32572500426509526372 +0.32160476017688544026 +0.31750099549182708714 +0.31341366090041961057 +0.30934270714861605844 +0.30528808503683646514 +0.30124974541910048975 +0.29722763920215905520 +0.29322171734462348924 +0.28923193085601067720 +0.28525823079593842779 +0.28130056827316807233 +0.27735889444484157496 +0.27343316051546306955 +0.26952331773614501831 +0.26562931740366507727 +0.26175111085960772739 +0.25788864948954270950 +0.25404188472211586269 +0.25021076802817870943 +0.24639525091996333783 +0.24259528495018267691 +0.23881082171119830115 +0.23504181283420824689 +0.23128820998829366373 +0.22754996487962880769 +0.22382702925064526522 +0.22011935487918735110 
+0.21642689357759026247 +0.21274959719190841834 +0.20908741760109805785 +0.20544030671609270211 +0.20180821647903146587 +0.19819109886240146579 +0.19458890586818811119 +0.19100158952712262272 +0.18742910189771386231 +0.18387139506558175528 +0.18032842114247765730 +0.17680013226560251094 +0.17328648059666129644 +0.16978741832112848043 +0.16630289764739022984 +0.16283287080595698626 +0.15937729004861825310 +0.15593610764765009069 +0.15250927589498011749 +0.14909674710142983844 +0.14569847359588250502 +0.14231440772443651466 +0.13894450184964590722 +0.13558870834975594866 +0.13224697961780521038 +0.12891926806418882445 +0.12560552617021522592 +0.12230570656116336592 +0.11901976201549462064 +0.11574764546443649937 +0.11248930999139464260 +0.10924470883149571543 +0.10601379537111199625 +0.10279652314728171536 +0.09959284584731471768 +0.09640271730819173490 +0.09322609151612935852 +0.09006292260612240586 +0.08691316486130810925 +0.08377677271271159742 +0.08065370073848945887 +0.07754390366367849841 +0.07444733635955638751 +0.07136395384321385371 +0.06829371127712473288 +0.06523656396854136952 +0.06219246736916411028 +0.05916137707454408728 +0.05614324882366601016 +0.05313803849850472216 +0.05014570212346302486 +0.04716619586502609346 +0.04419947603113642126 +0.04124549907091006051 +0.03830422157401116440 +0.03537560027031150245 +0.03245959202934081689 +0.02955615385985960533 +0.02666524290943977221 +0.02378681646396064997 +0.02092083194715501845 +0.01806724692017725592 +0.01522601908113094087 +0.01239710626466902950 +0.00958046644147678324 +0.00677605771786358307 +0.00398383833530616821 +0.00120376667004638100 +-0.00156419876744263520 +-0.00432009933278203542 +-0.00706397624823543006 +-0.00979587060305073948 +-0.01251582335397210741 +-0.01522387532563447034 +-0.01792006721099659305 +-0.02060443957180051189 +-0.02327703283894178021 +-0.02593788731297356695 +-0.02858704316449516697 +-0.03122454043455262854 +-0.03385041903509006939 +-0.03646471874939912622 
+-0.03906747923250309534 +-0.04165874001155958628 +-0.04423854048632793606 +-0.04680691992956353964 +-0.04936391748739366730 +-0.05190957217982962446 +-0.05444392290106098153 +-0.05696700841995743642 +-0.05947886738040953497 +-0.06197953830180393670 +-0.06446905957939044751 +-0.06694746948468353887 +-0.06941480616585349317 +-0.07187110764819920616 +-0.07431641183446550947 +-0.07675075650530546745 +-0.07917417931960950284 +-0.08158671781497751907 +-0.08398840940807239530 +-0.08637929139502827092 +-0.08875940095180953615 +-0.09112877513466594026 +-0.09348745088044209395 +-0.09583546500706885407 +-0.09817285421382720978 +-0.10049965508184756369 +-0.10281590407442217638 +-0.10512163753742814754 +-0.10741689169966583961 +-0.10970170267329171221 +-0.11197610645416224140 +-0.11424013892222927014 +-0.11649383584190589613 +-0.11873723286242815478 +-0.12097036551826791106 +-0.12319326922949774827 +-0.12540597930210645172 +-0.12760853092845811374 +-0.12980095918756545670 +-0.13198329904556924119 +-0.13415558535598773315 +-0.13631785286017905623 +-0.13847013618761275233 +-0.14061246985633521489 +-0.14274488827326337059 +-0.14486742573452937588 +-0.14698011642592240245 +-0.14908299442314557059 +-0.15117609369225687432 +-0.15325944808998118174 +-0.15533309136404252482 +-0.15739705715360799432 +-0.15945137898951941557 +-0.16149609029474054633 +-0.16353122438464182120 +-0.16555681446739253793 +-0.16757289364429872602 +-0.16957949491010682053 +-0.17157665115342835005 +-0.17356439515698604548 +-0.17554275959805210050 +-0.17751177704872680985 +-0.17947147997628587479 +-0.18142190074354788698 +-0.18336307160916195946 +-0.18529502472803049962 +-0.18721779215153122600 +-0.18913140582794707445 +-0.19103589760273467779 +-0.19293129921890370126 +-0.19481764231732984216 +-0.19669495843704906668 +-0.19856327901566192540 +-0.20042263538956769930 +-0.20227305879440529712 +-0.20411458036525056969 +-0.20594723113703702899 +-0.20777104204484417305 +-0.20958604392420171481 +-0.21139226751144571392 
+-0.21318974344401100951 +-0.21497850226076295477 +-0.21675857440232867979 +-0.21852999021136415569 +-0.22029277993293666649 +-0.22204697371475282108 +-0.22379260160759861797 +-0.22552969356551105795 +-0.22725827944619389487 +-0.22897838901125933120 +-0.23069005192658134651 +-0.23239329776260334026 +-0.23408815599460125467 +-0.23577465600302294213 +-0.23745282707380005460 +-0.23912269839864266907 +-0.24078429907533391297 +-0.24243765810802209160 +-0.24408280440755494856 +-0.24571976679175877578 +-0.24734857398574316978 +-0.24896925462216304425 +-0.25058183724160032479 +-0.25218635029275199289 +-0.25378282213281466717 +-0.25537128102774170335 +-0.25695175515250689990 +-0.25852427259146554261 +-0.26008886133857000988 +-0.26164554929771965952 +-0.26319436428302539488 +-0.26473533401906462759 +-0.26626848614125603332 +-0.26779384819603885282 +-0.26931144764124426150 +-0.27082131184630986453 +-0.27232346809262492071 +-0.27381794357379035665 +-0.27530476539586473672 +-0.27678396057768550609 +-0.27825555605114415947 +-0.27971957866145158444 +-0.28117605516742993910 +-0.28262501224175445858 +-0.28406647647127730716 +-0.28550047435727277101 +-0.28692703231571697886 +-0.28834617667756068382 +-0.28975793368898583591 +-0.29116232951171738819 +-0.29255939022326388210 +-0.29394914181716502721 +-0.29533161020331738511 +-0.29670682120817393201 +-0.29807480057509455618 +-0.29943557396451581099 +-0.30078916695428548067 +-0.30213560503991160333 +-0.30347491363478723558 +-0.30480711807051258377 +-0.30613224359710716760 +-0.30745031538329886667 +-0.30876135851677649624 +-0.31006539800442711741 +-0.31136245877263807325 +-0.31265256566748467204 +-0.31393574345508573620 +-0.31521201682176597236 +-0.31648141037435584266 +-0.31774394864041449749 +-0.31899965606852659361 +-0.32024855702853638473 +-0.32149067581176415942 +-0.32272603663130966511 +-0.32395466362225028290 +-0.32517658084195605328 +-0.32639181227025237941 +-0.32760038180974154765 +-0.32880231328599379692 +-0.32999763044783342325 
+-0.33118635696757131592 +-0.33236851644121445659 +-0.33354413238877494985 +-0.33471322825443605709 +-0.33587582740685795191 +-0.33703195313939265931 +-0.33818162867028311869 +-0.33932487714301817761 +-0.34046172162639154468 +-0.34159218511493211201 +-0.34271629052899305057 +-0.34383406071505984158 +-0.34494551844597987067 +-0.34605068642115610622 +-0.34714958726684025381 +-0.34824224353630184314 +-0.34932867771011111291 +-0.35040891219635977860 +-0.35148296933085226845 +-0.35255087137738905234 +-0.35361264052794638690 +-0.35466829890297124628 +-0.35571786855151082962 +-0.35676137145153813401 +-0.35779882951010105741 +-0.35883026456359862211 +-0.35985569837798464521 +-0.36087515264898434308 +-0.36188864900231482169 +-0.36289620899393654208 +-0.36389785411024871964 +-0.36489360576832025052 +-0.36588348531608178016 +-0.36686751403258704984 +-0.36784571312821834344 +-0.36881810374488571691 +-0.36978470695624826581 +-0.37074554376795654242 +-0.37170063511782552856 +-0.37265000187609986781 +-0.37359366484561479238 +-0.37453164476204964251 +-0.37546396229413336876 +-0.37639063804382189016 +-0.37731169254656488077 +-0.37822714627148151800 +-0.37913701962154966463 +-0.38004133293388925319 +-0.38094010647988368889 +-0.38183336046544991138 +-0.38272111503120048726 +-0.38360339025269624136 +-0.38448020614062433653 +-0.38535158264099039771 +-0.38621753963534621867 +-0.38707809694099759579 +-0.38793327431118890258 +-0.38878309143531092351 +-0.38962756793911523800 +-0.39046672338488153109 +-0.39130057727166678294 +-0.39212914903547002599 +-0.39295245804943729206 +-0.39377052362405651209 +-0.39458336500737778429 +-0.39539100138519800431 +-0.39619345188122867540 +-0.39699073555734065710 +-0.39778287141372803415 +-0.39856987838911567268 +-0.39935177536094706996 +-0.40012858114556548728 +-0.40090031449844165667 +-0.40166699411432016387 +-0.40242863862746380832 +-0.40318526661178499815 +-0.40393689658108511420 +-0.40468354698922315293 +-0.40542523623031084812 +-0.40616198263887842712 
+-0.40689380449010048579 +-0.40762071999995713734 +-0.40834274732542868991 +-0.40905990456467244965 +-0.40977220975721628804 +-0.41047968088414815702 +-0.41118233586830243986 +-0.41188019257441033094 +-0.41257326880933270496 +-0.41326158232219201150 +-0.41394515080461852241 +-0.41462399189084719886 +-0.41529812315797964839 +-0.41596756212609836689 +-0.41663232625849411228 +-0.41729243296182866318 +-0.41794789958629241511 +-0.41859874342581943063 +-0.41924498171822782711 +-0.41988663164542311401 +-0.42052371033357272001 +-0.42115623485324521491 +-0.42178422221965744532 +-0.42240768939274103699 +-0.42302665327745014867 +-0.42364113072380904512 +-0.42425113852716350715 +-0.42485669342833048967 +-0.42545781211375061082 +-0.42605451121569604123 +-0.42664680731239812417 +-0.42723471692825720769 +-0.42781825653399807585 +-0.42839744254681200175 +-0.42897229133056963279 +-0.42954281919594600181 +-0.43010904240063752013 +-0.43067097714946617204 +-0.43122863959460294714 +-0.43178204583569207431 +-0.43233121192003914901 +-0.43287615384277644548 +-0.43341688754700724573 +-0.43395342892397670287 +-0.43448579381325469484 +-0.43501399800287154918 +-0.43553805722950134083 +-0.43605798717859278746 +-0.43657380348456331642 +-0.43708552173094472604 +-0.43759315745054477853 +-0.43809672612559197313 +-0.43859624318792461706 +-0.43909172401911306105 +-0.43958318395065287820 +-0.44007063826409348328 +-0.44055410219120733073 +-0.44103359091415172966 +-0.44150911956560617844 +-0.44198070322895793849 +-0.44244835693841066959 +-0.44291209567918060630 +-0.44337193438764294084 +-0.44382788795145122762 +-0.44427997120974493939 +-0.44472819895324494643 +-0.44517258592445341225 +-0.44561314681778102509 +-0.44604989627968066879 +-0.44648284890884815113 +-0.44691201925630785752 +-0.44733742182563152046 +-0.44775907107302170829 +-0.44817698140750450442 +-0.44859116719105174287 +-0.44900164273874859644 +-0.44940842231892957903 +-0.44981152015332087624 +-0.45021095041718550700 +-0.45060672723949102281 
+-0.45099886470302480435 +-0.45138737684456248234 +-0.45177227765498612078 +-0.45215358107945818888 +-0.45253130101754390768 +-0.45290545132335952028 +-0.45327604580571001502 +-0.45364309822824883112 +-0.45400662230958621635 +-0.45436663172347097062 +-0.45472314009889064357 +-0.45507616102024422977 +-0.45542570802746329406 +-0.45577179461615996425 +-0.45611443423775055450 +-0.45645364029961343899 +-0.45678942616522150155 +-0.45712180515427164318 +-0.45745079054282217212 +-0.45777639556343119320 +-0.45809863340530965203 +-0.45841751721443085854 +-0.45873306009367464942 +-0.45904527510297438164 +-0.45935417525942279227 +-0.45965977353745468559 +-0.45996208286891704375 +-0.46026111614326059573 +-0.46055688620762114116 +-0.46084940586700318121 +-0.46113868788437750723 +-0.46142474498080798817 +-0.46170758983561499544 +-0.46198723508646799552 +-0.46226369332954619917 +-0.46253697711966129669 +-0.46280709897035537947 +-0.46307407135409872634 +-0.46333790670232466447 +-0.46359861740565400101 +-0.46385621581394725910 +-0.46411071423647515255 +-0.46436212494203649159 +-0.46461046015906093398 +-0.46485573207577601806 +-0.46509795284029464835 +-0.46533713456076541970 +-0.46557328930549385371 +-0.46580642910304986826 +-0.46603656594241427147 +-0.46626371177307424087 +-0.46648787850519624065 +-0.46670907800968691737 +-0.46692732211836363021 +-0.46714262262404415704 +-0.46735499128069657448 +-0.46756443980354428502 +-0.46777097986918653172 +-0.46797462311571591531 +-0.46817538114285200956 +-0.46837326551206664993 +-0.46856828774666753334 +-0.46876045933195303883 +-0.46894979171532108486 +-0.46913629630637043721 +-0.46931998447705475241 +-0.46950086756175568592 +-0.46967895685743793477 +-0.46985426362374999032 +-0.47002679908313305113 +-0.47019657442095291744 +-0.47036360078560063291 +-0.47052788928863370499 +-0.47068945100485137800 +-0.47084829697244584557 +-0.47100443819308956961 +-0.47115788563207083861 +-0.47130865021840029350 +-0.47145674284491373429 +-0.47160217436838913763 
+-0.47174495560967377727 +-0.47188509735378103560 +-0.47202261035001163991 +-0.47215750531204808693 +-0.47228979291809181085 +-0.47241948381094156506 +-0.47254658859814957506 +-0.47267111785207671648 +-0.47279308211004950069 +-0.47291249187443595847 +-0.47302935761277403737 +-0.47314368975788084759 +-0.47325549870794164642 +-0.47336479482664300944 +-0.47347158844325676341 +-0.47357588985277088156 +-0.47367770931598046635 +-0.47377705705958661486 +-0.47387394327633963753 +-0.47396837812509506893 +-0.47406037173096343684 +-0.47414993418537720871 +-0.47423707554623328875 +-0.47432180583796962337 +-0.47440413505168138597 +-0.47448407314521828804 +-0.47456163004330176314 +-0.47463681563761089821 +-0.47470963978690666751 +-0.47478011231710287587 +-0.47484824302140266061 +-0.47491404166038025947 +-0.47497751796209353170 +-0.47503868162216950077 +-0.47509754230391942897 +-0.47515410963844101344 +-0.47520839322471580823 +-0.47526040262969321271 +-0.47531014738842336520 +-0.47535763700411698407 +-0.47540288094829052934 +-0.47544588866081849421 +-0.47548666955006413382 +-0.47552523299297116965 +-0.47556158833514261541 +-0.47559574489097145022 +-0.47562771194370540018 +-0.47565749874557111676 +-0.47568511451785361333 +-0.47571056845099579657 +-0.47573386970470410429 +-0.47575502740802211310 +-0.47577405065946304363 +-0.47579094852707071173 +-0.47580573004852522168 +-0.47581840423123850092 +-0.47582898005245644057 +-0.47583746645934860142 +-0.47584387236908509689 +-0.47584820666896210373 +-0.47585047821645848343 +-0.47585069583937444904 +-0.47584886833587092259 +-0.47584500447460470474 +-0.47583911299479608736 +-0.47583120260633354759 +-0.47582128198986578527 +-0.47580935979687410953 +-0.47579544464979250940 +-0.47577954514206799441 +-0.47576166983827783419 +-0.47574182727420827321 +-0.47572002595692736149 +-0.47569627436492173400 +-0.47567058094811509594 +-0.47564295412803581087 +-0.47561340229784154765 +-0.47558193382244146052 +-0.47554855703858306404 +-0.47551328025491845786 
+-0.47547611175212195489 +-0.47543705978294714676 +-0.47539613257234064614 +-0.47535333831751608313 +-0.47530868518803287559 +-0.47526218132590325460 +-0.47521383484565077326 +-0.47516365383443487369 +-0.47511164635208907869 +-0.47505782043124683556 +-0.47500218407739747128 +-0.47494474526899393974 +-0.47488551195752265466 +-0.47482449206758936544 +-0.47476169349700308997 +-0.47469712411686620923 +-0.47463079177165629075 +-0.47456270427930252742 +-0.47449286943126839367 +-0.47442129499264434900 +-0.47434798870222166789 +-0.47427295827258042493 +-0.47419621139015977196 +-0.47411775571535830220 +-0.47403759888259067168 +-0.47395574850039667858 +-0.47387221215150554521 +-0.47378699739290780490 +-0.47370011175596299369 +-0.47361156274644666819 +-0.47352135784466198309 +-0.47342950450550042030 +-0.47333601015851273219 +-0.47324088220802551508 +-0.47314412803316996392 +-0.47304575498800310873 +-0.47294577040155338921 +-0.47284418157792751369 +-0.47274099579637135493 +-0.47263622031134638890 +-0.47252986235261174031 +-0.47242192912530622806 +-0.47231242781001969711 +-0.47220136556286845808 +-0.47208874951557683319 +-0.47197458677554166018 +-0.47185888442592721637 +-0.47174164952572827936 +-0.47162288910984379031 +-0.47150261018915690103 +-0.47138081975061141282 +-0.47125752475728410751 +-0.47113273214846695947 +-0.47100644883971809485 +-0.47087868172296482028 +-0.47074943766656379696 +-0.47061872351537614723 +-0.47048654609083068179 +-0.47035291219102315363 +-0.47021782859075650363 +-0.47008130204164411126 +-0.46994333927215808933 +-0.46980394698771887896 +-0.46966313187075742208 +-0.46952090058079598567 +-0.46937725975450123039 +-0.46923221600577880164 +-0.46908577592583017291 +-0.46893794608323408069 +-0.46878873302399903800 +-0.46863814327165143059 +-0.46848618332730218583 +-0.46833285966971555103 +-0.46817817875537159900 +-0.46802214701855071599 +-0.46786477087138261810 +-0.46770605670394760356 +-0.46754601088430791656 +-0.46738463975860378152 +-0.46722194965110680487 
+-0.46705794686430107676 +-0.46689263767894167989 +-0.46672602835412346778 +-0.46655812512735667097 +-0.46638893421462312983 +-0.46621846181045462076 +-0.46604671408799291932 +-0.46587369719905219467 +-0.46569941727421354516 +-0.46552388042283704417 +-0.46534709273319452283 +-0.46516906027248117184 +-0.46498978908691085410 +-0.46480928520177622332 +-0.46462755462150556740 +-0.46444460332974102368 +-0.46426043728938942712 +-0.46407506244270291251 +-0.46388848471133425910 +-0.46370070999639712017 +-0.46351174417854718035 +-0.46332159311801940360 +-0.46313026265472828635 +-0.46293775860829433633 +-0.46274408677813477775 +-0.46254925294350646148 +-0.46235326286359129666 +-0.46215612227754226948 +-0.46195783690454950143 +-0.46175841244390236628 +-0.46155785457505954517 +-0.46135616895770542589 +-0.46115336123181183137 +-0.46094943701769680588 +-0.46074440191609250528 +-0.46053826150820631469 +-0.46033102135577991243 +-0.46012268700114244968 +-0.45991326396729220738 +-0.45970275775793195683 +-0.45949117385755006149 +-0.45927851773147176928 +-0.45906479482591527885 +-0.45885001056806518083 +-0.45863417036611087152 +-0.45841727960933620345 +-0.45819934366814729643 +-0.45798036789415291770 +-0.45776035762021782816 +-0.45753931816051418568 +-0.45731725481059731786 +-0.45709417284744041643 +-0.45687007752951686035 +-0.45664497409684556839 +-0.45641886777104628825 +-0.45619176375540976265 +-0.45596366723493497730 +-0.45573458337641836735 +-0.45550451732847530018 +-0.45527347422162472990 +-0.45504145916832550167 +-0.45480847726305251300 +-0.45457453358234090057 +-0.45433963318484255067 +-0.45410378111138516299 +-0.45386698238503225822 +-0.45362924201113130618 +-0.45339056497737867391 +-0.45315095625386253575 +-0.45291042079312787694 +-0.45266896353023322597 +-0.45242658938279933789 +-0.45218330325106148582 +-0.45193911001793940496 +-0.45169401454907043281 +-0.45144802169288478222 +-0.45120113628064201228 +-0.45095336312649764166 +-0.45070470702755299763 +-0.45045517276390933947 
+-0.45020476509871204529 +-0.44995348877822310962 +-0.44970134853185700363 +-0.44944834907224689990 +-0.44919449509528330822 +-0.44893979128017602598 +-0.44868424228951020449 +-0.44842785276929281180 +-0.44817062734899887344 +-0.44791257064164391455 +-0.44765368724380399934 +-0.44739398173570754658 +-0.44713345868124781957 +-0.44687212262806491614 +-0.44660997810757258053 +-0.44634702963503064543 +-0.44608328170958372327 +-0.44581873881431122175 +-0.44555340541628735140 +-0.44528728596661970585 +-0.44502038490051176733 +-0.44475270663730492871 +-0.44448425558052490070 +-0.44421503611795420952 +-0.44394505262164168924 +-0.44367430944799868264 +-0.44340281093780831156 +-0.44313056141630285945 +-0.44285756519319824376 +-0.44258382656274591893 +-0.44230934980378550092 +-0.44203413917978601200 +-0.44175819893890444501 +-0.44148153331402861799 +-0.44120414652282036183 +-0.44092604276777574990 +-0.44064722623625862674 +-0.44036770110056838723 +-0.44008747151796123731 +-0.43980654163072374629 +-0.43952491556619893709 +-0.43924259743685034607 +-0.43895959134029988169 +-0.43867590135937506446 +-0.43839153156215726614 +-0.43810648600202800607 +-0.43782076871772546145 +-0.43753438373336922540 +-0.43724733505852664273 +-0.43695962668824428476 +-0.43667126260310945574 +-0.43638224676928449863 +-0.43609258313854953881 +-0.43580227564836154786 +-0.43551132822188981519 +-0.43521974476806152277 +-0.43492752918161292630 +-0.43463468534312182934 +-0.43434121711907536234 +-0.43404712836188663605 +-0.43375242290996252059 +-0.43345710458773201168 +-0.43316117720569985439 +-0.43286464456049222882 +-0.43256751043488778086 +-0.43226977859787901748 +-0.43197145280470178319 +-0.43167253679688771806 +-0.43137303430230566903 +-0.43107294903519921547 +-0.43077228469624134766 +-0.43047104497256344358 +-0.43016923353781594264 +-0.42986685405219254852 +-0.42956391016248574033 +-0.42926040550212263280 +-0.42895634369121332652 +-0.42865172833659020979 +-0.42834656303184376336 +-0.42804085135738140222 
+-0.42773459688044773719 +-0.42742780315518547063 +-0.42712047372266792600 +-0.42681261211093601826 +-0.42650422183505781737 +-0.42619530639714497955 +-0.42588586928641442020 +-0.42557591397921767928 +-0.42526544393908854991 +-0.42495446261678249122 +-0.42464297345031143394 +-0.42433097986499013210 +-0.42401848527347890672 +-0.42370549307581778509 +-0.42339200665947335223 +-0.42307802939936772768 +-0.42276356465793152317 +-0.42244861578513753786 +-0.42213318611854161455 +-0.42181727898331744520 +-0.42150089769230209003 +-0.42118404554603477985 +-0.42086672583279693960 +-0.42054894182864305252 +-0.42023069679745178595 +-0.41991199399095258116 +-0.41959283664878083142 +-0.41927322799849514601 +-0.41895317125563480420 +-0.41863266962375006441 +-0.41831172629443552635 +-0.41799034444738042415 +-0.41766852725039582683 +-0.41734627785945832557 +-0.41702359941874839189 +-0.41670049506068174150 +-0.41637696790595457585 +-0.41605302106357788805 +-0.41572865763091315650 +-0.41540388069371525503 +-0.41507869332615954239 +-0.41475309859088760334 +-0.41442709953904238729 +-0.41410069921030701057 +-0.41377390063293068012 +-0.41344670682378370508 +-0.41311912078837093043 +-0.41279114552089590795 +-0.41246278400426833466 +-0.41213403921016378284 +-0.41180491409904235178 +-0.41147541162019585226 +-0.41114553471178211241 +-0.41081528630085240028 +-0.41048466930340021808 +-0.41015368662438261849 +-0.40982234115776877692 +-0.40949063578656630380 +-0.40915857338285671618 +-0.40882615680784323287 +-0.40849338891185799083 +-0.40816027253443404321 +-0.40782681050430708014 +-0.40749300563946933007 +-0.40715886074719803700 +-0.40682437862408676876 +-0.40648956205608927084 +-0.40615441381854033853 +-0.40581893667620366761 +-0.40548313338329761146 +-0.40514700668352843227 +-0.40481055931013043558 +-0.40447379398588956256 +-0.40413671342319462676 +-0.40379932032404824982 +-0.40346161738011848685 +-0.40312360727275969863 +-0.40278529267305612782 +-0.40244667624185065380 +-0.40210776062977288126 
+-0.40176854847727722087 +-0.40142904241467741722 +-0.40108924506217702444 +-0.40074915902990071448 +-0.40040878691792464172 +-0.40006813131631840941 +-0.39972719480516322177 +-0.39938597995460178858 +-0.39904448932485214740 +-0.39870272546625090682 +-0.39836069091928183461 +-0.39801838821460983064 +-0.39767581987311090286 +-0.39733298840589997836 +-0.39698989631437286985 +-0.39664654609022342857 +-0.39630294021548939654 +-0.39595908116257527709 +-0.39561497139427997949 +-0.39527061336384333723 +-0.39492600951495643313 +-0.39458116228180983853 +-0.39423607408911059968 +-0.39389074735212675771 +-0.39354518447670827630 +-0.39319938785931812797 +-0.39285335988706560073 +-0.39250710293773610760 +-0.39216061937982249486 +-0.39181391157255140989 +-0.39146698186591827318 +-0.39111983260070948276 +-0.39077246610854315945 +-0.39042488471189051857 +-0.39007709072410728934 +-0.38972908644946369083 +-0.38938087418317651744 +-0.38903245621143567323 +-0.38868383481143570224 +-0.38833501225139832602 +-0.38798599079061218964 +-0.38763677267945395588 +-0.38728736015942155646 +-0.38693775546315750669 +-0.38658796081448759674 +-0.38623797842843504702 +-0.38588781051126674893 +-0.38553745926050553283 +-0.38518692686496830424 +-0.38483621550479263362 +-0.38448532735146440098 +-0.38413426456784210972 +-0.38378302930819097050 +-0.38343162371821026824 +-0.38308004993505756497 +-0.38272831008737789871 +-0.38237640629533131698 +-0.38202434067062424061 +-0.38167211531653333356 +-0.38131973232793248130 +-0.38096719379132298888 +-0.38061450178485645157 +-0.38026165837837105910 +-0.37990866563340602857 +-0.37955552560324135047 +-0.37920224033291222154 +-0.37884881185924784708 +-0.37849524221089286824 +-0.37814153340833051020 +-0.37778768746391733213 +-0.37743370638190026911 +-0.37707959215845460177 +-0.37672534678170027655 +-0.37637097223172999438 +-0.37601647048064695822 +-0.37566184349256703801 +-0.37530709322367439285 +-0.37495222162222247020 +-0.37459723062857525067 +-0.37424212217522684343 
+-0.37388689818682713240 +-0.37353156058021019792 +-0.37317611126441802005 +-0.37282055214072712390 +-0.37246488510267478089 +-0.37210911203607877074 +-0.37175323481907329715 +-0.37139725532212003456 +-0.37104117540805231501 +-0.37068499693207834778 +-0.37032872174182501768 +-0.36997235167734843220 +-0.36961588857117133600 +-0.36925933424829893159 +-0.36890269052624607982 +-0.36854595921506294598 +-0.36818914211735886965 +-0.36783224102832817737 +-0.36747525773577388586 +-0.36711819402012724201 +-0.36676105165448230627 +-0.36640383240460766556 +-0.36604653802898595716 +-0.36568917027881991944 +-0.36533173089807113865 +-0.36497422162347487040 +-0.36461664418456984915 +-0.36425900030372043714 +-0.36390129169613388838 +-0.36354352006989704149 +-0.36318568712598392478 +-0.36282779455829616833 +-0.36246984405367166371 +-0.36211183729191492864 +-0.36175377594582303065 +-0.36139566168120029754 +-0.36103749615688984775 +-0.36067928102479096530 +-0.36032101792988580069 +-0.35996270851026024307 +-0.35960435439712573613 +-0.35924595721484703370 +-0.35888751858095518932 +-0.35852904010618469322 +-0.35817052339448024467 +-0.35781197004303150200 +-0.35745338164228701583 +-0.35709475977598309493 +-0.35673610602116334611 +-0.35637742194819754804 +-0.35601870912080962883 +-0.35565996909609653986 +-0.35530120342455112636 +-0.35494241365008499800 +-0.35458360131004568183 +-0.35422476793524498850 +-0.35386591504997694235 +-0.35350704417204131813 +-0.35314815681276068293 +-0.35278925447701059426 +-0.35243033866322992509 +-0.35207141086345428160 +-0.35171247256332649478 +-0.35135352524212581926 +-0.35099457037278480875 +-0.35063560942191324132 +-0.35027664384981410661 +-0.34991767511051224959 +-0.34955870465176924755 +-0.34919973391510800154 +-0.34884076433582916765 +-0.34848179734303669219 +-0.34812283435965524214 +-0.34776387680245524070 +-0.34740492608206591241 +-0.34704598360300364934 +-0.34668705076368572238 +-0.34632812895645981310 +-0.34596921956761106376 +-0.34561032397739593902 
+-0.34525144356005177393 +-0.34489257968382452946 +-0.34453373371098494626 +-0.34417490699784786257 +-0.34381610089479625048 +-0.34345731674629553787 +-0.34309855589091953210 +-0.34273981966136612964 +-0.34238110938447391396 +-0.34202242638125640584 +-0.34166377196689690088 +-0.34130514745079459926 +-0.34094655413656521636 +-0.34058799332206762811 +-0.34022946629942618646 +-0.33987097435504115550 +-0.33951251876961685561 +-0.33915410081817443100 +-0.33879572177007655220 +-0.33843738288904218203 +-0.33807908543316594896 +-0.33772083065494018506 +-0.33736261980126908133 +-0.33700445411349455593 +-0.33664633482740558001 +-0.33628826317326582229 +-0.33593024037582552843 +-0.33557226765434533533 +-0.33521434622261148117 +-0.33485647728895645558 +-0.33449866205627321047 +-0.33414090172203991802 +-0.33378319747833418152 +-0.33342555051185174264 +-0.33306796200392452256 +-0.33271043313053921819 +-0.33235296506235706415 +-0.33199555896472954242 +-0.33163821599771470261 +-0.33128093731610314121 +-0.33092372406942216490 +-0.33056657740196915274 +-0.33020949845281805102 +-0.32985248835584030092 +-0.32949554823972593276 +-0.32913867922799633359 +-0.32878188243902267685 +-0.32842515898604623947 +-0.32806850997719277929 +-0.32771193651549385129 +-0.32735543969889741023 +-0.32699902062029273520 +-0.32664268036752019952 +-0.32628642002339736106 +-0.32593024066572712227 +-0.32557414336731976823 +-0.32521812919600845415 +-0.32486219921466724658 +-0.32450635448122755466 +-0.32415059604869472798 +-0.32379492496516448785 +-0.32343934227383963620 +-0.32308384901304876280 +-0.32272844621626106676 +-0.32237313491210189964 +-0.32201791612437208334 +-0.32166279087206112175 +-0.32130776016936951622 +-0.32095282502571687022 +-0.32059798644576398274 +-0.32024324542942950167 +-0.31988860297190191417 +-0.31953406006365903114 +-0.31917961769048308618 +-0.31882527683347849923 +-0.31847103846908486613 +-0.31811690356909455568 +-0.31776287310066902991 +-0.31740894802635366556 +-0.31705512930409679440 
+-0.31670141788725980625 +-0.31634781472463813223 +-0.31599432076047284657 +-0.31564093693447159428 +-0.31528766418181791709 +-0.31493450343319190354 +-0.31458145561478162433 +-0.31422852164830195054 +-0.31387570245100948618 +-0.31352299893571539124 +-0.31317041201080303425 +-0.31281794258024236965 +-0.31246559154360603605 +-0.31211335979608439972 +-0.31176124822849710094 +-0.31140925772731697929 +-0.31105738917467146143 +-0.31070564344837359183 +-0.31035402142192286545 +-0.31000252396452870896 +-0.30965115194112319275 +-0.30929990621237335446 +-0.30894878763470012828 +-0.30859779706028861446 +-0.30824693533710673110 +-0.30789620330891809274 +-0.30754560181529561058 +-0.30719513169163747968 +-0.30684479376918111226 +-0.30649458887501862536 +-0.30614451783210888669 +-0.30579458145929483415 +-0.30544478057131363435 +-0.30509511597881666667 +-0.30474558848837962621 +-0.30439619890251651269 +-0.30404694801969550655 +-0.30369783663435195864 +-0.30334886553690609823 +-0.30300003551376958333 +-0.30265134734736676148 +-0.30230280181614321844 +-0.30195439969458526264 +-0.30160614175322775221 +-0.30125802875867130348 +-0.30091006147359727896 +-0.30056224065677555890 +-0.30021456706308674578 +-0.29986704144352865908 +-0.29951966454523176742 +-0.29917243711147745167 +-0.29882535988170283447 +-0.29847843359152215204 +-0.29813165897273385951 +-0.29778503675334161427 +-0.29743856765755849469 +-0.29709225240582781691 +-0.29674609171483123937 +-0.29640008629750552727 +-0.29605423686305426534 +-0.29570854411696051445 +-0.29536300876100046731 +-0.29501763149325666014 +-0.29467241300812935245 +-0.29432735399635395757 +-0.29398245514500720432 +-0.29363771713752639947 +-0.29329314065371675513 +-0.29294872636977070668 +-0.29260447495827351938 +-0.29226038708822099643 +-0.29191646342502935996 +-0.29157270463054990595 +-0.29122911136308193836 +-0.29088568427738242805 +-0.29054242402467961304 +-0.29019933125268798646 +-0.28985640660561784454 +-0.28951365072418988600 +-0.28917106424564342770 
+-0.28882864780375649971 +-0.28848640202884728856 +-0.28814432754779817358 +-0.28780242498405850249 +-0.28746069495766157775 +-0.28711913808523603642 +-0.28677775498001756294 +-0.28643654625186026896 +-0.28609551250724885030 +-0.28575465434931346387 +-0.28541397237783716623 +-0.28507346718927090157 +-0.28473313937674366025 +-0.28439298953007563497 +-0.28405301823579076625 +-0.28371322607712479158 +-0.28337361363404095504 +-0.28303418148323938874 +-0.28269493019817021340 +-0.28235586034904497366 +-0.28201697250284601948 +-0.28167826722333988432 +-0.28133974507108966412 +-0.28100140660346645261 +-0.28066325237465700182 +-0.28032528293567987587 +-0.27998749883439333352 +-0.27964990061551048273 +-0.27931248882060577543 +-0.27897526398813005111 +-0.27863822665342141693 +-0.27830137734871163158 +-0.27796471660314564511 +-0.27762824494278554033 +-0.27729196289062396641 +-0.27695587096659790571 +-0.27661996968759450244 +-0.27628425956746643921 +-0.27594874111703915354 +-0.27561341484412688052 +-0.27527828125353814848 +-0.27494334084708971222 +-0.27460859412361537935 +-0.27427404157897955495 +-0.27393968370608573482 +-0.27360552099488827382 +-0.27327155393240121217 +-0.27293778300271143156 +-0.27260420868698859165 +-0.27227083146349495557 +-0.27193765180759499334 +-0.27160467019176870451 +-0.27127188708561894570 +-0.27093930295588580792 +-0.27060691826645272284 +-0.27027473347835923034 +-0.26994274904981008234 +-0.26961096543618884303 +-0.26927938309006260731 +-0.26894800246119676679 +-0.26861682399656466869 +-0.26828584814035388861 +-0.26795507533398271738 +-0.26762450601610510148 +-0.26729414062262174534 +-0.26696397958669343398 +-0.26663402333874697270 +-0.26630427230648645587 +-0.26597472691490325891 +-0.26564538758628875037 +-0.26531625474023856626 +-0.26498732879366776460 +-0.26465861016081682067 +-0.26433009925326400591 +-0.26400179647993438081 +-0.26367370224710950932 +-0.26334581695843545246 +-0.26301814101493564690 +-0.26269067481501889860 +-0.26236341875448865313 
+-0.26203637322655209951 +-0.26170953862183210514 +-0.26138291532837343301 +-0.26105650373165650846 +-0.26073030421460219319 +-0.26040431715758466380 +-0.26007854293843840621 +-0.25975298193246948442 +-0.25942763451246481088 +-0.25910250104870002907 +-0.25877758190895022716 +-0.25845287745849632177 +-0.25812838806013999049 +-0.25780411407420728009 +-0.25748005585855948674 +-0.25715621376860497982 +-0.25683258815730319879 +-0.25650917937517869749 +-0.25618598777032752789 +-0.25586301368842545578 +-0.25554025747274111691 +-0.25521771946413956966 +-0.25489540000109528473 +-0.25457329941969830678 +-0.25425141805366663350 +-0.25392975623435087851 +-0.25360831429074653931 +-0.25328709254949938190 +-0.25296609133491809729 +-0.25264531096898101836 +-0.25232475177134422450 +-0.25200441405935097849 +-0.25168429814804027522 +-0.25136440435015700023 +-0.25104473297615748084 +-0.25072528433421997773 +-0.25040605873025356676 +-0.25008705646790496679 +-0.24976827784857003056 +-0.24944972317139826878 +-0.24913139273330456303 +-0.24881328682897513316 +-0.24849540575087905592 +-0.24817774978927281682 +-0.24786031923221141238 +-0.24754311436555634374 +-0.24722613547298130654 +-0.24690938283598490299 +-0.24659285673389488847 +-0.24627655744387777492 +-0.24596048524094898946 +-0.24564464039797717643 +-0.24532902318569549394 +-0.24501363387270710947 +-0.24469847272549738459 +-0.24438354000843673375 +-0.24406883598379236489 +-0.24375436091173446895 +-0.24344011505034499065 +-0.24312609865562620493 +-0.24281231198150676764 +-0.24249875527985109702 +-0.24218542880046639576 +-0.24187233279111167161 +-0.24155946749750462077 +-0.24124683316332856675 +-0.24093443003024250793 +-0.24062225833788616902 +-0.24031031832389154745 +-0.23999861022388574439 +-0.23968713427150278861 +-0.23937589069838757783 +-0.23906487973420836868 +-0.23875410160665927473 +-0.23844355654147048051 +-0.23813324476241701233 +-0.23782316649132148600 +-0.23751332194806806797 +-0.23720371135060486223 +-0.23689433491495254236 
+-0.23658519285521484310 +-0.23627628538358086407 +-0.23596761271033681040 +-0.23565917504387010051 +-0.23535097259068027409 +-0.23504300555538240602 +-0.23473527414071723718 +-0.23442777854755686429 +-0.23412051897491251151 +-0.23381349561994260733 +-0.23350670867795852992 +-0.23320015834243246200 +-0.23289384480500396890 +-0.23258776825548874156 +-0.23228192888188364806 +-0.23197632687037550436 +-0.23167096240534607032 +-0.23136583566938148659 +-0.23106094684327835309 +-0.23075629610604989073 +-0.23045188363493337991 +-0.23014770960539795985 +-0.22984377419114970786 +-0.22954007756414168684 +-0.22923661989457677635 +-0.22893340135091785892 +-0.22863042209989184461 +-0.22832768230650041241 +-0.22802518213402209191 +-0.22772292174402206100 +-0.22742090129635927909 +-0.22711912094918951244 +-0.22681758085897721355 +-0.22651628118049829674 +-0.22621522206684741008 +-0.22591440366944715024 +-0.22561382613805075481 +-0.22531348962075201103 +-0.22501339426398919707 +-0.22471354021255490752 +-0.22441392760959871788 +-0.22411455659663653828 +-0.22381542731355549836 +-0.22351653989862146910 +-0.22321789448848528004 +-0.22291949121818890878 +-0.22262133022117108760 +-0.22232341162927524159 +-0.22202573557275531724 +-0.22172830218028166671 +-0.22143111157894698748 +-0.22113416389427409392 +-0.22083745925022058021 +-0.22054099776918723030 +-0.22024477957202134859 +-0.21994880477802536412 +-0.21965307350496113270 +-0.21935758586905915180 +-0.21906234198502078092 +-0.21876734196602667937 +-0.21847258592374330100 +-0.21817807396832814004 +-0.21788380620843475488 +-0.21758978275122117796 +-0.21729600370235369056 +-0.21700246916601539926 +-0.21670917924490920581 +-0.21641613404026652234 +-0.21612333365185026901 +-0.21583077817796503250 +-0.21553846771545831507 +-0.21524640235972994362 +-0.21495458220473578903 +-0.21466300734299459396 +-0.21437167786559468974 +-0.21408059386219760456 +-0.21378975542104552976 +-0.21349916262896653785 +-0.21320881557138091078 +-0.21291871433230599719 
+-0.21262885899436204107 +-0.21233924963877989778 +-0.21204988634540239412 +-0.21176076919269534726 +-0.21147189825774839744 +-0.21118327361628411176 +-0.21089489534266206427 +-0.21060676350988419281 +-0.21031887818960151582 +-0.21003123945211826795 +-0.20974384736639783977 +-0.20945670200007054929 +-0.20916980341943547383 +-0.20888315168946777756 +-0.20859674687382556701 +-0.20831058903485213940 +-0.20802467823358491983 +-0.20773901452975587767 +-0.20745359798180509903 +-0.20716842864687767811 +-0.20688350658083237699 +-0.20659883183825009101 +-0.20631440447243462599 +-0.20603022453541861014 +-0.20574629207797107133 +-0.20546260714960309923 +-0.20517916979856848370 +-0.20489598007187403983 +-0.20461303801528163415 +-0.20433034367331592840 +-0.20404789708926723835 +-0.20376569830519727922 +-0.20348374736194621559 +-0.20320204429913482636 +-0.20292058915517147133 +-0.20263938196725747587 +-0.20235842277139090561 +-0.20207771160237258945 +-0.20179724849380939466 +-0.20151703347812385814 +-0.20123706658655426960 +-0.20095734784916027826 +-0.20067787729483099746 +-0.20039865495128747486 +-0.20011968084508857668 +-0.19984095500163434611 +-0.19956247744517427445 +-0.19928424819880807828 +-0.19900626728449352654 +-0.19872853472305085365 +-0.19845105053416667307 +-0.19817381473639908429 +-0.19789682734718214152 +-0.19762008838283329215 +-0.19734359785855512537 +-0.19706735578843903589 +-0.19679136218547610415 +-0.19651561706155512566 +-0.19624012042747018825 +-0.19596487229292505750 +-0.19568987266654105928 +-0.19541512155585424870 +-0.19514061896732790014 +-0.19486636490635175778 +-0.19459235937725080645 +-0.19431860238328627077 +-0.19404509392666219325 +-0.19377183400853026374 +-0.19349882262899306684 +-0.19322605978710896690 +-0.19295354548089857505 +-0.19268127970734605370 +-0.19240926246240663833 +-0.19213749374100738687 +-0.19186597353705797664 +-0.19159470184344831734 +-0.19132367865205640589 +-0.19105290395375204571 +-0.19078237773840406311 +-0.19051209999487889180 
+-0.19024207071104928812 +-0.18997228987379966014 +-0.18970275746902534597 +-0.18943347348164241151 +-0.18916443789558798350 +-0.18889565069382810436 +-0.18862711185835859262 +-0.18835882137021106586 +-0.18809077920945829754 +-0.18782298535521604887 +-0.18755543978564859220 +-0.18728814247797387349 +-0.18702109340846542751 +-0.18675429255245931670 +-0.18648773988435402016 +-0.18622143537762200771 +-0.18595537900480585414 +-0.18568957073752689890 +-0.18542401054648768866 +-0.18515869840147955450 +-0.18489363427138044704 +-0.18462881812416467864 +-0.18436424992690381153 +-0.18409992964577320818 +-0.18383585724605316924 +-0.18357203269213515084 +-0.18330845594752520622 +-0.18304512697484806583 +-0.18278204573584980186 +-0.18251921219140493369 +-0.18225662630151667765 +-0.18199428802532408023 +-0.18173219732110226787 +-0.18147035414627230021 +-0.18120875845739883858 +-0.18094741021019611349 +-0.18068630935953519656 +-0.18042545585944325115 +-0.18016484966310847282 +-0.17990449072288555721 +-0.17964437899030008539 +-0.17938451441604841285 +-0.17912489695000535783 +-0.17886552654122633843 +-0.17860640313795195233 +-0.17834752668761069683 +-0.17808889713682282685 +-0.17783051443140623915 +-0.17757237851637694415 +-0.17731448933595478357 +-0.17705684683356742726 +-0.17679945095185256587 +-0.17654230163266310116 +-0.17628539881706858927 +-0.17602874244536287351 +-0.17577233245706416764 +-0.17551616879091883061 +-0.17526025138490677890 +-0.17500458017624578866 +-0.17474915510139119035 +-0.17449397609604283543 +-0.17423904309514864908 +-0.17398435603290596241 +-0.17372991484276667506 +-0.17347571945744114097 +-0.17322176980890049980 +-0.17296806582838078481 +-0.17271460744638547635 +-0.17246139459269196892 +-0.17220842719635173768 +-0.17195570518569369689 +-0.17170322848833199925 +-0.17145099703116417622 +-0.17119901074037732758 +-0.17094726954145059161 +-0.17069577335916133465 +-0.17044452211758340243 +-0.17019351574009483619 +-0.16994275414937892732 +-0.16969223726742979630 
+-0.16944196501555303103 +-0.16919193731437037753 +-0.16894215408382423638 +-0.16869261524317846757 +-0.16844332071102349757 +-0.16819427040527928918 +-0.16794546424319839462 +-0.16769690214136906414 +-0.16744858401571804940 +-0.16720050978151621002 +-0.16695267935337923526 +-0.16670509264527072490 +-0.16645774957050729626 +-0.16621065004176174829 +-0.16596379397106356124 +-0.16571718126980417019 +-0.16547081184874209980 +-0.16522468561800071618 +-0.16497880248707649797 +-0.16473316236483909192 +-0.16448776515953678068 +-0.16424261077879745430 +-0.16399769912963263474 +-0.16375303011844116741 +-0.16350860365101113625 +-0.16326441963252344425 +-0.16302047796755556042 +-0.16277677856008329615 +-0.16253332131348507961 +-0.16229010613054264955 +-0.16204713291344899351 +-0.16180440156380576644 +-0.16156191198262895292 +-0.16131966407035155942 +-0.16107765772682863803 +-0.16083589285133514935 +-0.16059436934257434459 +-0.16035308709867690524 +-0.16011204601720688268 +-0.15987124599516180923 +-0.15963068692897727785 +-0.15939036871452996746 +-0.15915029124713950259 +-0.15891045442157175627 +-0.15867085813204276357 +-0.15843150227221983184 +-0.15819238673522578731 +-0.15795351141363986325 +-0.15771487619950463888 +-0.15747648098432417973 +-0.15723832565906839531 +-0.15700041011417853465 +-0.15676273423956629816 +-0.15652529792461761238 +-0.15628810105819643250 +-0.15605114352864812854 +-0.15581442522379926330 +-0.15557794603096275488 +-0.15534170583694037471 +-0.15510570452802488473 +-0.15486994199000306272 +-0.15463441810815772848 +-0.15439913276727235125 +-0.15416408585163113298 +-0.15392927724502344922 +-0.15369470683074651363 +-0.15346037449160698785 +-0.15322628010992470071 +-0.15299242356753367522 +-0.15275880474578792945 +-0.15252542352556086591 +-0.15229227978724865777 +-0.15205937341077385705 +-0.15182670427558853099 +-0.15159427226067387351 +-0.15136207724454517343 +-0.15113011910525567250 +-0.15089839772039484456 +-0.15066691296709477932 +-0.15043566472203095952 
+-0.15020465286142592465 +-0.14997387726105060324 +-0.14974333779622639451 +-0.14951303434183085828 +-0.14928296677229632716 +-0.14905313496161345932 +-0.14882353878333640096 +-0.14859417811058198144 +-0.14836505281603246109 +-0.14813616277193999982 +-0.14790750785012868329 +-0.14767908792199455070 +-0.14745090285851064626 +-0.14722295253022843475 +-0.14699523680728035502 +-0.14676775555938223472 +-0.14654050865583509444 +-0.14631349596552908898 +-0.14608671735694386817 +-0.14586017269815224062 +-0.14563386185682269947 +-0.14540778470022075464 +-0.14518194109521229129 +-0.14495633090826418043 +-0.14473095400545016309 +-0.14450581025244921274 +-0.14428089951454958761 +-0.14405622165665110668 +-0.14383177654326886885 +-0.14360756403853189300 +-0.14338358400618850252 +-0.14315983630960879558 +-0.14293632081178375692 +-0.14271303737533094780 +-0.14248998586249450593 +-0.14226716613514928111 +-0.14204457805480091848 +-0.14182222148258971650 +-0.14160009627929220910 +-0.14137820230532358035 +-0.14115653942073932980 +-0.14093510748523849219 +-0.14071390635816469206 +-0.14049293589850928021 +-0.14027219596491180553 +-0.14005168641566573262 +-0.13983140710871638790 +-0.13961135790166537274 +-0.13939153865177225655 +-0.13917194921595821278 +-0.13895258945080454782 +-0.13873345921255816893 +-0.13851455835713213927 +-0.13829588674010862004 +-0.13807744421674009172 +-0.13785923064195151899 +-0.13764124587034348712 +-0.13742348975619303464 +-0.13720596215345587376 +-0.13698866291576974885 +-0.13677159189645449189 +-0.13655474894851538092 +-0.13633813392464522174 +-0.13612174667722570787 +-0.13590558705832997410 +-0.13568965491972317938 +-0.13547395011286789135 +-0.13525847248892239327 +-0.13504322189874415350 +-0.13482819819289182384 +-0.13461340122162862576 +-0.13439883083492104587 +-0.13418448688244358213 +-0.13397036921357946548 +-0.13375647767742346317 +-0.13354281212278271140 +-0.13332937239817915787 +-0.13311615835185217072 +-0.13290316983175920473 +-0.13269040668557832707 
+-0.13247786876071071527 +-0.13226555590428132336 +-0.13205346796314199054 +-0.13184160478387146886 +-0.13162996621278058584 +-0.13141855209591080111 +-0.13120736227903692650 +-0.13099639660767090077 +-0.13078565492706140105 +-0.13057513708219589676 +-0.13036484291780350842 +-0.13015477227835725582 +-0.12994492500807361401 +-0.12973530095091623249 +-0.12952589995059743400 +-0.12931672185057965785 +-0.12910776649407743055 +-0.12889903372405894788 +-0.12869052338324848961 +-0.12848223531412761300 +-0.12827416935893601324 +-0.12806632535967618636 +-0.12785870315811243003 +-0.12765130259577295302 +-0.12744412351395248417 +-0.12723716575371488147 +-0.12703042915589207729 +-0.12682391356108813074 +-0.12661761880968003258 +-0.12641154474182011991 +-0.12620569119743663133 +-0.12600005801623612167 +-0.12579464503770582118 +-0.12558945210111366331 +-0.12538447904551133782 +-0.12517972570973565083 +-0.12497519193240991253 +-0.12477087755194582464 +-0.12456678240654436851 +-0.12436290633419966323 +-0.12415924917269817451 +-0.12395581075962058826 +-0.12375259093234584895 +-0.12354958952805017436 +-0.12334680638370898453 +-0.12314424133609984391 +-0.12294189422180405724 +-0.12273976487720614226 +-0.12253785313849768768 +-0.12233615884167778343 +-0.12213468182255537986 +-0.12193342191675009267 +-0.12173237895969402089 +-0.12153155278663395344 +-0.12133094323263177161 +-0.12113055013256683601 +-0.12093037332113766580 +-0.12073041263286277136 +-0.12053066790208280534 +-0.12033113896296118717 +-0.12013182564948771125 +-0.11993272779547771434 +-0.11973384523457430983 +-0.11953517780025040007 +-0.11933672532581107717 +-0.11913848764439219363 +-0.11894046458896435914 +-0.11874265599233460589 +-0.11854506168714562531 +-0.11834768150587929303 +-0.11815051528085727950 +-0.11795356284424306226 +-0.11775682402804266147 +-0.11756029866410645790 +-0.11736398658413103868 +-0.11716788761965990506 +-0.11697200160208527653 +-0.11677632836264982552 +-0.11658086773244763501 +-0.11638561954242584995 
+-0.11619058362338542667 +-0.11599575980598456071 +-0.11580114792073782637 +-0.11560674779801810574 +-0.11541255926805869814 +-0.11521858216095538785 +-0.11502481630666511192 +-0.11483126153501019284 +-0.11463791767567797775 +-0.11444478455822351681 +-0.11425186201206954939 +-0.11405914986650879384 +-0.11386664795070522427 +-0.11367435609369516691 +-0.11348227412438853523 +-0.11329040187157078667 +-0.11309873916390367210 +-0.11290728582992688722 +-0.11271604169805858608 +-0.11252500659659872562 +-0.11233418035372826072 +-0.11214356279751075407 +-0.11195315375589545703 +-0.11176295305671669900 +-0.11157296052769549721 +-0.11138317599644151357 +-0.11119359929045480317 +-0.11100423023712506498 +-0.11081506866373494469 +-0.11062611439746035391 +-0.11043736726537220494 +-0.11024882709443721562 +-0.11006049371151924166 +-0.10987236694338115006 +-0.10968444661668529105 +-0.10949673255799496907 +-0.10930922459377614975 +-0.10912192255039801503 +-0.10893482625413486442 +-0.10874793553116610112 +-0.10856125020758002064 +-0.10837477010937177080 +-0.10818849506244700154 +-0.10800242489262192047 +-0.10781655942562530515 +-0.10763089848709871121 +-0.10744544190259840144 +-0.10726018949759659471 +-0.10707514109748202114 +-0.10689029652756154576 +-0.10670565561306173674 +-0.10652121817912897639 +-0.10633698405083201466 +-0.10615295305316120589 +-0.10596912501103231130 +-0.10578549974928551369 +-0.10560207709268663867 +-0.10541885686593033267 +-0.10523583889363898047 +-0.10505302300036446772 +-0.10487040901058977682 +-0.10468799674873076333 +-0.10450578603913505960 +-0.10432377670608525277 +-0.10414196857379920402 +-0.10396036146643153342 +-0.10377895520807417507 +-0.10359774962275779264 +-0.10341674453445326431 +-0.10323593976707201580 +-0.10305533514446758858 +-0.10287493049043702764 +-0.10269472562872117294 +-0.10251472038300660228 +-0.10233491457692552029 +-0.10215530803405878379 +-0.10197590057793524954 +-0.10179669203203296768 +-0.10161768221978111082 +-0.10143887096456127850 
+-0.10126025808970667841 +-0.10108184341850499910 +-0.10090362677419950632 +-0.10072560797998823812 +-0.10054778685902711344 +-0.10037016323442975174 +-0.10019273692926930486 +-0.10001550776657865127 +-0.09983847556935200596 +-0.09966164016054600283 +-0.09948500136308026376 +-0.09930855899983860591 +-0.09913231289367051280 +-0.09895626286739134247 +-0.09878040874378413161 +-0.09860475034559938734 +-0.09842928749555822365 +-0.09825402001635125115 +-0.09807894773064014526 +-0.09790407046105915889 +-0.09772938803021649634 +-0.09755490026069345288 +-0.09738060697504716257 +-0.09720650799581095902 +-0.09703260314549534693 +-0.09685889224658898733 +-0.09668537512155947478 +-0.09651205159285483615 +-0.09633892148290390534 +-0.09616598461411737797 +-0.09599324080888939348 +-0.09582068988959749345 +-0.09564833167860453678 +-0.09547616599825828332 +-0.09530419267089453028 +-0.09513241151883598812 +-0.09496082236439351565 +-0.09478942502986863194 +-0.09461821933755265590 +-0.09444720510972809402 +-0.09427638216866984777 +-0.09410575033664687894 +-0.09393530943592109939 +-0.09376505928874999396 +-0.09359499971738705071 +-0.09342513054408262130 +-0.09325545159108457327 +-0.09308596268063962231 +-0.09291666363499402614 +-0.09274755427639443106 +-0.09257863442708873236 +-0.09240990390932721232 +-0.09224136254536316470 +-0.09207301015745389394 +-0.09190484656786107598 +-0.09173687159885302034 +-0.09156908507270421216 +-0.09140148681169614486 +-0.09123407663811899937 +-0.09106685437427251839 +-0.09089981984246614521 +-0.09073297286501989800 +-0.09056631326426604900 +-0.09039984086254919393 +-0.09023355548222763978 +-0.09006745694567344640 +-0.08990154507527498007 +-0.08973581969343492892 +-0.08957028062257378631 +-0.08940492768512990629 +-0.08923976070355967016 +-0.08907477950033870773 +-0.08890998389796302137 +-0.08874537371894931914 +-0.08858094878583623599 +-0.08841670892118473624 +-0.08825265394757983439 +-0.08808878368763010946 +-0.08792509796396937027 +-0.08776159659925714118 
+-0.08759827941617977232 +-0.08743514623745071712 +-0.08727219688581211443 +-0.08710943118403444152 +-0.08694684895491923415 +-0.08678445002129665797 +-0.08662223420603021307 +-0.08646020133201451352 +-0.08629835122217734134 +-0.08613668369948028480 +-0.08597519858691941852 +-0.08581389570752596951 +-0.08565277488436734421 +-0.08549183594054762803 +-0.08533107869920897315 +-0.08517050298353139037 +-0.08501010861673430341 +-0.08484989542207670155 +-0.08468986322285844415 +-0.08453001184242082966 +-0.08437034110414698418 +-0.08421085083146358230 +-0.08405154084784065283 +-0.08389241097679180081 +-0.08373346104187713579 +-0.08357469086670174518 +-0.08341610027491767887 +-0.08325768909022430997 +-0.08309945713636904263 +-0.08294140423714797816 +-0.08278353021640690035 +-0.08262583489804227466 +-0.08246831810600092905 +-0.08231097966428195523 +-0.08215381939693640334 +-0.08199683712806879465 +-0.08184003268183764890 +-0.08168340588245584510 +-0.08152695655419160692 +-0.08137068452136987651 +-0.08121458960837095453 +-0.08105867163963394184 +-0.08090293043965546271 +-0.08074736583299105264 +-0.08059197764425587995 +-0.08043676569812542587 +-0.08028172981933595631 +-0.08012686983268554886 +-0.07997218556303443970 +-0.07981767683530641144 +-0.07966334347448848774 +-0.07950918530563219622 +-0.07935520215385420684 +-0.07920139384433703966 +-0.07904776020232957834 +-0.07889430105314779174 +-0.07874101622217594132 +-0.07858790553486641461 +-0.07843496881674065502 +-0.07828220589339017488 +-0.07812961659047697183 +-0.07797720073373429206 +-0.07782495814896714381 +-0.07767288866205322717 +-0.07752099209894312837 +-0.07736926828566140224 +-0.07721771704830758531 +-0.07706633821305577947 +-0.07691513160615617850 +-0.07676409705393549832 +-0.07661323438279743492 +-0.07646254341922362197 +-0.07631202398977388057 +-0.07616167592108785689 +-0.07601149903988414780 +-0.07586149317296213279 +-0.07571165814720200171 +-0.07556199378956583723 +-0.07541249992709800343 +-0.07526317638692571477 
+-0.07511402299625971613 +-0.07496503958239579546 +-0.07481622597271311847 +-0.07466758199467775359 +-0.07451910747584121475 +-0.07437080224384201577 +-0.07422266612640590622 +-0.07407469895134692617 +-0.07392690054656742005 +-0.07377927074005941055 +-0.07363180935990439047 +-0.07348451623427504353 +-0.07333739119143461993 +-0.07319043405973850447 +-0.07304364466763414721 +-0.07289702284366224305 +-0.07275056841645723138 +-0.07260428121474737928 +-0.07245816106735653017 +-0.07231220780320363195 +-0.07216642125130304231 +-0.07202080124076712386 +-0.07187534760080464824 +-0.07173006016072260016 +-0.07158493874992652439 +-0.07143998319792112250 +-0.07129519333431061368 +-0.07115056898879960900 +-0.07100610999119423561 +-0.07086181617140152600 +-0.07071768735943088913 +-0.07057372338539463774 +-0.07042992407950809941 +-0.07028628927209086552 +-0.07014281879356702720 +-0.06999951247446560554 +-0.06985637014542185608 +-0.06971339163717708842 +-0.06957057678057938788 +-0.06942792540658455913 +-0.06928543734625652872 +-0.06914311243076795566 +-0.06900095049140056447 +-0.06885895135954679669 +-0.06871711486670827040 +-0.06857544084449852806 +-0.06843392912464286992 +-0.06829257953897849287 +-0.06815139191945546182 +-0.06801036609813748690 +-0.06786950190720193732 +-0.06772879917894104873 +-0.06758825774576197876 +-0.06744787744018811149 +-0.06730765809485894646 +-0.06716759954253012643 +-0.06702770161607608801 +-0.06688796414848860450 +-0.06674838697287810430 +-0.06660896992247447579 +-0.06646971283062744207 +-0.06633061553080674133 +-0.06619167785660337588 +-0.06605289964173005623 +-0.06591428072002136762 +-0.06577582092543461656 +-0.06563752009205042759 +-0.06549937805407317348 +-0.06536139464583182179 +-0.06522356970178024016 +-0.06508590305649768204 +-0.06494839454469011897 +-0.06481104400118975484 +-0.06467385126095606673 +-0.06453681615907655433 +-0.06439993853076717012 +-0.06426321821137283286 +-0.06412665503636783004 +-0.06399024884135738611 +-0.06385399946207623301 
+-0.06371790673439113595 +-0.06358197049430090730 +-0.06344619057793632333 +-0.06331056682156138704 +-0.06317509906157370292 +-0.06303978713450478222 +-0.06290463087702109768 +-0.06276963012592422231 +-0.06263478471815195348 +-0.06250009449077810475 +-0.06236555928101371327 +-0.06223117892620721320 +-0.06209695326384525454 +-0.06196288213155328600 +-0.06182896536709589497 +-0.06169520280837745979 +-0.06156159429344333633 +-0.06142813966047876162 +-0.06129483874781162245 +-0.06116169139391138682 +-0.06102869743739029740 +-0.06089585671700393360 +-0.06076316907165168341 +-0.06063063434037722910 +-0.06049825236236942849 +-0.06036602297696242597 +-0.06023394602363698475 +-0.06010202134202006358 +-0.05997024877188619069 +-0.05983862815315749845 +-0.05970715932590467401 +-0.05957584213034742421 +-0.05944467640685485721 +-0.05931366199594657190 +-0.05918279873829275506 +-0.05905208647471424377 +-0.05892152504618473202 +-0.05879111429382972986 +-0.05866085405892798588 +-0.05853074418291179248 +-0.05840078450736764515 +-0.05827097487403661707 +-0.05814131512481521269 +-0.05801180510175613786 +-0.05788244464706822356 +-0.05775323360311745285 +-0.05762417181242758535 +-0.05749525911768037928 +-0.05736649536171661151 +-0.05723788038753638285 +-0.05710941403829969398 +-0.05698109615732745159 +-0.05685292658810143368 +-0.05672490517426508755 +-0.05659703175962436938 +-0.05646930618814805652 +-0.05634172830396851767 +-0.05621429795138208757 +-0.05608701497485026055 +-0.05595987921899896189 +-0.05583289052862049073 +-0.05570604874867384620 +-0.05557935372428474824 +-0.05545280530074656045 +-0.05532640332352116441 +-0.05520014763823895970 +-0.05507403809070002265 +-0.05494807452687430066 +-0.05482225679290280568 +-0.05469658473509754482 +-0.05457105819994192974 +-0.05444567703409267795 +-0.05432044108437907026 +-0.05419535019780414437 +-0.05407040422154530540 +-0.05394560300295488109 +-0.05382094638956047561 +-0.05369643422906596186 +-0.05357206636935218924 +-0.05344784265847705301 
+-0.05332376294467648659 +-0.05319982707636512764 +-0.05307603490213654018 +-0.05295238627076444271 +-0.05282888103120283313 +-0.05270551903258673815 +-0.05258230012423317085 +-0.05245922415564125557 +-0.05233629097649292877 +-0.05221350043665400065 +-0.05209085238617436331 +-0.05196834667528880958 +-0.05184598315441747707 +-0.05172376167416719434 +-0.05160168208533062739 +-0.05147974423888845852 +-0.05135794798600951117 +-0.05123629317805106220 +-0.05111477966655969540 +-0.05099340730327225207 +-0.05087217594011586574 +-0.05075108542920920424 +-0.05063013562286274721 +-0.05050932637357986865 +-0.05038865753405692011 +-0.05026812895718429930 +-0.05014774049604683176 +-0.05002749200392479084 +-0.04990738333429432794 +-0.04978741434082811085 +-0.04966758487739604544 +-0.04954789479806642749 +-0.04942834395710517942 +-0.04930893220897850093 +-0.04918965940835205025 +-0.04907052541009225555 +-0.04895153006926689088 +-0.04883267324114578395 +-0.04871395478120126021 +-0.04859537454510928084 +-0.04847693238874974109 +-0.04835862816820764298 +-0.04824046173977311613 +-0.04812243295994271530 +-0.04800454168541968408 +-0.04788678777311503043 +-0.04776917108014808178 +-0.04765169146384704707 +-0.04753434878175022416 +-0.04741714289160620099 +-0.04730007365137419567 +-0.04718314091922629072 +-0.04706634455354661434 +-0.04694968441293278366 +-0.04683316035619651541 +-0.04671677224236444465 +-0.04660051993067852033 +-0.04648440328059718490 +-0.04636842215179608206 +-0.04625257640416841759 +-0.04613686589782600017 +-0.04602129049310007408 +-0.04590585005054169387 +-0.04579054443092293175 +-0.04567537349523739104 +-0.04556033710470094866 +-0.04544543512075285147 +-0.04533066740505604236 +-0.04521603381949800687 +-0.04510153422619190416 +-0.04498716848747705277 +-0.04487293646591972857 +-0.04475883802431399050 +-0.04464487302568295041 +-0.04453104133327822484 +-0.04441734281058226652 +-0.04430377732130857255 +-0.04419034472940213537 +-0.04407704489904072653 +-0.04396387769463557665 
+-0.04385084298083197912 +-0.04373794062251048359 +-0.04362517048478741638 +-0.04351253243301611562 +-0.04340002633278711858 +-0.04328765204992966048 +-0.04317540945051191043 +-0.04306329840084240079 +-0.04295131876747047828 +-0.04283947041718731696 +-0.04272775321702688978 +-0.04261616703426630154 +-0.04250471173642730166 +-0.04239338719127706823 +-0.04228219326682863127 +-0.04217112983134203852 +-0.04206019675332543784 +-0.04194939390153546582 +-0.04183872114497866335 +-0.04172817835291208616 +-0.04161776539484412368 +-0.04150748214053585905 +-0.04139732846000147154 +-0.04128730422350920803 +-0.04117740930158258345 +-0.04106764356500127583 +-0.04095800688480174395 +-0.04084849913227835833 +-0.04073912017898482368 +-0.04062986989673380428 +-0.04052074815759922066 +-0.04041175483391680479 +-0.04030288979828471063 +-0.04019415292356479791 +-0.04008554408288353410 +-0.03997706314963277857 +-0.03986870999747119809 +-0.03976048450032473869 +-0.03965238653238822852 +-0.03954441596812567628 +-0.03943657268227182544 +-0.03932885654983266088 +-0.03922126744608699783 +-0.03911380524658708557 +-0.03900646982715972461 +-0.03889926106390731442 +-0.03879217883320924820 +-0.03868522301172176020 +-0.03857839347638071514 +-0.03847169010440122661 +-0.03836511277327939173 +-0.03825866136079317942 +-0.03815233574500363767 +-0.03804613580425555280 +-0.03794006141717907316 +-0.03783411246269044459 +-0.03772828881999341216 +-0.03762259036857987932 +-0.03751701698823135811 +-0.03741156855901991291 +-0.03730624496130931916 +-0.03720104607575630551 +-0.03709597178331147660 +-0.03699102196522075642 +-0.03688619650302624869 +-0.03678149527856704182 +-0.03667691817398148479 +-0.03657246507170724276 +-0.03646813585448302475 +-0.03636393040534955517 +-0.03625984860765086443 +-0.03615589034503535748 +-0.03605205550145708371 +-0.03594834396117720793 +-0.03584475560876465572 +-0.03574129032909776488 +-0.03563794800736545115 +-0.03553472852906810336 +-0.03543163178001939445 +-0.03532865764634708639 
+-0.03522580601449437632 +-0.03512307677122144395 +-0.03502046980360629808 +-0.03491798499904612968 +-0.03481562224525891480 +-0.03471338143028441375 +-0.03461126244248557970 +-0.03450926517054975218 +-0.03440738950349039177 +-0.03430563533064751036 +-0.03420400254168991933 +-0.03410249102661633974 +-0.03400110067575635303 +-0.03389983137977214955 +-0.03379868302965984006 +-0.03369765551675051041 +-0.03359674873271210199 +-0.03349596256955036933 +-0.03339529691961069108 +-0.03329475167557898602 +-0.03319432673048359345 +-0.03309402197769622384 +-0.03299383731093386701 +-0.03289377262425990239 +-0.03279382781208575043 +-0.03269400276917204529 +-0.03259429739063077203 +-0.03249471157192544007 +-0.03239524520887415715 +-0.03229589819764999703 +-0.03219667043478294244 +-0.03209756181716132833 +-0.03199857224203317418 +-0.03189970160700811991 +-0.03180094981005859855 +-0.03170231674952150858 +-0.03160380232409999018 +-0.03150540643286457720 +-0.03140712897525493180 +-0.03130896985108167635 +-0.03121092896052767365 +-0.03111300620414981372 +-0.03101520148288047091 +-0.03091751469802963767 +-0.03081994575128547270 +-0.03072249454471712854 +-0.03062516098077587914 +-0.03052794496229674012 +-0.03043084639250023468 +-0.03033386517499411794 +-0.03023700121377495553 +-0.03014025441323000057 +-0.03004362467813874102 +-0.02994711191367495015 +-0.02985071602540794941 +-0.02975443691930480111 +-0.02965827450173163377 +-0.02956222867945590069 +-0.02946629935964784064 +-0.02937048644988240337 +-0.02927478985814089413 +-0.02917920949281333984 +-0.02908374526269914134 +-0.02898839707701042143 +-0.02889316484537276034 +-0.02879804847782750643 +-0.02870304788483359071 +-0.02860816297726945234 +-0.02851339366643472484 +-0.02841873986405253280 +-0.02832420148227098380 +-0.02822977843366564210 +-0.02813547063124088865 +-0.02804127798843228381 +-0.02794720041910826042 +-0.02785323783757240670 +-0.02775939015856530856 +-0.02766565729726647160 +-0.02757203916929662837 +-0.02747853569071959101 
+-0.02738514677804382638 +-0.02729187234822567573 +-0.02719871231867031225 +-0.02710566660723443336 +-0.02701273513222812728 +-0.02691991781241714551 +-0.02682721456702475207 +-0.02673462531573415901 +-0.02664214997869073995 +-0.02654978847650378213 +-0.02645754073024903993 +-0.02636540666147084086 +-0.02627338619218409083 +-0.02618147924487683464 +-0.02608968574251230646 +-0.02599800560853110856 +-0.02590643876685377878 +-0.02581498514188269172 +-0.02572364465850445620 +-0.02563241724209246869 +-0.02554130281850895343 +-0.02545030131410745688 +-0.02535941265573505438 +-0.02526863677073512907 +-0.02517797358694887078 +-0.02508742303271862054 +-0.02499698503688992449 +-0.02490665952881380291 +-0.02481644643834933495 +-0.02472634569586621214 +-0.02463635723224683743 +-0.02454648097888934358 +-0.02445671686770968523 +-0.02436706483114443877 +-0.02427752480215307831 +-0.02418809671422074772 +-0.02409878050136053662 +-0.02400957609811641899 +-0.02392048343956569220 +-0.02383150246132149583 +-0.02374263309953553169 +-0.02365387529090099555 +-0.02356522897265435346 +-0.02347669408257923793 +-0.02338827055900820004 +-0.02329995834082585621 +-0.02321175736747146598 +-0.02312366757894184291 +-0.02303568891579379357 +-0.02294782131914727824 +-0.02286006473068799213 +-0.02277241909267029366 +-0.02268488434791996258 +-0.02259746043983688196 +-0.02251014731239828898 +-0.02242294491016138397 +-0.02233585317826634539 +-0.02224887206243909496 +-0.02216200150899469423 +-0.02207524146483942282 +-0.02198859187747462945 +-0.02190205269499948329 +-0.02181562386611369392 +-0.02172930534012078654 +-0.02164309706693120020 +-0.02155699899706505634 +-0.02147101108165563182 +-0.02138513327245213091 +-0.02129936552182318948 +-0.02121370778275958463 +-0.02112816000887768678 +-0.02104272215442241564 +-0.02095739417427072357 +-0.02087217602393461394 +-0.02078706765956446489 +-0.02070206903795215880 +-0.02061718011653470781 +-0.02053240085339683857 +-0.02044773120727522847 +-0.02036317113756115973 
+-0.02027872060430415541 +-0.02019437956821528227 +-0.02011014799067065151 +-0.02002602583371455164 +-0.01994201306006327182 +-0.01985810963310829375 +-0.01977431551691998315 +-0.01969063067625079211 +-0.01960705507653909621 +-0.01952358868391239344 +-0.01944023146519120379 +-0.01935698338789251793 +-0.01927384442023326316 +-0.01919081453113427593 +-0.01910789369022363252 +-0.01902508186784014621 +-0.01894237903503772844 +-0.01885978516358848348 +-0.01877730022598669835 +-0.01869492419545254122 +-0.01861265704593590559 +-0.01853049875211998029 +-0.01844844928942534351 +-0.01836650863401380340 +-0.01828467676279202717 +-0.01820295365341556212 +-0.01812133928429281865 +-0.01803983363458875819 +-0.01795843668422917455 +-0.01787714841390441664 +-0.01779596880507346851 +-0.01771489783996808148 +-0.01763393550159665646 +-0.01755308177374820255 +-0.01747233664099679187 +-0.01739170008870537590 +-0.01731117210303007026 +-0.01723075267092415153 +-0.01715044178014260567 +-0.01707023941924561486 +-0.01699014557760357078 +-0.01691016024540099513 +-0.01683028341364072381 +-0.01675051507414829569 +-0.01667085521957637276 +-0.01659130384340878547 +-0.01651186093996530324 +-0.01643252650440571450 +-0.01635330053273451045 +-0.01627418302180502069 +-0.01619517396932415934 +-0.01611627337385660233 +-0.01603748123482968621 +-0.01595879755253765131 +-0.01588022232814632895 +-0.01580175556369766907 +-0.01572339726211457317 +-0.01564514742720489460 +-0.01556700606366699137 +-0.01548897317709376636 +-0.01541104877397759046 +-0.01533323286171499843 +-0.01525552544861154616 +-0.01517792654388634699 +-0.01510043615767716653 +-0.01502305430104507525 +-0.01494578098597947567 +-0.01486861622540271502 +-0.01479156003317529111 +-0.01471461242410042856 +-0.01463777341392941481 +-0.01456104301936637924 +-0.01448442125807325974 +-0.01440790814867513002 +-0.01433150371076462835 +-0.01425520796490770473 +-0.01417902093264849193 +-0.01410294263651431027 +-0.01402697310002091850 +-0.01395111234767778749 
+-0.01387536040499300072 +-0.01379971729847890606 +-0.01372418305565704759 +-0.01364875770506369414 +-0.01357344127625487867 +-0.01349823379981195809 +-0.01342313530734674625 +-0.01334814583150715009 +-0.01327326540598240158 +-0.01319849406550855503 +-0.01312383184587388209 +-0.01304927878392462409 +-0.01297483491756979378 +-0.01290050028578766314 +-0.01282627492863059118 +-0.01275215888723093410 +-0.01267815220380654438 +-0.01260425492166649189 +-0.01253046708521646409 +-0.01245678873996483929 +-0.01238321993252806605 +-0.01230976071063662712 +-0.01223641112314057845 +-0.01216317122001548713 +-0.01209004105236802934 +-0.01201702067244211393 +-0.01194411013362451854 +-0.01187130949045076683 +-0.01179861879861123126 +-0.01172603811495682991 +-0.01165356749750480134 +-0.01158120700544530007 +-0.01150895669914675686 +-0.01143681664016227284 +-0.01136478689123540838 +-0.01129286751630651474 +-0.01122105858051844138 +-0.01114936015022306021 +-0.01107777229298721050 +-0.01100629507759874788 +-0.01093492857407288993 +-0.01086367285365837272 +-0.01079252798884350502 +-0.01072149405336267694 +-0.01065057112220249222 +-0.01057975927160800279 +-0.01050905857908917759 +-0.01043846912342708683 +-0.01036799098468018689 +-0.01029762424419090883 +-0.01022736898459193980 +-0.01015722528981261204 +-0.01008719324508533521 +-0.01001727293695225257 +-0.00994746445327114245 +-0.00987776788322270415 +-0.00980818331731647328 +-0.00973871084739758204 +-0.00966935056665327135 +-0.00960010256961951400 +-0.00953096695218737765 +-0.00946194381160997758 +-0.00939303324650883099 +-0.00932423535688065193 +-0.00925555024410388075 +-0.00918697801094547559 +-0.00911851876156739329 +-0.00905017260153360460 +-0.00898193963781659249 +-0.00891381997880416263 +-0.00884581373430614465 +-0.00877792101556138654 +-0.00871014193524395110 +-0.00864247660747070531 +-0.00857492514780755326 +-0.00850748767327643741 +-0.00844016430236220645 +-0.00837295515501946044 +-0.00830586035267922435 +-0.00823888001825609333 
+-0.00817201427615503635 +-0.00810526325227814599 +-0.00803862707403167273 +-0.00797210587033293262 +-0.00790569977161708487 +-0.00783940890984427541 +-0.00777323341850644656 +-0.00770717343263426988 +-0.00764122908880432265 +-0.00757540052514547087 +-0.00750968788134660260 +-0.00744409129866303084 +-0.00737861091992370217 +-0.00731324688953810965 +-0.00724799935350336350 +-0.00718286845941092534 +-0.00711785435645396342 +-0.00705295719543403820 +-0.00698817712876834748 +-0.00692351431049642758 +-0.00685896889628745052 +-0.00679454104344684104 +-0.00673023091092365879 +-0.00666603865931738281 +-0.00660196445088485732 +-0.00653800844954742152 +-0.00647417082089777988 +-0.00641045173220677975 +-0.00634685135243086198 +-0.00628336985221858348 +-0.00622000740391774089 +-0.00615676418158226605 +-0.00609364036097922394 +-0.00603063611959552080 +-0.00596775163664507897 +-0.00590498709307568214 +-0.00584234267157565049 +-0.00577981855658091757 +-0.00571741493428184946 +-0.00565513199262986967 +-0.00559296992134466087 +-0.00553092891192062328 +-0.00546900915763381783 +-0.00540721085354876289 +-0.00534553419652507300 +-0.00528397938522410203 +-0.00522254662011587076 +-0.00516123610348561551 +-0.00510004803944035662 +-0.00503898263391564828 +-0.00497804009468220173 +-0.00491722063135209466 +-0.00485652445538574829 +-0.00479595178009828167 +-0.00473550282066582517 +-0.00467517779413211682 +-0.00461497691941487129 +-0.00455490041731197986 +-0.00449494851050813264 +-0.00443512142358081906 +-0.00437541938300679053 +-0.00431584261716807041 +-0.00425639135635826928 +-0.00419706583278846822 +-0.00413786628059359309 +-0.00407879293583821457 +-0.00401984603652259364 +-0.00396102582258852764 +-0.00390233253592543738 +-0.00384376642037566026 +-0.00378532772174088516 +-0.00372701668778736969 +-0.00366883356825170375 +-0.00361077861484645482 +-0.00355285208126563312 +-0.00349505422319002857 +-0.00343738529829280522 +-0.00337984556624468116 +-0.00332243528871926062 +-0.00326515472939810239 
+-0.00320800415397591880 +-0.00315098383016546458 +-0.00309409402770270117 +-0.00303733501835150733 +-0.00298070707590849734 +-0.00292421047620781234 +-0.00286784549712569125 +-0.00281161241858477167 +-0.00275551152255904897 +-0.00269954309307779633 +-0.00264370741622999830 +-0.00258800478016850671 +-0.00253243547511413205 +-0.00247699979335945161 +-0.00242169802927292817 +-0.00236653047930257153 +-0.00231149744197950988 +-0.00225659921792162538 +-0.00220183610983702748 +-0.00214720842252720187 +-0.00209271646289043269 +-0.00203836053992480963 +-0.00198414096473112844 +-0.00193005805051586028 +-0.00187611211259380069 +-0.00182230346839052835 +-0.00176863243744515108 +-0.00171509934141242989 +-0.00166170450406498763 +-0.00160844825129547743 +-0.00155533091111822769 +-0.00150235281367133407 +-0.00144951429121801222 +-0.00139681567814826067 +-0.00134425731097999887 +-0.00129183952836036276 +-0.00123956267106650341 +-0.00118742708200668227 +-0.00113543310622076779 +-0.00108358109088076138 +-0.00103187138529117653 +-0.00098030434088921072 +-0.00092888031124441499 +-0.00087759965205905634 +-0.00082646272116725257 +-0.00077546987853459705 +-0.00072462148625736895 +-0.00067391790856158242 +-0.00062335951180172273 +-0.00057294666445956871 +-0.00052267973714255833 +-0.00047255910258195400 +-0.00042258513563093938 +-0.00037275821326244107 +-0.00032307871456658144 +-0.00027354702074823854 +-0.00022416351512409208 +-0.00017492858311950459 +-0.00012584261226533024 +-0.00007690599219422538 +-0.00002811911463682659 +0.00002051762658208678 +0.00006900383554804056 +0.00011733911426135711 +0.00016552306264196211 +0.00021355527853431807 +0.00026143535771304643 +0.00030916289388815250 +0.00035673747871101959 +0.00040415870178057349 +0.00045142615064967193 +0.00049853941083171933 +0.00054549806580782528 +0.00059230169703383499 +0.00063894988394805059 +0.00068544220397902104 +0.00073177823255379706 +0.00077795754310628999 +0.00082397970708624412 +0.00086984429396813254 
+0.00091555087126070887 +0.00096109900451675403 +0.00100648825734316982 +0.00105171819141126671 +0.00109678836646789858 +0.00114169834034607816 +0.00118644766897676632 +0.00123103590640059293 +0.00127546260478002271 +0.00131972731441184327 +0.00136382958374022347 +0.00140776895936975117 +0.00145154498607928607 +0.00149515720683581995 +0.00153860516280905179 +0.00158188839338607680 +0.00162500643618673736 +0.00166795882707903567 +0.00171074510019528290 +0.00175336478794848628 +0.00179581742104914358 +0.00183810252852256630 +0.00188021963772666339 +0.00192216827436976576 +0.00196394796252942743 +0.00200555822467135547 +0.00204699858166882331 +0.00208826855282253027 +0.00212936765588111945 +0.00217029540706174025 +0.00221105132107143106 +0.00225163491112892033 +0.00229204568898675300 +0.00233228316495403144 +0.00237234684791965550 +0.00241223624537586362 +0.00245195086344250678 +0.00249149020689172756 +0.00253085377917306028 +0.00257004108243917605 +0.00260905161757219806 +0.00264788488421022726 +0.00268654038077470632 +0.00272501760449824762 +0.00276331605145289190 +0.00280143521657894327 +0.00283937459371459656 +0.00287713367562564010 +0.00291471195403606364 +0.00295210891965925560 +0.00298932406222950564 +0.00302635687053440109 +0.00306320683244743668 +0.00309987343496153419 +0.00313635616422296148 +0.00317265450556589688 +0.00320876794354757857 +0.00324469596198421421 +0.00328043804398706323 +0.00331599367199963708 +0.00335136232783523214 +0.00338654349271511187 +0.00342153664730736762 +0.00345634127176643957 +0.00349095684577312377 +0.00352538284857531036 +0.00355961875902945915 +0.00359366405564255772 +0.00362751821661478962 +0.00366118071988296269 +0.00369465104316436853 +0.00372792866400152318 +0.00376101305980749458 +0.00379390370791183587 +0.00382660008560731799 +0.00385910167019734096 +0.00389140793904379839 +0.00392351836961593071 +0.00395543243953971023 +0.00398714962664794680 +0.00401866940903101121 +0.00404999126508856153 +0.00408111467358150508 
+0.00411203911368498900 +0.00414276406504210902 +0.00417328900781816382 +0.00420361342275571859 +0.00423373679123046059 +0.00426365859530754808 +0.00429337831779894302 +0.00432289544232127877 +0.00435220945335460310 +0.00438131983630170828 +0.00441022607754834855 +0.00443892766452402138 +0.00446742408576358396 +0.00449571483096963126 +0.00452379939107557753 +0.00455167725830935184 +0.00457934792625821086 +0.00460681088993374326 +0.00463406564583809799 +0.00466111169203071475 +0.00468794852819584638 +0.00471457565571083408 +0.00474099257771516680 +0.00476719879918016502 +0.00479319382697952507 +0.00481897716996053382 +0.00484454833901611526 +0.00486990684715749803 +0.00489505220958774776 +0.00491998394377589701 +0.00494470156953194257 +0.00496920460908252884 +0.00499349258714731469 +0.00501756503101614612 +0.00504142147062705201 +0.00506506143864459291 +0.00508848447053940594 +0.00511169010466816596 +0.00513467788235436474 +0.00515744734796977442 +0.00517999804901670378 +0.00520232953621081219 +0.00522444136356473368 +0.00524633308847239320 +0.00526800427179399938 +0.00528945447794169389 +0.00531068327496596326 +0.00533169023464259706 +0.00535247493256046404 +0.00537303694820985209 +0.00539337586507148119 +0.00541349127070622771 +0.00543338275684545659 +0.00545304991948191483 +0.00547249235896140034 +0.00549170968007490698 +0.00551070149215152419 +0.00552946740915175473 +0.00554800704976174137 +0.00556632003748764453 +0.00558440600075096188 +0.00560226457298428331 +0.00561989539272756463 +0.00563729810372499539 +0.00565447235502242879 +0.00567141780106523042 +0.00568813410179677768 +0.00570462092275731466 +0.00572087793518342297 +0.00573690481610796259 +0.00575270124846027821 +0.00576826692116714449 +0.00578360152925392994 +0.00579870477394628988 +0.00581357636277222024 +0.00582821600966452504 +0.00584262343506365429 +0.00585679836602086192 +0.00587074053630180781 +0.00588444968649040612 +0.00589792556409297723 +0.00591116792364279782 +0.00592417652680477316 
+0.00593695114248051804 +0.00594949154691360241 +0.00596179752379498440 +0.00597386886436877644 +0.00598570536753807378 +0.00599730683997097384 +0.00600867309620679076 +0.00601980395876235749 +0.00603069925823841897 +0.00604135883342611039 +0.00605178253141358449 +0.00606197020769252883 +0.00607192172626477237 +0.00608163695974894402 +0.00609111578948701680 +0.00610035810565079471 +0.00610936380734843076 +0.00611813280273069103 +0.00612666500909721159 +0.00613496035300253092 +0.00614301877036202773 +0.00615084020655756302 +0.00615842461654295906 +0.00616577196494917310 +0.00617288222618926023 +0.00617975538456294344 +0.00618639143436092252 +0.00619279037996872412 +0.00619895223597033331 +0.00620487702725115633 +0.00621056478910074657 +0.00621601556731498413 +0.00622122941829765606 +0.00622620640916166177 +0.00623094661782951000 +0.00623545013313328306 +0.00623971705491396973 +0.00624374749412010856 +0.00624754157290576592 +0.00625109942472777680 +0.00625442119444223621 +0.00625750703840019431 +0.00626035712454253886 +0.00626297163249402416 +0.00626535075365638496 +0.00626749469130061590 +0.00626940366065820934 +0.00627107788901140346 +0.00627251761578251008 +0.00627372309262208983 +0.00627469458349610119 +0.00627543236477187249 +0.00627593672530298453 +0.00627620796651290423 +0.00627624640247742040 +0.00627605236000583662 +0.00627562617872083183 +0.00627496821113703029 +0.00627407882273814214 +0.00627295839205282285 +0.00627160731072896882 +0.00627002598360661915 +0.00626821482878932561 +0.00626617427771401780 +0.00626390477521922024 +0.00626140677961176031 +0.00625868076273173018 +0.00625572721001583804 +0.00625254662055901075 +0.00624913950717423308 +0.00624550639645067834 +0.00624164782880990646 +0.00623756435856027275 +0.00623325655394946828 +0.00622872499721507673 +0.00622397028463322292 +0.00621899302656514844 +0.00621379384750190850 +0.00620837338610687357 +0.00620273229525615446 +0.00619687124207700217 +0.00619079090798393961 +0.00618449198871271789 
+0.00617797519435211821 +0.00617124124937343168 +0.00616429089265766762 +0.00615712487752046508 +0.00614974397173458608 +0.00614214895755017549 +0.00613434063171241443 +0.00612631980547692027 +0.00611808730462254022 +0.00610964396946174753 +0.00610099065484840612 +0.00609212823018312130 +0.00608305757941587574 +0.00607377960104616733 +0.00606429520812040192 +0.00605460532822676371 +0.00604471090348725551 +0.00603461289054719584 +0.00602431226056182427 +0.00601380999918027254 +0.00600310710652670124 +0.00599220459717864345 +0.00598110350014255041 +0.00596980485882651728 +0.00595830973101008510 +0.00594661918881124549 +0.00593473431865048327 +0.00592265622121197766 +0.00591038601140177807 +0.00589792481830317579 +0.00588527378512901291 +0.00587243406917102931 +0.00585940684174628829 +0.00584619328814056215 +0.00583279460754867075 +0.00581921201301194291 +0.00580544673135250603 +0.00579150000310460749 +0.00577737308244291505 +0.00576306723710772687 +0.00574858374832713879 +0.00573392391073620274 +0.00571908903229298264 +0.00570408043419155363 +0.00568889945077193353 +0.00567354742942701405 +0.00565802573050628102 +0.00564233572721670271 +0.00562647880552035948 +0.00561045636402911826 +0.00559426981389624794 +0.00557792057870500589 +0.00556141009435410019 +0.00554473980894032784 +0.00552791118263799407 +0.00551092568757543654 +0.00549378480770853963 +0.00547649003869127900 +0.00545904288774323124 +0.00544144487351430010 +0.00542369752594634019 +0.00540580238613196790 +0.00538776100617042748 +0.00536957494902065222 +0.00535124578835132880 +0.00533277510838837000 +0.00531416450375934296 +0.00529541557933525028 +0.00527652995006952768 +0.00525750924083430611 +0.00523835508625392510 +0.00521906913053592474 +0.00519965302729922824 +0.00518010843939981352 +0.00516043703875376531 +0.00514064050615785622 +0.00512072053110745017 +0.00510067881161220711 +0.00508051705400909548 +0.00506023697277315811 +0.00503984029032585913 +0.00501932873684116359 +0.00499870405004923437 
+0.00497796797503810215 +0.00495712226405292493 +0.00493616867629324049 +0.00491510897770807472 +0.00489394494078901864 +0.00487267834436120369 +0.00485131097337254593 +0.00482984461868082859 +0.00480828107683907695 +0.00478662214987909170 +0.00476486964509324675 +0.00474302537481446820 +0.00472109115619479599 +0.00469906881098215191 +0.00467696016529563913 +0.00465476704939935069 +0.00463249129747479640 +0.00461013474739183863 +0.00458769924047851187 +0.00456518662128942241 +0.00454259873737301433 +0.00451993743903774618 +0.00449720457911705063 +0.00447440201273352923 +0.00445153159706197094 +0.00442859519109159178 +0.00440559465538741587 +0.00438253185185096417 +0.00435940864348006475 +0.00433622689412832898 +0.00431298846826376654 +0.00428969523072706728 +0.00426634904648937486 +0.00424295178040974292 +0.00421950529699214628 +0.00419601146014250519 +0.00417247213292526713 +0.00414888917732003128 +0.00412526445397807216 +0.00410159982197895630 +0.00407789713858706417 +0.00405415825900856501 +0.00403038503614834911 +0.00400657932036736081 +0.00398274295924028361 +0.00395887779731365994 +0.00393498567586436816 +0.00391106843265892276 +0.00388712790171311367 +0.00386316591305249776 +0.00383918429247357687 +0.00381518486130583406 +0.00379116943617453838 +0.00376713982876471492 +0.00374309784558591501 +0.00371904528773814606 +0.00369498395067893465 +0.00367091562399161908 +0.00364684209115477553 +0.00362276512931319645 +0.00359868650905005279 +0.00357460799416058463 +0.00355053134142730440 +0.00352645830039676747 +0.00350239061315787102 +0.00347833001412207413 +0.00345427822980509842 +0.00343023697861059374 +0.00340620797061558104 +0.00338219290735788675 +0.00335819348162537694 +0.00333421137724748120 +0.00331024826888854772 +0.00328630582184343120 +0.00326238569183523886 +0.00323848952481533810 +0.00321461895676552123 +0.00319077561350271030 +0.00316696111048583645 +0.00314317705262522670 +0.00311942503409445068 +0.00309570663814469176 +0.00307202343692155851 
+0.00304837699128473767 +0.00302476885063003489 +0.00300120055271421151 +0.00297767362348253265 +0.00295418957689908782 +0.00293074991477976915 +0.00290735612662834566 +0.00288400968947514699 +0.00286071206771875254 +0.00283746471297061965 +0.00281426906390266126 +0.00279112654609773008 +0.00276803857190332939 +0.00274500654028814878 +0.00272203183670180699 +0.00269911583293764743 +0.00267625988699868962 +0.00265346534296662323 +0.00263073353087418501 +0.00260806576658050883 +0.00258546335164977319 +0.00256292757323310118 +0.00254045970395367238 +0.00251806100179501318 +0.00249573270999276994 +0.00247347605692956182 +0.00245129225603322055 +0.00242918250567832712 +0.00240714798909109212 +0.00238518987425745052 +0.00236330931383469962 +0.00234150744506626514 +0.00231978538969991946 +0.00229814425390935167 +0.00227658512821895306 +0.00225510908743220898 +0.00223371719056318857 +0.00221241048077151217 +0.00219118998530062588 +0.00217005671541946202 +0.00214901166636731348 +0.00212805581730225561 +0.00210719013125268287 +0.00208641555507227188 +0.00206573301939822667 +0.00204514343861283455 +0.00202464771080822746 +0.00200424671775459914 +0.00198394132487149421 +0.00196373238120243218 +0.00194362071939275941 +0.00192360715567072471 +0.00190369248983165399 +0.00188387750522550049 +0.00186416296874737436 +0.00184454963083129043 +0.00182503822544702364 +0.00180562947010009427 +0.00178632406583471024 +0.00176712269723997418 +0.00174802603245891074 +0.00172903472320060634 +0.00171014940475529612 +0.00169137069601242801 +0.00167269919948154536 +0.00165413550131627055 +0.00163568017134095209 +0.00161733376308024015 +0.00159909681379147273 +0.00158096984449983225 +0.00156295336003616728 +0.00154504784907770780 +0.00152725378419128277 +0.00150957162187927152 +0.00149200180262815747 +0.00147454475095966332 +0.00145720087548437583 +0.00143997056895802246 +0.00142285420834006708 +0.00140585215485483227 +0.00138896475405499956 +0.00137219233588749723 +0.00135553521476163854 
+0.00133899368961970587 +0.00132256804400963956 +0.00130625854616000701 +0.00129006544905715232 +0.00127398899052448707 +0.00125802939330380763 +0.00124218686513883464 +0.00122646159886060984 +0.00121085377247496313 +0.00119536354925189946 +0.00117999107781690963 +0.00116473649224406255 +0.00114959991215108498 +0.00113458144279605716 +0.00111968117517593840 +0.00110489918612677904 +0.00109023553842560808 +0.00107569028089387047 +0.00106126344850259141 +0.00104695506247894435 +0.00103276513041438193 +0.00101869364637422583 +0.00100474059100868107 +0.00099090593166517279 +0.00097718962250215816 +0.00096359160460409983 +0.00095011180609777556 +0.00093675014226979184 +0.00092350651568529876 +0.00091038081630777747 +0.00089737292162007075 +0.00088448269674634217 +0.00087170999457512985 +0.00085905465588338042 +0.00084651650946143860 +0.00083409537223889680 +0.00082179104941143349 +0.00080960333456837647 +0.00079753200982111885 +0.00078557684593229325 +0.00077373760244568181 +0.00076201402781676168 +0.00075040585954401647 +0.00073891282430075926 +0.00072753463806759144 +0.00071627100626540246 +0.00070512162388889547 +0.00069408617564053728 +0.00068316433606506921 +0.00067235576968431620 +0.00066166013113243738 +0.00065107706529151993 +0.00064060620742743964 +0.00063024718332610854 +0.00061999960942987048 +0.00060986309297417446 +0.00059983723212442286 +0.00058992161611299697 +0.00058011582537636098 +0.00057041943169236823 +0.00056083199831754335 +0.00055135308012445891 +0.00054198222373911642 +0.00053271896767833509 +0.00052356284248704582 +0.00051451337087562597 +0.00050557006785704507 +0.00049673244088394345 +0.00048799998998556502 +0.00047937220790452887 +0.00047084858023337470 +0.00046242858555097671 +0.00045411169555864176 +0.00044589737521598719 +0.00043778508287653208 +0.00042977427042299596 +0.00042186438340223052 +0.00041405486115989011 +0.00040634513697465532 +0.00039873463819211535 +0.00039122278635823454 +0.00038380899735240766 +0.00037649268152003374 
+0.00036927324380470742 +0.00036215008387985710 +0.00035512259627992889 +0.00034819017053105251 +0.00034135219128118556 +0.00033460803842968469 +0.00032795708725638574 +0.00032139870855004522 +0.00031493226873622763 +0.00030855713000458928 +0.00030227265043555125 +0.00029607818412631833 +0.00028997308131631778 +0.00028395668851192882 +0.00027802834861057735 +0.00027218740102415243 +0.00026643318180174364 +0.00026076502375165482 +0.00025518225656277017 +0.00024968420692514826 +0.00024427019864992240 +0.00023893955278846149 +0.00023369158775079612 +0.00022852561942327118 +0.00022344096128549241 +0.00021843692452645934 +0.00021351281815995335 +0.00020866794913914532 +0.00020390162247042350 +0.00019921314132641238 +0.00019460180715824267 +0.00019006691980697862 +0.00018560777761426325 +0.00018122367753215345 +0.00017691391523214813 +0.00017267778521337987 +0.00016851458091003216 +0.00016442359479789176 +0.00016040411850010134 +0.00015645544289207949 +0.00015257685820561977 +0.00014876765413213852 +0.00014502711992513335 +0.00014135454450176860 +0.00013774921654364957 +0.00013421042459675967 +0.00013073745717056653 +0.00012732960283627930 +0.00012398615032430663 +0.00012070638862084818 +0.00011748960706367170 +0.00011433509543705691 +0.00011124214406591244 +0.00010821004390905095 +0.00010523808665166882 +0.00010232556479697123 +0.00009947177175699164 +0.00009667600194258920 +0.00009393755085263230 +0.00009125571516235576 +0.00008862979281093456 +0.00008605908308822327 +0.00008354288672070636 +0.00008108050595664496 +0.00007867124465043090 +0.00007631440834613650 +0.00007400930436030073 +0.00007175524186390800 +0.00006955153196360009 +0.00006739748778211389 +0.00006529242453793922 +0.00006323565962423341 +0.00006122651268695581 +0.00005926430570225884 +0.00005734836305312869 +0.00005547801160528469 +0.00005365258078233247 +0.00005187140264020413 +0.00005013381194085406 +0.00004843914622524493 +0.00004678674588561793 +0.00004517595423705786 +0.00004360611758834899 
+0.00004207658531215262 +0.00004058670991448058 +0.00003913584710349550 +0.00003772335585763454 +0.00003634859849306667 +0.00003501094073048208 +0.00003370975176124071 +0.00003244440431286012 +0.00003121427471386971 +0.00003001874295803045 +0.00002885719276792973 +0.00002772901165795149 +0.00002663359099664633 +0.00002557032606848550 +0.00002453861613502384 +0.00002353786449547162 +0.00002256747854668510 +0.00002162686984257741 +0.00002071545415297148 +0.00001983265152188321 +0.00001897788632525714 +0.00001815058732815587 +0.00001735018774141286 +0.00001657612527775123 +0.00001582784220738795 +0.00001510478541311512 +0.00001440640644487779 +0.00001373216157385117 +0.00001308151184602594 +0.00001245392313530567 +0.00001184886619613321 +0.00001126581671564079 +0.00001070425536534102 +0.00001016366785236234 +0.00000964354497023746 +0.00000914338264924924 +0.00000866268200634905 +0.00000820094939464464 +0.00000775769645247272 +0.00000733244015206028 +0.00000692470284778296 +0.00000653401232402526 +0.00000615990184265578 +0.00000580191019011655 +0.00000545958172413972 +0.00000513246642009613 +0.00000482011991698345 +0.00000452210356305917 +0.00000423798446112975 +0.00000396733551349670 +0.00000370973546657108 +0.00000346476895516112 +0.00000323202654644036 +0.00000301110478360136 +0.00000280160622920510 +0.00000260313950822784 +0.00000241531935081538 +0.00000223776663474958 +0.00000207010842763393 +0.00000191197802880317 +0.00000176301501096574 +0.00000162286526158168 +0.00000149118102398443 +0.00000136762093825140 +0.00000125185008182945 +0.00000114354000992020 +0.00000104236879563259 +0.00000094802106990583 +0.00000086018806121006 +0.00000077856763502926 +0.00000070286433313185 +0.00000063278941263383 +0.00000056806088486051 +0.00000050840355401046 +0.00000045354905562748 +0.00000040323589488517 +0.00000035720948468869 +0.00000031522218359818 +0.00000027703333357882 +0.00000024240929758123 +0.00000021112349695691 +0.00000018295644871280 +0.00000015769580260895 
+0.00000013513637810321 +0.00000011508020114699 +0.00000009733654083554 +0.00000008172194591643 +0.00000006806028115990 +0.00000005618276359412 +0.00000004592799860902 +0.00000003714201593145 +0.00000002967830547495 +0.00000002339785306689 +0.00000001816917605582 +0.00000001386835880167 +0.00000001037908805133 +0.00000000759268820197 +0.00000000540815645445 +0.00000000373219785892 +0.00000000247926025467 +0.00000000157156910601 +0.00000000093916223615 +0.00000000051992446044 +0.00000000025962212071 +0.00000000011193752186 +0.00000000003850327215 +0.00000000000893652797 +0.00000000000087314440 +0.00000000000000173200 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 
+0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 +0.00000000000000000000 diff --git a/examples/USER/misc/cnp/in.cnp b/examples/USER/misc/cnp/in.cnp new file mode 100644 index 0000000000000000000000000000000000000000..2c71441483b90163ae5d56a34711260d82879d08 --- /dev/null +++ b/examples/USER/misc/cnp/in.cnp @@ -0,0 +1,51 @@ +# Generation and relaxation of a partial dislocation in Cu perfect FCC crystal + +# Initialization +units metal +boundary p p p +atom_style atomic + +# create simulation box and system +lattice fcc 3.615 origin 0.01 0.01 0.01 orient x -1 -1 2 orient y 1 1 1 orient z -1 1 0 +region mdbox block 0 3 0.0 14.0 0 84 units lattice +region system block 0 3 1.1 13.1 0 84 units lattice +create_box 2 mdbox +create_atoms 1 region system + +# Define atoms mass and force field +mass * 63.54 +pair_style eam/alloy +pair_coeff * * Cu_Mishin1.eam Cu Cu + +# Delete a plane of atoms along the z direction to generate a partial dislocation +region dislocation_atoms block 0 3 7 14 41.9 42.1 units lattice +delete_atoms region dislocation_atoms +region quarter_up block 0 3 7 11 0 84 units lattice +group middle region quarter_up + +# specify simulation parameters +timestep 0.004 + +# Relax configuration using conjugate gradient +#min_style cg +#minimize 1.0e-4 1.0e-6 100 1000 + +# Setup calculations +compute 1 all cnp/atom 3.086 +compute 2 all cna/atom 3.086 +compute 3 all centro/atom fcc +compute 4 all coord/atom cutoff 3.086 +dump 1 all custom 100 dump.lammpstrj id type xu yu zu c_1 c_2 c_3 c_4 + +### Set up thermo display +thermo 10 +thermo_style custom step atoms temp press pe ke etotal + +# Relax the system performing a langevin dynamics (freeze motion along y 111 direction) +fix 1 all nve +fix 2 all langevin 50 1 0.1 699483 +fix 3 all setforce NULL 0.0 NULL +fix 4 middle setforce 0.0 0.0 0.0 +run 100 +unfix 4 +run 200 diff --git a/examples/USER/misc/cnp/log.31May17.cnp.g++.4 
b/examples/USER/misc/cnp/log.31May17.cnp.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..d86d0b56001b53fa5cf765d248b4bb3d52469a50 --- /dev/null +++ b/examples/USER/misc/cnp/log.31May17.cnp.g++.4 @@ -0,0 +1,185 @@ +LAMMPS (19 May 2017) +OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90) + using 1 OpenMP thread(s) per MPI task +# Generation and relaxation of a partial dislocation in Cu perfect FCC crystal + +# Initialization +units metal +boundary p p p +atom_style atomic + +# create simulation box and system +lattice fcc 3.615 origin 0.01 0.01 0.01 orient x -1 -1 2 orient y 1 1 1 orient z -1 1 0 +Lattice spacing in x,y,z = 5.90327 6.26136 5.11238 +region mdbox block 0 3 0.0 14.0 0 84 units lattice +region system block 0 3 1.1 13.1 0 84 units lattice +create_box 2 mdbox +Created orthogonal box = (0 0 0) to (17.7098 87.6591 429.44) + 1 by 1 by 4 MPI processor grid +create_atoms 1 region system +Created 48384 atoms + +# Define atoms mass and force field +mass * 63.54 +pair_style eam/alloy +pair_coeff * * Cu_Mishin1.eam Cu Cu + +# Delete a plane of atoms along the z direction to generate a partial dislocation +region dislocation_atoms block 0 3 7 14 41.9 42.1 units lattice +delete_atoms region dislocation_atoms +Deleted 76 atoms, new total = 48308 +region quarter_up block 0 3 7 11 0 84 units lattice +group middle region quarter_up +16080 atoms in group middle + +# specify simulation parameters +timestep 0.004 + +# Relax configuration using conjugate gradient +#min_style cg +#minimize 1.0e-4 1.0e-6 100 1000 + +# Setup calculations +compute 1 all cnp/atom 3.086 +compute 2 all cna/atom 3.086 +compute 3 all centro/atom fcc +compute 4 all coord/atom cutoff 3.086 +dump 1 all custom 100 dump.lammpstrj id type xu yu zu c_1 c_2 c_3 c_4 + +### Set up thermo display +thermo 10 +thermo_style custom step atoms temp press pe ke etotal + +# Relax the system performing a langevin dynamics (freeze motion along y 111 direction) 
+fix 1 all nve +fix 2 all langevin 50 1 0.1 699483 +fix 3 all setforce NULL 0.0 NULL +fix 4 middle setforce 0.0 0.0 0.0 +run 100 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 7.50679 + ghost atom cutoff = 7.50679 + binsize = 3.75339, bins = 5 24 115 + 5 neighbor lists, perpetual/occasional/extra = 1 4 0 + (1) pair eam/alloy, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) compute cnp/atom, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (3) compute cna/atom, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (4) compute centro/atom, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard + (5) compute coord/atom, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 45.41 | 45.41 | 45.41 Mbytes +Step Atoms Temp Press PotEng KinEng TotEng + 0 48308 0 -3388.0911 -169746.07 0 -169746.07 + 10 48308 7.35092 -3091.0864 -169715.96 45.900393 -169670.05 + 20 48308 9.9162268 -2822.7045 -169678.51 61.918604 -169616.59 + 30 48308 12.351316 -2726.7195 -169666.35 77.123716 -169589.23 + 40 48308 13.302856 -2703.586 -169662.9 83.06529 -169579.83 + 50 48308 12.782228 -2706.8662 -169662.36 79.814401 -169582.55 + 60 48308 12.198179 -2772.4206 -169670.02 76.167503 -169593.86 + 70 48308 10.663322 -2841.3384 -169677.48 66.583595 -169610.9 + 80 48308 9.1169804 -2932.3896 -169687.85 56.927974 -169630.92 + 90 48308 7.2905076 -3029.9433 -169699.09 45.523167 -169653.56 + 100 48308 5.4063635 -3139.4496 -169711.65 33.758252 -169677.89 +Loop time of 10.9003 on 4 procs for 100 steps with 48308 atoms + +Performance: 3.171 
ns/day, 7.570 hours/ns, 9.174 timesteps/s +31.8% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 9.8764 | 9.9587 | 10.021 | 1.6 | 91.36 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.1232 | 0.18385 | 0.26683 | 12.1 | 1.69 +Output | 0.45385 | 0.45451 | 0.45634 | 0.2 | 4.17 +Modify | 0.25026 | 0.2537 | 0.25744 | 0.5 | 2.33 +Other | | 0.04949 | | | 0.45 + +Nlocal: 12077 ave 12096 max 12020 min +Histogram: 1 0 0 0 0 0 0 0 0 3 +Nghost: 14204 ave 14261 max 14109 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Neighs: 814050 ave 818584 max 809212 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +FullNghs: 1.6281e+06 ave 1.63296e+06 max 1.61808e+06 min +Histogram: 1 0 0 0 0 0 1 0 0 2 + +Total # of neighbors = 6512400 +Ave neighs/atom = 134.81 +Neighbor list builds = 0 +Dangerous builds = 0 +unfix 4 +run 200 +Per MPI rank memory allocation (min/avg/max) = 45.41 | 45.41 | 45.41 Mbytes +Step Atoms Temp Press PotEng KinEng TotEng + 100 48308 5.4063635 -3139.4496 -169711.65 33.758252 -169677.89 + 110 48308 15.260795 -2793.119 -169677.24 95.290993 -169581.95 + 120 48308 18.548656 -2433.1584 -169624.79 115.82096 -169508.97 + 130 48308 22.15831 -2276.626 -169604.28 138.36025 -169465.92 + 140 48308 24.393841 -2208.1771 -169596.16 152.31929 -169443.84 + 150 48308 24.797558 -2173.3145 -169591.43 154.84016 -169436.59 + 160 48308 24.73371 -2188.909 -169593.08 154.44148 -169438.64 + 170 48308 24.128467 -2220.3404 -169596.96 150.66225 -169446.29 + 180 48308 22.975708 -2275.1244 -169602.72 143.46422 -169459.26 + 190 48308 21.936324 -2348.3762 -169610.59 136.97413 -169473.61 + 200 48308 20.516249 -2432.8447 -169619.98 128.10694 -169491.87 + 210 48308 19.000566 -2510.2915 -169628.58 118.64276 -169509.93 + 220 48308 17.490407 -2597.299 -169638.24 109.21307 -169529.03 + 230 48308 16.062482 -2684.1203 -169648.31 100.29687 -169548.01 + 240 48308 14.360342 -2768.2313 
-169657.7 89.668411 -169568.03 + 250 48308 12.802315 -2852.6965 -169666.99 79.939831 -169587.05 + 260 48308 11.258205 -2944.4533 -169677.52 70.298142 -169607.23 + 270 48308 9.6159129 -3038.6304 -169688.06 60.043393 -169628.02 + 280 48308 7.972425 -3129.0826 -169698.03 49.781176 -169648.25 + 290 48308 6.3752377 -3219.2054 -169708.23 39.808067 -169668.42 + 300 48308 4.7374688 -3306.1468 -169718.27 29.58156 -169688.69 +Loop time of 23.0164 on 4 procs for 200 steps with 48308 atoms + +Performance: 3.003 ns/day, 7.992 hours/ns, 8.689 timesteps/s +31.8% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 20.221 | 20.423 | 20.57 | 3.1 | 88.73 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.27748 | 0.42603 | 0.62832 | 21.4 | 1.85 +Output | 1.5454 | 1.5473 | 1.5529 | 0.3 | 6.72 +Modify | 0.48886 | 0.49773 | 0.50842 | 1.1 | 2.16 +Other | | 0.1221 | | | 0.53 + +Nlocal: 12077 ave 12096 max 12020 min +Histogram: 1 0 0 0 0 0 0 0 0 3 +Nghost: 14204 ave 14261 max 14109 min +Histogram: 1 0 0 0 0 1 0 0 0 2 +Neighs: 814094 ave 818584 max 809212 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +FullNghs: 1.62852e+06 ave 1.63296e+06 max 1.61892e+06 min +Histogram: 1 0 0 0 0 0 0 1 0 2 + +Total # of neighbors = 6514094 +Ave neighs/atom = 134.845 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:35 diff --git a/examples/USER/misc/grem/lj-temper/0/log.lammps.0 b/examples/USER/misc/grem/lj-temper/0/log.lammps.0 index 22998f8893f6cd99a109ec1ed1a2f7e7cc9aba6a..1c1aaaf0420f099c9ab90e0d12d822d1e375f8c1 100644 --- a/examples/USER/misc/grem/lj-temper/0/log.lammps.0 +++ b/examples/USER/misc/grem/lj-temper/0/log.lammps.0 @@ -16,10 +16,6 @@ read_data 0/lj.data #dump dump all xyz 1000 ${rep}/dump.xyz -thermo 100 -thermo_style custom step temp pe etotal press vol -timestep 1.0 - fix fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 
10000.0 fix fxnpt all npt temp 300 ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso ${press} ${press} 10000.0 @@ -27,135 +23,1044 @@ fix fxnpt all npt temp 300 300 1000.0 iso 0 ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso 0 0 10000.0 fix fxgREM all grem ${lambda} -.03 -30000 fxnpt fix fxgREM all grem 900 -.03 -30000 fxnpt + +thermo 10 +thermo_style custom step temp f_fxgREM pe etotal press vol thermo_modify press fxgREM_press +timestep 1.0 temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} temper/grem 10000 100 900 fxgREM fxnpt 10294 98392 Neighbor list info ... - 1 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 7 ghost atom cutoff = 7 - binsize = 3.5 -> bins = 7 7 7 -Memory usage per processor = 5.38195 Mbytes -Step Temp PotEng TotEng Press Volume - 0 299.42262 -3416.9 -2971.5315 134.01052 11328.967 - 100 294.63941 -3417.2566 -2979.0028 164.62038 11328.996 - 200 302.86941 -3421.7405 -2971.2452 116.35909 11329.199 - 300 297.43413 -3417.6882 -2975.2774 143.60143 11329.666 - 400 300.1465 -3421.466 -2975.0208 108.02406 11330.366 - 500 300.2847 -3419.7205 -2973.0698 51.93802 11331.264 - 600 297.98642 -3416.0758 -2972.8436 102.67264 11332.303 - 700 308.88819 -3423.7386 -2964.2908 -84.991092 11333.461 - 800 300.03002 -3418.2447 -2971.9728 61.530839 11334.623 - 900 294.38296 -3416.7286 -2978.8563 59.4512 11336.02 - 1000 301.55478 -3419.3685 -2970.8287 -70.828164 11337.415 - 1100 304.76191 -3420.01 -2966.6997 -46.02246 11338.757 - 1200 300.87935 -3418.357 -2970.8218 -27.557375 11340.179 - 1300 297.90925 -3416.7586 -2973.6412 27.202549 11341.647 - 1400 304.0426 -3421.0828 -2968.8425 -150.93733 11343.177 - 1500 293.50425 -3416.0019 -2979.4366 -32.703088 11344.538 - 1600 298.70817 -3416.4247 -2972.1189 -111.16189 11345.702 - 1700 293.12426 -3415.1528 -2979.1527 -12.438698 11346.701 - 1800 289.23529 -3413.4211 
-2983.2055 -27.234713 11347.494 - 1900 292.62679 -3415.5324 -2980.2722 -121.80014 11347.911 - 2000 295.3705 -3415.4594 -2976.1182 -118.35918 11347.97 - 2100 288.3131 -3411.9071 -2983.0632 11.471141 11347.887 - 2200 312.55009 -3422.4761 -2957.5815 -294.72858 11347.644 - 2300 285.92437 -3414.5956 -2989.3047 -43.986075 11346.986 - 2400 300.19294 -3419.6619 -2973.1476 -153.30034 11346.114 - 2500 295.61459 -3415.1159 -2975.4116 20.33969 11344.962 - 2600 297.71268 -3415.7827 -2972.9577 -52.926913 11343.679 - 2700 298.56462 -3417.9995 -2973.9072 -131.29953 11342.271 - 2800 286.03931 -3410.6373 -2985.1754 164.72078 11340.629 - 2900 290.70577 -3411.5311 -2979.1283 143.13209 11338.97 - 3000 303.332 -3415.7792 -2964.5958 -49.330701 11337.303 - 3100 301.73339 -3417.0188 -2968.2132 -8.5041565 11335.568 - 3200 313.04988 -3423.077 -2957.439 -122.70098 11333.927 - 3300 286.09745 -3408.4061 -2982.8579 353.68123 11332.253 - 3400 303.00349 -3413.2328 -2962.5381 231.93254 11330.816 - 3500 308.79241 -3417.4556 -2958.1503 91.741468 11329.674 - 3600 287.41302 -3407.5431 -2980.038 543.23683 11329.043 - 3700 294.17479 -3407.5416 -2969.9788 449.07447 11329.318 - 3800 289.81831 -3405.492 -2974.4092 576.60121 11330.443 - 3900 295.40157 -3408.4609 -2969.0734 451.50877 11332.572 - 4000 287.71309 -3402.6497 -2974.6982 573.42617 11335.583 - 4100 290.02657 -3403.0786 -2971.686 527.71502 11339.534 - 4200 297.74424 -3409.8018 -2966.9298 277.40905 11344.404 - 4300 292.06583 -3402.1684 -2967.7426 444.89069 11349.944 - 4400 300.74402 -3406.2366 -2958.9027 139.09782 11356.223 - 4500 295.14988 -3402.5929 -2963.5798 173.91619 11362.896 - 4600 299.82229 -3402.1676 -2956.2047 114.51482 11369.919 - 4700 299.72789 -3400.7608 -2954.9383 83.204669 11377.21 - 4800 292.34822 -3393.2268 -2958.3809 161.50646 11384.622 - 4900 298.32652 -3395.3712 -2951.6331 29.917068 11392.178 - 5000 293.0855 -3395.7079 -2959.7654 -67.018003 11399.715 - 5100 302.58088 -3398.1836 -2948.1175 -214.79018 11407.054 - 5200 302.00186 
-3395.3231 -2946.1182 -305.62771 11413.967 - 5300 308.44521 -3396.6926 -2937.9037 -455.80177 11420.397 - 5400 305.55347 -3392.9576 -2938.4699 -464.74116 11426.162 - 5500 306.15731 -3393.3834 -2937.9976 -453.93342 11431.122 - 5600 300.86617 -3390.7578 -2943.2421 -413.36436 11435.339 - 5700 311.4673 -3395.9418 -2932.6578 -637.62795 11438.796 - 5800 302.31851 -3388.9636 -2939.2877 -501.87858 11441.318 - 5900 315.93456 -3400.2717 -2930.343 -753.56358 11443.043 - 6000 304.69067 -3391.9711 -2938.7668 -477.72322 11443.646 - 6100 311.80005 -3393.0048 -2929.2259 -633.56098 11443.351 - 6200 307.36579 -3391.6406 -2934.4573 -532.82298 11442.106 - 6300 302.7084 -3389.8455 -2939.5897 -490.38666 11439.901 - 6400 306.78573 -3392.1276 -2935.807 -547.43007 11436.8 - 6500 304.23179 -3388.17 -2935.6483 -348.52993 11432.753 - 6600 296.7094 -3385.8464 -2944.5136 -201.5605 11428.01 - 6700 313.3959 -3395.26 -2929.1073 -415.82786 11422.724 - 6800 316.4072 -3399.1224 -2928.4906 -496.33753 11416.786 - 6900 302.56547 -3389.7698 -2939.7266 -80.175542 11410.218 - 7000 299.36836 -3387.708 -2942.4203 128.86864 11403.334 - 7100 309.39575 -3396.2826 -2936.0798 -53.969342 11396.555 - 7200 324.25215 -3405.3158 -2923.0153 -227.25827 11389.761 - 7300 305.99737 -3393.6219 -2938.474 235.46369 11382.905 - 7400 317.73948 -3402.1318 -2929.5184 21.699572 11376.471 - 7500 315.75908 -3400.7608 -2931.0931 148.23486 11370.37 - 7600 311.46203 -3401.9952 -2938.7191 182.85745 11364.81 - 7700 309.3606 -3403.1221 -2942.9717 257.80591 11359.711 - 7800 300.78232 -3397.6295 -2950.2386 440.63834 11355.144 - 7900 306.59047 -3402.2326 -2946.2025 287.86462 11351.377 - 8000 311.28944 -3406.6966 -2943.6772 268.88617 11348.336 - 8100 299.93421 -3405.6032 -2959.4738 356.79664 11346.007 - 8200 302.26336 -3406.5846 -2956.9908 246.8694 11344.383 - 8300 294.27353 -3406.1343 -2968.4247 428.02411 11343.38 - 8400 273.01304 -3396.291 -2990.2048 712.6039 11343.239 - 8500 294.90301 -3407.461 -2968.8151 259.18716 11344.147 - 8600 
295.43347 -3410.2779 -2970.8431 127.32619 11345.636 - 8700 285.2568 -3407.4342 -2983.1364 223.78377 11347.458 - 8800 296.45048 -3413.1605 -2972.2128 61.542232 11349.754 - 8900 306.82798 -3417.5232 -2961.1398 -170.00394 11352.317 - 9000 287.0527 -3408.5483 -2981.5791 46.683648 11354.612 - 9100 296.30539 -3414.6325 -2973.9007 -222.07876 11356.77 - 9200 300.16761 -3415.9641 -2969.4875 -218.94764 11358.601 - 9300 310.01596 -3418.7396 -2957.6143 -409.04518 11360.094 - 9400 293.6059 -3411.4846 -2974.768 -112.34303 11361.015 - 9500 306.2565 -3416.0499 -2960.5165 -230.75414 11361.667 - 9600 298.77172 -3410.0086 -2965.6083 -160.9185 11361.994 - 9700 287.60936 -3408.4236 -2980.6264 -50.45167 11361.936 - 9800 298.13818 -3413.9967 -2970.5387 -252.57086 11361.537 - 9900 292.93826 -3411.9851 -2976.2616 -152.45548 11360.629 - 10000 286.02579 -3405.9303 -2980.4887 67.188883 11359.503 -Loop time of 3.25987 on 1 procs for 10000 steps with 500 atoms + binsize = 3.5, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.728 | 5.728 | 5.728 Mbytes +Step Temp f_fxgREM PotEng TotEng Press Volume + 0 299.42262 102.507 -3416.9 -2971.5315 134.01052 11328.967 + 10 305.01315 102.57259 -3419.0863 -2965.4023 52.103984 11328.968 + 20 309.92197 102.64755 -3421.585 -2960.5996 -26.370593 11328.97 + 30 311.99962 102.71885 -3423.9618 -2959.886 -76.248163 11328.972 + 40 310.18839 102.69518 -3423.1727 -2961.791 -52.274449 11328.972 + 50 305.15121 102.6302 -3421.0067 -2967.1174 16.259045 11328.972 + 60 299.39836 102.5938 -3419.7932 -2974.4609 80.723613 11328.972 + 70 295.2947 102.55104 -3418.3682 -2979.1397 134.53808 11328.974 + 80 293.51832 102.50438 -3416.8126 -2980.2263 170.69406 11328.978 + 90 293.4313 102.51498 -3417.1659 -2980.7091 170.93297 11328.985 + 100 294.63941 102.5177 -3417.2566 -2979.0028 
164.62038 11328.996 + 110 297.25348 102.56555 -3418.8518 -2976.7098 131.78524 11329.01 + 120 300.50388 102.60344 -3420.1147 -2973.138 98.057487 11329.026 + 130 302.95328 102.6409 -3421.3632 -2970.7432 69.182017 11329.044 + 140 304.05933 102.70523 -3423.5076 -2971.2424 38.480693 11329.064 + 150 304.21872 102.71956 -3423.9853 -2971.483 33.117628 11329.084 + 160 304.38371 102.70642 -3423.5475 -2970.7998 39.859883 11329.105 + 170 304.80117 102.70106 -3423.3687 -2970.0001 43.810288 11329.127 + 180 305.13983 102.71028 -3423.6759 -2969.8035 46.007676 11329.15 + 190 304.94578 102.68774 -3422.9246 -2969.3408 69.449779 11329.173 + 200 302.86941 102.65221 -3421.7405 -2971.2452 116.35909 11329.199 + 210 297.662 102.5571 -3418.57 -2975.8203 210.54052 11329.226 + 220 290.84972 102.4888 -3416.2934 -2983.6765 300.40584 11329.257 + 230 286.66935 102.40548 -3413.5161 -2987.1171 365.46428 11329.294 + 240 288.43119 102.42846 -3414.2821 -2985.2626 333.78695 11329.338 + 250 295.9831 102.53577 -3417.8591 -2977.6067 218.31479 11329.388 + 260 305.50936 102.66938 -3422.3128 -2967.8908 75.635176 11329.443 + 270 311.52891 102.76931 -3425.6438 -2962.2682 -26.093631 11329.498 + 280 311.04895 102.75023 -3425.0078 -2962.346 -28.116138 11329.554 + 290 305.15136 102.64834 -3421.6115 -2967.722 47.657742 11329.609 + 300 297.43413 102.53064 -3417.6882 -2975.2774 143.60143 11329.666 + 310 292.00918 102.45991 -3415.3303 -2980.9888 202.75098 11329.725 + 320 291.38867 102.45449 -3415.1495 -2981.731 200.97437 11329.788 + 330 294.86583 102.51948 -3417.316 -2978.7254 144.54327 11329.854 + 340 299.07345 102.62717 -3420.9058 -2976.0567 71.367848 11329.924 + 350 301.19767 102.64391 -3421.4636 -2973.4549 53.019075 11329.994 + 360 301.40218 102.64706 -3421.5686 -2973.2557 57.550459 11330.066 + 370 301.60343 102.63439 -3421.1465 -2972.5342 71.264067 11330.139 + 380 302.3767 102.66604 -3422.2013 -2972.4389 65.704342 11330.213 + 390 302.18634 102.66324 -3422.1079 -2972.6286 79.588346 11330.289 + 400 300.1465 
102.64398 -3421.466 -2975.0208 108.02406 11330.366 + 410 297.66938 102.55498 -3418.4992 -2975.7385 159.37689 11330.446 + 420 296.70991 102.52861 -3417.6203 -2976.2868 170.02926 11330.528 + 430 297.60515 102.52266 -3417.4218 -2974.7567 154.72136 11330.614 + 440 299.48021 102.57498 -3419.166 -2973.7119 108.67344 11330.703 + 450 301.07427 102.60725 -3420.2418 -2972.4166 73.295179 11330.793 + 460 302.09454 102.59522 -3419.8407 -2970.498 61.278464 11330.886 + 470 302.70993 102.61648 -3420.5493 -2970.2912 41.512266 11330.979 + 480 302.81262 102.62998 -3420.9993 -2970.5885 29.560402 11331.073 + 490 302.14786 102.61601 -3420.5338 -2971.1117 34.608836 11331.168 + 500 300.2847 102.59162 -3419.7205 -2973.0698 51.93802 11331.264 + 510 297.96748 102.55635 -3418.545 -2975.3409 76.76076 11331.361 + 520 296.76806 102.53422 -3417.8075 -2976.3875 89.385785 11331.459 + 530 296.70055 102.51748 -3417.2493 -2975.9297 92.386458 11331.559 + 540 296.45476 102.50595 -3416.8648 -2975.9109 92.522412 11331.661 + 550 296.25617 102.49905 -3416.6349 -2975.9763 88.090319 11331.765 + 560 297.43141 102.48149 -3416.0497 -2973.643 77.975047 11331.87 + 570 299.89275 102.52306 -3417.4354 -2971.3677 40.26193 11331.977 + 580 301.71033 102.55476 -3418.492 -2969.7207 18.941485 11332.085 + 590 301.0837 102.56499 -3418.833 -2970.9938 31.38825 11332.194 + 600 297.98642 102.48227 -3416.0758 -2972.8436 102.67264 11332.303 + 610 293.64246 102.45555 -3415.1849 -2978.414 160.19415 11332.414 + 620 290.00186 102.38828 -3412.9426 -2981.5869 218.76366 11332.528 + 630 289.94928 102.41899 -3413.9664 -2982.6888 200.08217 11332.646 + 640 295.74572 102.48763 -3416.2545 -2976.3551 112.68119 11332.768 + 650 306.46178 102.65897 -3421.9655 -2966.1269 -54.544301 11332.892 + 660 317.10206 102.85711 -3428.5703 -2956.905 -227.40954 11333.015 + 670 322.23923 102.91998 -3430.6661 -2951.3597 -299.41158 11333.135 + 680 320.5723 102.93397 -3431.1325 -2954.3055 -291.13842 11333.248 + 690 314.82311 102.8303 -3427.6765 -2959.401 -196.15735 
11333.357 + 700 308.88819 102.71216 -3423.7386 -2964.2908 -84.991092 11333.461 + 710 304.9868 102.66458 -3422.1526 -2968.5078 -11.177461 11333.565 + 720 303.01004 102.61889 -3420.6298 -2969.9253 49.289549 11333.668 + 730 300.99168 102.60022 -3420.0074 -2972.3051 98.740194 11333.772 + 740 296.94374 102.54664 -3418.2214 -2976.5401 171.9416 11333.877 + 750 291.19139 102.44341 -3414.7802 -2981.6551 269.15521 11333.987 + 760 285.41566 102.3714 -3412.3799 -2987.8457 343.50487 11334.101 + 770 281.59912 102.30948 -3410.3161 -2991.4588 384.62499 11334.222 + 780 282.54974 102.32011 -3410.6705 -2990.3991 347.61696 11334.35 + 790 289.63184 102.42814 -3414.2713 -2983.4659 218.95403 11334.485 + 800 300.03002 102.54734 -3418.2447 -2971.9728 61.530839 11334.623 + 810 308.28446 102.63792 -3421.2641 -2962.7143 -61.683906 11334.763 + 820 310.71517 102.67118 -3422.3726 -2960.2073 -105.82674 11334.902 + 830 307.51937 102.64604 -3421.5347 -2964.1229 -72.21706 11335.04 + 840 302.15103 102.52481 -3417.4936 -2968.0669 22.7566 11335.176 + 850 298.13259 102.46761 -3415.5869 -2972.1373 83.701724 11335.312 + 860 296.67204 102.44948 -3414.9826 -2973.7055 105.71932 11335.45 + 870 296.58815 102.49174 -3416.3914 -2975.239 86.7842 11335.59 + 880 296.07017 102.48007 -3416.0022 -2975.6203 85.129741 11335.731 + 890 294.71487 102.49894 -3416.6313 -2978.2652 73.985492 11335.875 + 900 294.38296 102.50186 -3416.7286 -2978.8563 59.4512 11336.02 + 910 297.14646 102.51252 -3417.0839 -2975.1011 20.162699 11336.166 + 920 302.00677 102.60648 -3420.216 -2971.0038 -63.001904 11336.312 + 930 305.93557 102.65476 -3421.8253 -2966.7693 -118.84051 11336.457 + 940 307.40744 102.65758 -3421.9192 -2964.6739 -136.4053 11336.601 + 950 306.77038 102.64009 -3421.3363 -2965.0386 -128.88393 11336.742 + 960 305.29301 102.60192 -3420.0641 -2965.9639 -105.70523 11336.88 + 970 304.01625 102.62204 -3420.7347 -2968.5336 -106.83536 11337.017 + 980 303.31176 102.61148 -3420.3825 -2969.2293 -99.75059 11337.151 + 990 302.52222 102.60354 
-3420.1179 -2970.139 -91.350072 11337.284 + 1000 301.55478 102.58106 -3419.3685 -2970.8287 -70.828164 11337.415 + 1010 300.80872 102.57382 -3419.1273 -2971.6971 -51.456861 11337.545 + 1020 300.39394 102.57561 -3419.1869 -2972.3737 -30.06259 11337.674 + 1030 299.3407 102.56375 -3418.7917 -2973.5451 8.5095011 11337.802 + 1040 296.64408 102.51102 -3417.0339 -2975.7983 76.942673 11337.931 + 1050 292.71654 102.45136 -3415.0454 -2979.6518 150.13917 11338.061 + 1060 290.03112 102.41851 -3413.9503 -2982.551 191.32381 11338.195 + 1070 291.18322 102.40463 -3413.4876 -2980.3746 184.12677 11338.331 + 1080 295.92737 102.46916 -3415.6386 -2975.4691 109.66058 11338.472 + 1090 301.38195 102.55252 -3418.4174 -2970.1346 17.470646 11338.614 + 1100 304.76191 102.6003 -3420.01 -2966.6997 -46.02246 11338.757 + 1110 305.67145 102.63011 -3421.0035 -2966.3404 -77.688687 11338.899 + 1120 304.63891 102.59084 -3419.6947 -2966.5674 -58.912495 11339.04 + 1130 302.03115 102.55519 -3418.5064 -2969.258 -22.869742 11339.18 + 1140 298.58726 102.51077 -3417.0257 -2972.8997 25.472397 11339.319 + 1150 295.27588 102.46046 -3415.3487 -2976.1482 73.241528 11339.459 + 1160 292.9158 102.45232 -3415.0775 -2979.3875 92.346457 11339.6 + 1170 292.32496 102.4632 -3415.4399 -2980.6287 85.260271 11339.743 + 1180 294.10212 102.48255 -3416.085 -2978.6304 55.687553 11339.888 + 1190 297.62415 102.51671 -3417.2237 -2974.5303 10.398915 11340.033 + 1200 300.87935 102.55071 -3418.357 -2970.8218 -27.557375 11340.179 + 1210 302.10425 102.57743 -3419.2477 -2969.8905 -43.340394 11340.325 + 1220 301.30073 102.56135 -3418.7116 -2970.5496 -24.958889 11340.469 + 1230 299.19878 102.52943 -3417.6477 -2972.6122 9.1129556 11340.614 + 1240 296.57738 102.49746 -3416.582 -2975.4456 44.677497 11340.759 + 1250 294.7463 102.46575 -3415.5249 -2977.1121 71.138297 11340.904 + 1260 295.03023 102.45442 -3415.1473 -2976.3122 71.685459 11341.051 + 1270 297.27714 102.5085 -3416.95 -2974.7728 32.147349 11341.2 + 1280 299.58047 102.55561 -3418.5205 
-2972.9172 -3.9755183 11341.349 + 1290 299.93811 102.56119 -3418.7063 -2972.5711 -9.1750938 11341.498 + 1300 297.90925 102.50276 -3416.7586 -2973.6412 27.202549 11341.647 + 1310 294.65437 102.42963 -3414.3209 -2976.0448 75.461621 11341.797 + 1320 292.32951 102.414 -3413.8001 -2978.9821 92.928442 11341.948 + 1330 291.73106 102.405 -3413.4999 -2979.5721 91.541739 11342.101 + 1340 292.46822 102.38863 -3412.9545 -2977.9302 79.39213 11342.255 + 1350 294.51754 102.45145 -3415.0485 -2976.9759 25.453397 11342.412 + 1360 298.37904 102.51036 -3417.0121 -2973.1958 -39.401903 11342.568 + 1370 303.20597 102.60772 -3420.2573 -2969.2614 -123.63065 11342.725 + 1380 306.63864 102.64436 -3421.4786 -2965.3768 -170.50872 11342.878 + 1390 306.88908 102.66404 -3422.1346 -2965.6603 -183.28528 11343.029 + 1400 304.0426 102.63248 -3421.0828 -2968.8425 -150.93733 11343.177 + 1410 299.66949 102.56974 -3418.9914 -2973.2558 -94.9749 11343.322 + 1420 295.75762 102.53777 -3417.9258 -2978.0088 -56.871182 11343.465 + 1430 294.21861 102.49465 -3416.4882 -2978.8603 -36.38455 11343.607 + 1440 296.29159 102.52235 -3417.4116 -2976.7003 -73.334676 11343.749 + 1450 301.43074 102.58107 -3419.369 -2971.0136 -145.05567 11343.889 + 1460 306.46756 102.66201 -3422.067 -2966.2197 -219.74649 11344.026 + 1470 307.07703 102.7226 -3424.0867 -2967.3329 -247.49745 11344.159 + 1480 302.2323 102.62495 -3420.8317 -2971.284 -168.81147 11344.288 + 1490 295.91077 102.50883 -3416.961 -2976.8162 -69.501498 11344.414 + 1500 293.50425 102.48006 -3416.0019 -2979.4366 -32.703088 11344.538 + 1510 296.38451 102.5083 -3416.9434 -2976.0939 -58.867745 11344.662 + 1520 300.86865 102.56497 -3418.8323 -2971.313 -111.47355 11344.785 + 1530 302.55532 102.63423 -3421.141 -2971.1129 -151.60859 11344.905 + 1540 300.12591 102.59715 -3419.9051 -2973.4905 -123.34016 11345.023 + 1550 295.72707 102.49851 -3416.6169 -2976.7453 -60.531918 11345.139 + 1560 293.2099 102.4286 -3414.2866 -2978.1591 -24.591334 11345.254 + 1570 294.28758 102.45689 
-3415.2297 -2977.4993 -53.252019 11345.368 + 1580 297.21835 102.52137 -3417.3791 -2975.2893 -106.9868 11345.481 + 1590 298.97978 102.51665 -3417.2215 -2972.5118 -122.34188 11345.592 + 1600 298.70817 102.49274 -3416.4247 -2972.1189 -111.16189 11345.702 + 1610 297.71642 102.50123 -3416.7078 -2973.8772 -103.17121 11345.809 + 1620 297.11447 102.49992 -3416.6639 -2974.7287 -92.752723 11345.914 + 1630 297.30695 102.47122 -3415.7074 -2973.4858 -77.698896 11346.018 + 1640 298.78349 102.50947 -3416.9822 -2972.5644 -98.097519 11346.12 + 1650 301.16284 102.54579 -3418.193 -2970.2361 -122.32653 11346.22 + 1660 302.06844 102.57137 -3419.0456 -2969.7417 -128.70176 11346.319 + 1670 299.50439 102.5007 -3416.69 -2971.1999 -72.217359 11346.415 + 1680 294.73969 102.42805 -3414.2685 -2975.8655 -2.7377727 11346.51 + 1690 291.74851 102.38634 -3412.8781 -2978.9243 34.143745 11346.605 + 1700 293.12426 102.45458 -3415.1528 -2979.1527 -12.438698 11346.701 + 1710 297.66889 102.50404 -3416.8012 -2974.0413 -84.291158 11346.796 + 1720 303.18954 102.62371 -3420.7905 -2969.819 -194.16156 11346.89 + 1730 307.99174 102.70484 -3423.4945 -2965.3802 -281.31997 11346.98 + 1740 310.44287 102.74613 -3424.8708 -2963.1106 -328.29545 11347.066 + 1750 309.0034 102.67277 -3422.4258 -2962.8067 -292.43283 11347.145 + 1760 303.32174 102.59781 -3419.9269 -2968.7588 -216.31399 11347.219 + 1770 295.38441 102.44513 -3414.8376 -2975.4757 -89.076671 11347.289 + 1780 288.58524 102.35732 -3411.9108 -2982.6621 3.0132998 11347.357 + 1790 286.12 102.32368 -3410.7894 -2985.2076 35.206418 11347.425 + 1800 289.23529 102.40263 -3413.4211 -2983.2055 -27.234713 11347.494 + 1810 297.14401 102.51126 -3417.0419 -2975.0627 -143.73517 11347.563 + 1820 306.76876 102.69352 -3423.1173 -2966.822 -303.83015 11347.628 + 1830 313.74855 102.79616 -3426.5388 -2959.8616 -411.15416 11347.689 + 1840 315.46423 102.8539 -3428.4633 -2959.2341 -457.59523 11347.741 + 1850 313.12768 102.7984 -3426.6133 -2960.8595 -424.12945 11347.785 + 1860 309.05655 
102.75586 -3425.1952 -2965.497 -376.68901 11347.822 + 1870 304.34244 102.62867 -3420.9558 -2968.2695 -287.77517 11347.851 + 1880 299.24405 102.55097 -3418.3655 -2973.2627 -213.86081 11347.875 + 1890 294.61776 102.49233 -3416.411 -2978.1894 -151.41806 11347.894 + 1900 292.62679 102.46597 -3415.5324 -2980.2722 -121.80014 11347.911 + 1910 294.72445 102.45454 -3415.1512 -2976.7709 -126.45908 11347.926 + 1920 298.52848 102.50865 -3416.955 -2972.9165 -166.49847 11347.938 + 1930 300.00062 102.53502 -3417.8341 -2971.6059 -176.48502 11347.948 + 1940 297.49521 102.47308 -3415.7692 -2973.2676 -123.76195 11347.954 + 1950 292.71263 102.40415 -3413.4715 -2978.0837 -54.263616 11347.958 + 1960 288.44322 102.3588 -3411.96 -2982.9226 -3.4208938 11347.96 + 1970 287.06595 102.34563 -3411.5212 -2984.5323 8.2328493 11347.963 + 1980 289.12422 102.36896 -3412.2988 -2982.2484 -22.331306 11347.966 + 1990 292.66135 102.4424 -3414.7468 -2979.4352 -83.979412 11347.969 + 2000 295.3705 102.46378 -3415.4594 -2976.1182 -118.35918 11347.97 + 2010 296.19204 102.49592 -3416.5306 -2975.9674 -139.92814 11347.969 + 2020 294.90329 102.46027 -3415.3424 -2976.6961 -117.84509 11347.965 + 2030 292.6706 102.42204 -3414.068 -2978.7427 -85.454237 11347.96 + 2040 291.1965 102.40326 -3413.4421 -2980.3094 -65.359696 11347.953 + 2050 291.49792 102.43522 -3414.5074 -2980.9263 -74.906718 11347.944 + 2060 292.19872 102.43772 -3414.5905 -2979.9671 -72.703722 11347.934 + 2070 291.30976 102.4396 -3414.6532 -2981.352 -57.588834 11347.923 + 2080 289.10508 102.4017 -3413.39 -2983.3681 -18.360635 11347.911 + 2090 287.50207 102.35715 -3411.9051 -2984.2676 16.153245 11347.899 + 2100 288.3131 102.35721 -3411.9071 -2983.0632 11.471141 11347.887 + 2110 292.7849 102.38929 -3412.9764 -2977.481 -35.718949 11347.876 + 2120 300.57092 102.54295 -3418.0984 -2971.022 -153.11266 11347.863 + 2130 308.35058 102.65721 -3421.907 -2963.2588 -250.25017 11347.848 + 2140 311.5324 102.69737 -3423.2455 -2959.8647 -278.53274 11347.828 + 2150 
307.94447 102.66567 -3422.1891 -2964.145 -226.03646 11347.803 + 2160 300.34725 102.54919 -3418.3064 -2971.5626 -113.33777 11347.774 + 2170 294.74346 102.42777 -3414.2591 -2975.8505 -21.9587 11347.743 + 2180 295.93182 102.43361 -3414.4537 -2974.2776 -40.912863 11347.712 + 2190 303.44883 102.53191 -3417.7303 -2966.3732 -151.31949 11347.679 + 2200 312.55009 102.67428 -3422.4761 -2957.5815 -294.72858 11347.644 + 2210 318.70677 102.77575 -3425.8585 -2951.8063 -399.95463 11347.603 + 2220 320.41031 102.81707 -3427.2357 -2950.6497 -442.80939 11347.555 + 2230 317.89639 102.77822 -3425.9407 -2953.0939 -415.74038 11347.499 + 2240 311.79167 102.70038 -3423.3458 -2959.5794 -339.24833 11347.434 + 2250 302.71984 102.57339 -3419.1129 -2968.84 -217.78603 11347.364 + 2260 292.68143 102.44369 -3414.7896 -2979.4482 -88.707316 11347.289 + 2270 285.16359 102.34464 -3411.4881 -2987.3289 8.1302473 11347.213 + 2280 282.20047 102.34256 -3411.4188 -2991.667 31.275399 11347.137 + 2290 282.97286 102.37758 -3412.5859 -2991.6852 8.3448595 11347.061 + 2300 285.92437 102.43787 -3414.5956 -2989.3047 -43.986075 11346.986 + 2310 290.33396 102.48872 -3416.2907 -2984.441 -102.13915 11346.91 + 2320 295.06958 102.57618 -3419.206 -2980.3124 -171.55177 11346.832 + 2330 298.20563 102.59982 -3419.9941 -2976.4358 -196.58253 11346.751 + 2340 298.24805 102.60378 -3420.126 -2976.5046 -183.81627 11346.666 + 2350 295.73298 102.56082 -3418.6941 -2978.8137 -130.56758 11346.578 + 2360 292.97371 102.49046 -3416.3488 -2980.5726 -65.991294 11346.487 + 2370 292.09981 102.44491 -3414.8303 -2980.3539 -30.554608 11346.395 + 2380 293.80369 102.47809 -3415.9363 -2978.9256 -51.800628 11346.303 + 2390 297.03927 102.55038 -3418.346 -2976.5226 -106.6691 11346.209 + 2400 300.19294 102.58986 -3419.6619 -2973.1476 -153.30034 11346.114 + 2410 302.52849 102.61685 -3420.5618 -2970.5736 -190.20449 11346.016 + 2420 304.2705 102.63871 -3421.2905 -2968.7112 -218.07039 11345.914 + 2430 305.85619 102.66217 -3422.0724 -2967.1345 -239.56699 
11345.808 + 2440 307.32492 102.65625 -3421.8749 -2964.7524 -241.23506 11345.698 + 2450 308.04007 102.65063 -3421.6876 -2963.5014 -229.18194 11345.583 + 2460 307.04877 102.6426 -3421.4201 -2964.7084 -197.70237 11345.464 + 2470 304.24305 102.61084 -3420.3614 -2967.8229 -141.79187 11345.341 + 2480 300.73751 102.56457 -3418.8189 -2971.4947 -77.902376 11345.216 + 2490 297.83991 102.50388 -3416.796 -2973.7818 -19.331564 11345.089 + 2500 295.61459 102.45348 -3415.1159 -2975.4116 20.33969 11344.962 + 2510 293.90955 102.42678 -3414.2261 -2977.0579 37.36597 11344.835 + 2520 293.2758 102.40449 -3413.4831 -2977.2576 38.441637 11344.709 + 2530 294.10044 102.42813 -3414.2709 -2976.8188 8.2781685 11344.583 + 2540 295.79126 102.46495 -3415.4982 -2975.5312 -32.653729 11344.458 + 2550 297.46851 102.46999 -3415.6663 -2973.2045 -58.295967 11344.332 + 2560 299.11686 102.49825 -3416.6084 -2971.6947 -88.286189 11344.205 + 2570 301.18226 102.49734 -3416.5779 -2968.5922 -103.49957 11344.077 + 2580 302.63024 102.5265 -3417.5501 -2967.4106 -120.78148 11343.946 + 2590 301.58639 102.53737 -3417.9123 -2969.3254 -110.85339 11343.813 + 2600 297.71268 102.47348 -3415.7827 -2972.9577 -52.926913 11343.679 + 2610 293.37251 102.35913 -3411.9709 -2975.6016 24.038394 11343.543 + 2620 291.57318 102.33871 -3411.2903 -2977.5973 39.974952 11343.408 + 2630 293.60911 102.40434 -3413.4781 -2976.7568 -11.291017 11343.273 + 2640 298.03205 102.51421 -3417.1404 -2973.8403 -99.838219 11343.138 + 2650 301.82817 102.61056 -3420.3519 -2971.4054 -177.26862 11343.002 + 2660 303.05948 102.60469 -3420.1562 -2969.3782 -192.61408 11342.862 + 2670 302.00375 102.55206 -3418.4021 -2969.1944 -169.04976 11342.719 + 2680 300.12539 102.53028 -3417.6761 -2971.2624 -146.9902 11342.572 + 2690 298.69819 102.55882 -3418.6273 -2974.3363 -144.09982 11342.423 + 2700 298.56462 102.53998 -3417.9995 -2973.9072 -131.29953 11342.271 + 2710 300.36177 102.56644 -3418.8813 -2972.1159 -148.75542 11342.116 + 2720 303.05628 102.57891 -3419.297 
-2968.5237 -166.08469 11341.959 + 2730 304.31746 102.62475 -3420.825 -2968.1758 -184.53772 11341.798 + 2740 303.30231 102.58845 -3419.615 -2968.4758 -152.58699 11341.634 + 2750 300.88876 102.55454 -3418.4847 -2970.9355 -107.57012 11341.467 + 2760 297.47918 102.53312 -3417.7708 -2975.2931 -56.252079 11341.298 + 2770 293.03307 102.42885 -3414.295 -2978.4305 36.263572 11341.129 + 2780 288.40268 102.36122 -3412.0408 -2983.0636 114.05528 11340.96 + 2790 285.56689 102.30512 -3410.1707 -2985.4116 168.78308 11340.793 + 2800 286.03931 102.31912 -3410.6373 -2985.1754 164.72078 11340.629 + 2810 289.36719 102.3765 -3412.5499 -2982.1381 114.49221 11340.468 + 2820 294.12351 102.45166 -3415.0553 -2977.5689 38.242625 11340.309 + 2830 299.93982 102.51538 -3417.1795 -2971.0417 -47.538089 11340.151 + 2840 306.52231 102.61846 -3420.6155 -2964.6868 -154.04342 11339.993 + 2850 311.8299 102.71002 -3423.6674 -2959.8441 -239.35759 11339.831 + 2860 313.16451 102.70148 -3423.3826 -2957.5742 -245.17137 11339.665 + 2870 309.66773 102.68618 -3422.8725 -2962.2653 -200.14279 11339.495 + 2880 303.04179 102.55611 -3418.537 -2967.7853 -78.388837 11339.32 + 2890 296.09023 102.47255 -3415.7518 -2975.34 31.562303 11339.145 + 2900 290.70577 102.34593 -3411.5311 -2979.1283 143.13209 11338.97 + 2910 287.87699 102.27794 -3409.2646 -2981.0694 205.23091 11338.797 + 2920 288.5156 102.24294 -3408.0979 -2978.9528 218.49033 11338.628 + 2930 293.42282 102.31627 -3410.5423 -2974.0981 149.60588 11338.464 + 2940 301.23155 102.44568 -3414.8559 -2966.7968 31.452595 11338.302 + 2950 308.28679 102.53999 -3417.9998 -2959.4465 -71.450294 11338.14 + 2960 311.90297 102.59377 -3419.7925 -2955.8604 -133.20873 11337.978 + 2970 311.90753 102.61216 -3420.4055 -2956.4667 -152.07015 11337.812 + 2980 309.52021 102.55066 -3418.3552 -2957.9674 -119.36033 11337.645 + 2990 306.24786 102.53092 -3417.6973 -2962.1769 -91.93941 11337.475 + 3000 303.332 102.47338 -3415.7792 -2964.5958 -49.330701 11337.303 + 3010 301.77545 102.45228 
-3415.0762 -2966.208 -28.857235 11337.13 + 3020 302.30004 102.51026 -3417.0085 -2967.3601 -51.534897 11336.957 + 3030 304.17544 102.53757 -3417.9189 -2965.4809 -68.897987 11336.783 + 3040 304.91398 102.5752 -3419.1732 -2965.6367 -78.661065 11336.607 + 3050 302.17194 102.56359 -3418.7863 -2969.3285 -40.479311 11336.43 + 3060 295.33808 102.44622 -3414.8741 -2975.5811 67.550001 11336.253 + 3070 287.67745 102.30843 -3410.281 -2982.3826 183.72816 11336.076 + 3080 285.34182 102.25583 -3408.5278 -2984.1035 220.67815 11335.903 + 3090 291.27355 102.31712 -3410.5705 -2977.3232 147.76789 11335.735 + 3100 301.73339 102.51056 -3417.0188 -2968.2132 -8.5041565 11335.568 + 3110 309.85683 102.64885 -3421.6282 -2960.7396 -120.10423 11335.402 + 3120 311.55409 102.66934 -3422.3115 -2958.8984 -128.86291 11335.233 + 3130 307.3327 102.59945 -3419.9817 -2962.8476 -47.617285 11335.063 + 3140 299.92758 102.48502 -3416.1675 -2970.0479 80.605277 11334.891 + 3150 292.93367 102.3692 -3412.3066 -2976.59 204.67622 11334.721 + 3160 289.72621 102.34257 -3411.4191 -2980.4733 256.81694 11334.555 + 3170 291.67069 102.35894 -3411.9647 -2978.1267 239.1892 11334.393 + 3180 297.52865 102.46647 -3415.5491 -2972.9978 140.31814 11334.236 + 3190 305.25157 102.59851 -3419.9503 -2965.9117 4.6483503 11334.081 + 3200 313.04988 102.69231 -3423.077 -2957.439 -122.70098 11333.927 + 3210 318.97432 102.78136 -3426.0455 -2951.5954 -229.92268 11333.77 + 3220 321.20355 102.80998 -3426.9992 -2949.2333 -274.19805 11333.609 + 3230 319.32049 102.76394 -3425.4647 -2950.4997 -244.22576 11333.443 + 3240 314.68691 102.69567 -3423.1889 -2955.116 -170.83257 11333.273 + 3250 309.49262 102.61105 -3420.3684 -2960.0216 -76.170797 11333.1 + 3260 304.59025 102.52707 -3417.569 -2964.5141 23.655499 11332.925 + 3270 298.17868 102.40837 -3413.6123 -2970.0941 150.0034 11332.75 + 3280 289.89012 102.28882 -3409.6275 -2978.4379 284.14697 11332.579 + 3290 284.30126 102.2261 -3407.5368 -2984.6603 366.95736 11332.413 + 3300 286.09745 102.25218 
-3408.4061 -2982.8579 353.68123 11332.253 + 3310 293.33392 102.37673 -3412.5576 -2976.2457 256.19547 11332.1 + 3320 301.0149 102.49231 -3416.4103 -2968.6735 155.09674 11331.952 + 3330 307.30122 102.61033 -3420.3444 -2963.2571 62.440701 11331.807 + 3340 312.53092 102.65711 -3421.9037 -2957.0377 9.2485959 11331.663 + 3350 315.64491 102.66766 -3422.2553 -2952.7574 -8.9635528 11331.519 + 3360 315.15776 102.64301 -3421.4338 -2952.6605 16.901657 11331.376 + 3370 311.24667 102.58588 -3419.5292 -2956.5734 81.827464 11331.232 + 3380 305.99482 102.54774 -3418.2579 -2963.1138 145.93822 11331.09 + 3390 302.75439 102.46151 -3415.3836 -2965.0594 209.37224 11330.951 + 3400 303.00349 102.39698 -3413.2328 -2962.5381 231.93254 11330.816 + 3410 305.5257 102.442 -3414.7334 -2960.2871 190.476 11330.685 + 3420 307.81516 102.4637 -3415.4565 -2957.6048 157.58587 11330.558 + 3430 308.4995 102.48069 -3416.0229 -2957.1533 141.19165 11330.434 + 3440 307.28043 102.4766 -3415.8866 -2958.8302 153.55063 11330.313 + 3450 304.08687 102.41482 -3413.8274 -2961.5213 207.93895 11330.194 + 3460 299.65439 102.33421 -3411.1403 -2965.4271 277.2275 11330.08 + 3470 296.32907 102.27835 -3409.2782 -2968.5112 322.25086 11329.97 + 3480 296.47373 102.33853 -3411.2843 -2970.3021 287.42311 11329.867 + 3490 301.11216 102.41975 -3413.9916 -2966.1101 205.21241 11329.769 + 3500 308.79241 102.52367 -3417.4556 -2958.1503 91.741468 11329.674 + 3510 313.82024 112.62165 -3420.7216 -2953.9378 68.070844 11329.582 + 3520 312.17975 112.59278 -3419.7593 -2955.4156 99.345939 11329.492 + 3530 304.28932 112.50266 -3416.7555 -2964.1482 218.12897 11329.403 + 3540 293.52077 112.33699 -3411.233 -2974.6431 397.39315 11329.319 + 3550 284.16324 112.16362 -3405.4541 -2982.7828 570.65082 11329.242 + 3560 278.59083 112.06147 -3402.0489 -2987.6661 682.09863 11329.176 + 3570 276.87088 112.01565 -3400.5217 -2988.6972 730.14542 11329.123 + 3580 278.19722 112.04419 -3401.4731 -2987.6759 711.27968 11329.083 + 3590 281.91007 112.08873 -3402.9577 
-2983.6378 655.25628 11329.057 + 3600 287.41302 112.22629 -3407.5431 -2980.038 543.23683 11329.043 + 3610 293.04417 112.31863 -3410.6209 -2974.7399 446.56273 11329.039 + 3620 297.26272 112.35175 -3411.7251 -2969.5693 386.12831 11329.044 + 3630 300.17716 112.38288 -3412.7626 -2966.2718 342.09224 11329.056 + 3640 302.23929 112.38388 -3412.796 -2963.238 321.74396 11329.075 + 3650 303.26631 112.41101 -3413.7004 -2962.6147 303.98938 11329.1 + 3660 302.92126 112.37573 -3412.5243 -2961.9519 321.75911 11329.13 + 3670 301.78497 112.34303 -3411.4342 -2962.552 344.33169 11329.167 + 3680 300.06579 112.33224 -3411.0748 -2964.7497 361.96582 11329.21 + 3690 297.42058 112.32177 -3410.7256 -2968.335 386.32968 11329.261 + 3700 294.17479 112.22625 -3407.5416 -2969.9788 449.07447 11329.318 + 3710 291.36041 112.20504 -3406.8348 -2973.4583 482.40403 11329.384 + 3720 289.22388 112.15171 -3405.0572 -2974.8586 525.75427 11329.46 + 3730 287.40035 112.1273 -3404.2435 -2976.7572 557.519 11329.545 + 3740 286.43141 112.1289 -3404.2967 -2978.2517 573.33026 11329.641 + 3750 287.25887 112.11706 -3403.9021 -2976.6263 580.77656 11329.747 + 3760 289.37209 112.13446 -3404.4819 -2974.0629 567.39293 11329.865 + 3770 290.97106 112.16693 -3405.5643 -2972.7669 552.87407 11329.993 + 3780 290.79361 112.17361 -3405.7869 -2973.2534 561.15697 11330.132 + 3790 289.6584 112.15243 -3405.0811 -2974.2362 583.38373 11330.282 + 3800 289.81831 112.16476 -3405.492 -2974.4092 576.60121 11330.443 + 3810 292.48719 112.1913 -3406.3766 -2971.3241 539.19837 11330.615 + 3820 296.99463 112.2721 -3409.0701 -2967.3131 465.89889 11330.797 + 3830 301.10814 112.33452 -3411.1507 -2963.2752 407.18977 11330.988 + 3840 302.52711 112.34397 -3411.4657 -2961.4795 397.2451 11331.187 + 3850 300.48194 112.33889 -3411.2964 -2964.3523 426.34028 11331.393 + 3860 296.07037 112.24306 -3408.1022 -2967.7199 510.06753 11331.608 + 3870 291.71471 112.20744 -3406.9145 -2973.011 562.96593 11331.833 + 3880 289.931 112.15163 -3405.0543 -2973.8039 589.98936 
11332.068 + 3890 291.48251 112.15539 -3405.1797 -2971.6215 552.76938 11332.315 + 3900 295.40157 112.25383 -3408.4609 -2969.0734 451.50877 11332.572 + 3910 299.66071 112.33031 -3411.0104 -2965.2878 356.16668 11332.838 + 3920 302.09191 112.36433 -3412.1445 -2962.8056 301.68043 11333.11 + 3930 300.91963 112.33418 -3411.1394 -2963.5442 314.52609 11333.389 + 3940 296.44979 112.25295 -3408.4315 -2967.4849 385.79232 11333.674 + 3950 291.35954 112.14567 -3404.8556 -2971.4803 481.31446 11333.966 + 3960 287.77794 112.10707 -3403.5689 -2975.521 541.92103 11334.268 + 3970 285.78596 112.07518 -3402.5062 -2977.4212 584.71511 11334.58 + 3980 284.88927 112.07729 -3402.5763 -2978.825 600.06986 11334.903 + 3990 285.23665 112.09618 -3403.2058 -2978.9379 591.28151 11335.237 + 4000 287.71309 112.07949 -3402.6497 -2974.6982 573.42617 11335.583 + 4010 292.18637 112.16707 -3405.569 -2970.9639 494.20949 11335.94 + 4020 297.25971 112.2449 -3408.1634 -2966.0121 412.26903 11336.306 + 4030 300.6334 112.27855 -3409.2851 -2962.1157 363.51421 11336.68 + 4040 300.39711 112.27161 -3409.0537 -2962.2357 363.09176 11337.062 + 4050 296.85456 112.20546 -3406.8486 -2965.2999 414.89498 11337.451 + 4060 292.44322 112.13275 -3404.4251 -2969.4379 479.30074 11337.847 + 4070 289.31292 112.05786 -3401.9285 -2971.5975 536.28198 11338.253 + 4080 288.22728 112.04471 -3401.4902 -2972.774 552.87886 11338.67 + 4090 288.96998 112.05017 -3401.6724 -2971.8515 548.64672 11339.097 + 4100 290.02657 112.09236 -3403.0786 -2971.686 527.71502 11339.534 + 4110 290.0294 112.09717 -3403.2389 -2971.8422 526.85667 11339.982 + 4120 289.71383 112.1203 -3404.0099 -2973.0825 518.01045 11340.44 + 4130 290.72904 112.15373 -3405.1243 -2972.6868 489.42752 11340.908 + 4140 293.57015 112.19006 -3406.3355 -2969.6721 439.64349 11341.385 + 4150 297.00834 112.22039 -3407.3462 -2965.5688 383.85759 11341.871 + 4160 299.0427 112.26669 -3408.8897 -2964.0863 331.12884 11342.365 + 4170 299.04358 112.2474 -3408.2465 -2963.4418 321.19393 11342.865 + 
4180 298.08281 112.23567 -3407.8556 -2964.48 318.60805 11343.372 + 4190 297.56479 112.27088 -3409.0293 -2966.4242 297.26633 11343.885 + 4200 297.74424 112.29405 -3409.8018 -2966.9298 277.40905 11344.404 + 4210 297.77619 112.27922 -3409.3073 -2966.3878 275.10654 11344.928 + 4220 296.84391 112.2325 -3407.75 -2966.2172 293.82591 11345.458 + 4230 295.45499 112.1979 -3406.5968 -2967.1298 313.32611 11345.994 + 4240 294.90025 112.19295 -3406.4318 -2967.79 320.64488 11346.536 + 4250 294.33896 112.15246 -3405.0821 -2967.2752 347.61266 11347.085 + 4260 292.50339 112.10645 -3403.5482 -2968.4716 390.73716 11347.64 + 4270 290.30042 112.08588 -3402.8626 -2971.0627 429.04038 11348.203 + 4280 289.44734 112.07595 -3402.5317 -2972.0007 452.41154 11348.774 + 4290 290.27173 112.05848 -3401.9492 -2970.192 460.77553 11349.355 + 4300 292.06583 112.06505 -3402.1684 -2967.7426 444.89069 11349.944 + 4310 293.74496 112.1525 -3405.0833 -2968.1599 390.53476 11350.542 + 4320 294.41598 112.15205 -3405.0684 -2967.147 369.76308 11351.147 + 4330 294.3844 112.12406 -3404.1354 -2966.2609 358.56018 11351.76 + 4340 294.49418 112.12602 -3404.2005 -2966.1628 330.56658 11352.381 + 4350 294.94774 112.10215 -3403.405 -2964.6926 309.28604 11353.007 + 4360 295.27813 112.08407 -3402.8023 -2963.5984 287.43987 11353.64 + 4370 295.75524 112.14334 -3404.7779 -2964.8644 238.28923 11354.279 + 4380 297.03367 112.145 -3404.8333 -2963.0182 210.20054 11354.923 + 4390 298.97933 112.1735 -3405.7833 -2961.0742 169.91228 11355.571 + 4400 300.74402 112.1871 -3406.2366 -2958.9027 139.09782 11356.223 + 4410 301.58368 112.20496 -3406.8319 -2958.249 116.24921 11356.878 + 4420 301.37308 112.21319 -3407.1064 -2958.8368 106.92515 11357.535 + 4430 300.95878 112.19826 -3406.6087 -2958.9553 110.27193 11358.195 + 4440 301.05389 112.1998 -3406.6599 -2958.8651 106.89495 11358.857 + 4450 301.01024 112.15564 -3405.1881 -2957.4582 125.31948 11359.522 + 4460 300.15177 112.12548 -3404.1827 -2957.7297 146.28156 11360.189 + 4470 298.41347 
112.08862 -3402.954 -2959.0866 176.11859 11360.86 + 4480 295.78779 112.04882 -3401.6275 -2961.6655 208.13775 11361.534 + 4490 293.99068 112.03472 -3401.1572 -2963.8683 215.77962 11362.213 + 4500 295.14988 112.07779 -3402.5929 -2963.5798 173.91619 11362.896 + 4510 298.46042 112.14087 -3404.6956 -2960.7584 107.34651 11363.583 + 4520 300.52531 112.14764 -3404.9214 -2957.9128 76.720645 11364.272 + 4530 299.04629 112.13473 -3404.4911 -2959.6824 90.465654 11364.963 + 4540 294.70636 112.07417 -3402.4725 -2964.1191 154.70066 11365.657 + 4550 289.8278 111.98502 -3399.5006 -2968.4037 238.15365 11366.354 + 4560 287.06081 111.92327 -3397.4423 -2970.4611 293.49845 11367.055 + 4570 287.17919 111.87809 -3395.9363 -2968.7791 313.34189 11367.763 + 4580 289.62792 111.9268 -3397.56 -2966.7604 270.18472 11368.477 + 4590 294.12786 112.01356 -3400.4522 -2962.9592 190.50138 11369.196 + 4600 299.82229 112.06503 -3402.1676 -2956.2047 114.51482 11369.919 + 4610 304.6382 112.14907 -3404.9691 -2951.8429 38.086885 11370.645 + 4620 307.00205 112.14884 -3404.9614 -2948.3191 19.268197 11371.373 + 4630 307.60392 112.13892 -3404.6308 -2947.0933 22.477149 11372.101 + 4640 308.3034 112.12276 -3404.0919 -2945.5139 26.859602 11372.829 + 4650 310.183 112.15394 -3405.1313 -2943.7576 -0.66225589 11373.559 + 4660 312.20571 112.18362 -3406.1208 -2941.7384 -34.675836 11374.29 + 4670 312.19174 112.18718 -3406.2395 -2941.8779 -47.79275 11375.02 + 4680 309.32117 112.15892 -3405.2975 -2945.2057 -27.792753 11375.749 + 4690 304.59351 112.09195 -3403.0651 -2950.0054 24.410403 11376.479 + 4700 299.72789 112.02282 -3400.7608 -2954.9383 83.204669 11377.21 + 4710 296.49159 111.96525 -3398.8415 -2957.8327 126.98296 11377.942 + 4720 296.3322 111.95105 -3398.3684 -2957.5968 130.30677 11378.677 + 4730 298.77269 111.98412 -3399.4706 -2955.0689 92.09603 11379.416 + 4740 301.85841 112.00426 -3400.1421 -2951.1506 48.80207 11380.156 + 4750 304.00922 112.04034 -3401.3446 -2949.1539 4.0719572 11380.897 + 4760 304.23108 111.99696 
-3399.8988 -2947.3781 6.398444 11381.64 + 4770 301.96767 111.95167 -3398.3889 -2949.2349 33.686842 11382.382 + 4780 297.8986 111.87638 -3395.8794 -2952.7778 91.337441 11383.126 + 4790 293.94296 111.79523 -3393.1744 -2955.9565 152.72218 11383.873 + 4800 292.34822 111.7968 -3393.2268 -2958.3809 161.50646 11384.622 + 4810 294.61697 111.81922 -3393.9739 -2955.7535 126.77343 11385.375 + 4820 299.61819 111.89887 -3396.629 -2950.9697 44.321826 11386.131 + 4830 303.95004 112.00688 -3400.2294 -2948.1267 -44.597588 11386.888 + 4840 305.18896 112.02779 -3400.9263 -2946.9809 -73.576983 11387.644 + 4850 303.82769 112.00153 -3400.0509 -2948.1302 -58.862803 11388.4 + 4860 301.68863 111.98605 -3399.535 -2950.796 -36.669954 11389.155 + 4870 299.99992 111.92287 -3397.4291 -2951.202 2.5995097 11389.91 + 4880 299.05999 111.93139 -3397.713 -2952.884 8.2212876 11390.665 + 4890 298.44802 111.89024 -3396.3414 -2952.4226 26.370874 11391.421 + 4900 298.32652 111.86114 -3395.3712 -2951.6331 29.917068 11392.178 + 4910 299.53022 111.90937 -3396.9789 -2951.4504 -13.27231 11392.936 + 4920 301.92773 111.93806 -3397.9354 -2948.8408 -63.283119 11393.694 + 4930 303.49394 111.94498 -3398.166 -2946.7418 -100.84788 11394.451 + 4940 302.55359 111.92991 -3397.6638 -2947.6383 -110.70989 11395.207 + 4950 299.85877 111.9172 -3397.24 -2951.2228 -104.7902 11395.961 + 4960 296.98274 111.88576 -3396.192 -2954.4527 -85.285838 11396.714 + 4970 294.914 111.8836 -3396.1199 -2957.4577 -76.679334 11397.466 + 4980 293.61147 111.88637 -3396.2122 -2959.4874 -71.712277 11398.216 + 4990 292.91902 111.86375 -3395.4585 -2959.7636 -59.937667 11398.966 + 5000 293.0855 111.87124 -3395.7079 -2959.7654 -67.018003 11399.715 + 5010 294.598 111.83789 -3394.5965 -2956.4043 -71.117596 11400.463 + 5020 297.34325 111.85271 -3395.0903 -2952.8147 -107.42432 11401.211 + 5030 301.11793 111.88856 -3396.2855 -2948.3954 -162.94314 11401.956 + 5040 305.64741 111.99614 -3399.8714 -2945.2441 -252.13638 11402.699 + 5050 309.80654 112.03054 
-3401.018 -2940.2042 -306.34653 11403.438 + 5060 311.28106 112.01626 -3400.542 -2937.535 -316.7573 11404.171 + 5070 309.33998 112.00428 -3400.1428 -2940.023 -297.50862 11404.899 + 5080 305.72309 111.99606 -3399.8685 -2945.1286 -263.59958 11405.622 + 5090 303.02725 111.97962 -3399.3206 -2948.5905 -232.93751 11406.34 + 5100 302.58088 111.94551 -3398.1836 -2948.1175 -214.79018 11407.054 + 5110 303.42424 111.95268 -3398.4225 -2947.102 -226.8692 11407.765 + 5120 303.58972 111.93667 -3397.889 -2946.3223 -230.86674 11408.471 + 5130 302.02765 111.88406 -3396.1354 -2946.8921 -213.99183 11409.174 + 5140 299.59878 111.82101 -3394.0336 -2948.4032 -194.8338 11409.873 + 5150 299.00485 111.80817 -3393.6057 -2948.8586 -215.24991 11410.568 + 5160 301.56809 111.87166 -3395.7221 -2947.1624 -288.39273 11411.26 + 5170 305.06087 111.97829 -3399.2763 -2945.5214 -378.74891 11411.947 + 5180 306.60043 111.93001 -3397.6671 -2941.6222 -383.23623 11412.627 + 5190 305.33426 111.90609 -3396.8695 -2942.708 -361.27186 11413.3 + 5200 302.00186 111.85969 -3395.3231 -2946.1182 -305.62771 11413.967 + 5210 297.85982 111.78411 -3392.8036 -2949.7597 -231.48813 11414.628 + 5220 294.40813 111.72016 -3390.672 -2952.7623 -175.28359 11415.286 + 5230 293.45936 111.70976 -3390.3255 -2953.8269 -169.86767 11415.94 + 5240 294.94525 111.72396 -3390.7988 -2952.0901 -202.8398 11416.591 + 5250 296.95868 111.76268 -3392.0893 -2950.3858 -256.42496 11417.239 + 5260 298.36144 111.77539 -3392.5131 -2948.7231 -297.67284 11417.883 + 5270 299.9619 111.77962 -3392.6541 -2946.4835 -332.96203 11418.521 + 5280 302.74579 111.79555 -3393.1851 -2942.8737 -373.52615 11419.153 + 5290 306.17905 111.83022 -3394.3406 -2938.9225 -416.81553 11419.779 + 5300 308.44521 111.90078 -3396.6926 -2937.9037 -455.80177 11420.397 + 5310 308.02982 111.90703 -3396.901 -2938.7299 -443.42372 11421.007 + 5320 305.00786 111.89358 -3396.4528 -2942.7767 -404.21095 11421.609 + 5330 301.5211 111.8173 -3393.91 -2945.4202 -344.16095 11422.203 + 5340 300.35286 
111.76781 -3392.2602 -2945.5081 -323.17737 11422.791 + 5350 302.77611 111.77811 -3392.6036 -2942.2471 -363.37731 11423.373 + 5360 307.38336 111.83654 -3394.5514 -2937.342 -444.85145 11423.949 + 5370 311.21332 111.86627 -3395.5423 -2932.6361 -508.32271 11424.516 + 5380 311.78544 111.88912 -3396.304 -2932.5468 -538.29044 11425.075 + 5390 309.1498 111.86268 -3395.4225 -2935.5856 -517.04187 11425.623 + 5400 305.55347 111.78873 -3392.9576 -2938.4699 -464.74116 11426.162 + 5410 303.37076 111.69893 -3389.9643 -2938.7233 -413.66713 11426.692 + 5420 302.99892 111.71819 -3390.6063 -2939.9184 -415.6796 11427.215 + 5430 302.93829 111.71505 -3390.5018 -2939.9041 -410.70129 11427.729 + 5440 301.83468 111.69855 -3389.9516 -2940.9954 -394.29474 11428.237 + 5450 300.37836 111.66835 -3388.9449 -2942.1549 -372.81769 11428.737 + 5460 300.69038 111.70302 -3390.1007 -2942.8466 -393.08972 11429.23 + 5470 304.0612 111.74126 -3391.3754 -2939.1074 -437.40674 11429.716 + 5480 308.43934 111.79223 -3393.0743 -2934.2941 -489.30054 11430.193 + 5490 309.75954 111.8161 -3393.87 -2933.1262 -500.33525 11430.662 + 5500 306.15731 111.8015 -3393.3834 -2937.9976 -453.93342 11431.122 + 5510 300.42212 111.68054 -3389.3514 -2942.4963 -349.51515 11431.573 + 5520 297.01716 111.62611 -3387.5371 -2945.7466 -291.47271 11432.018 + 5530 297.60665 111.61433 -3387.1442 -2944.4769 -287.56086 11432.457 + 5540 300.57981 111.66586 -3388.862 -2941.7723 -334.95585 11432.891 + 5550 303.7393 111.70069 -3390.0229 -2938.2337 -384.40511 11433.319 + 5560 306.26661 111.74871 -3391.6235 -2936.0751 -437.49847 11433.74 + 5570 307.94817 111.77873 -3392.6242 -2934.5746 -475.4377 11434.153 + 5580 307.71307 111.76907 -3392.3024 -2934.6025 -479.47797 11434.557 + 5590 305.06207 111.75874 -3391.958 -2938.2013 -460.12167 11434.952 + 5600 300.86617 111.72273 -3390.7578 -2943.2421 -413.36436 11435.339 + 5610 296.50669 111.67621 -3389.2071 -2948.1759 -357.61827 11435.719 + 5620 293.5616 111.57439 -3385.813 -2949.1624 -292.15997 11436.092 + 
5630 293.25354 111.5716 -3385.7202 -2949.5278 -291.89447 11436.459 + 5640 296.58426 111.58969 -3386.3229 -2945.1763 -337.65611 11436.821 + 5650 303.1078 111.70033 -3390.0111 -2939.1612 -451.24854 11437.177 + 5660 309.83897 111.81052 -3393.6842 -2932.8222 -569.53122 11437.524 + 5670 313.79402 111.8983 -3396.6098 -2929.865 -655.5822 11437.861 + 5680 314.57764 111.87411 -3395.8036 -2927.8932 -666.86078 11438.185 + 5690 313.46961 111.86976 -3395.6586 -2929.3963 -659.31727 11438.497 + 5700 311.4673 111.87825 -3395.9418 -2932.6578 -637.62795 11438.796 + 5710 308.81075 111.82075 -3394.0248 -2934.6923 -574.02683 11439.084 + 5720 304.71425 111.77088 -3392.3627 -2939.1233 -496.85943 11439.36 + 5730 299.18305 111.65393 -3388.4645 -2943.4524 -386.4782 11439.628 + 5740 294.33467 111.5918 -3386.3933 -2948.5928 -312.03292 11439.888 + 5750 292.33312 111.56909 -3385.6364 -2950.8131 -288.59554 11440.142 + 5760 293.2352 111.54724 -3384.908 -2948.7429 -302.20245 11440.391 + 5770 296.01439 111.58412 -3386.1375 -2945.8385 -362.20936 11440.634 + 5780 299.57634 111.62222 -3387.4073 -2941.8102 -430.44687 11440.87 + 5790 302.2015 111.65652 -3388.5507 -2939.0488 -484.56535 11441.099 + 5800 302.31851 111.66891 -3388.9636 -2939.2877 -501.87858 11441.318 + 5810 300.15059 111.67096 -3389.0318 -2942.5806 -487.91527 11441.528 + 5820 297.84134 111.64312 -3388.104 -2945.0876 -452.51112 11441.729 + 5830 296.96172 111.63115 -3387.705 -2945.9969 -426.94079 11441.921 + 5840 296.53664 111.64246 -3388.0822 -2947.0064 -411.66903 11442.105 + 5850 295.68822 111.59941 -3386.6472 -2946.8333 -376.21638 11442.282 + 5860 295.89206 111.61718 -3387.2393 -2947.1223 -380.953 11442.451 + 5870 299.3712 111.64594 -3388.198 -2942.906 -425.68545 11442.613 + 5880 305.7684 111.7509 -3391.6967 -2936.8894 -532.12832 11442.767 + 5890 312.13982 111.9108 -3397.0268 -2932.7425 -665.30456 11442.911 + 5900 315.93456 112.00815 -3400.2717 -2930.343 -753.56358 11443.043 + 5910 316.80268 112.02918 -3400.9726 -2929.7526 -782.40538 
11443.16 + 5920 315.39944 112.02431 -3400.8103 -2931.6775 -770.99043 11443.262 + 5930 312.24109 111.93102 -3397.7008 -2933.2658 -698.78562 11443.35 + 5940 308.55707 111.81733 -3393.9108 -2934.9556 -606.16532 11443.424 + 5950 306.29304 111.78658 -3392.886 -2937.2983 -554.71195 11443.487 + 5960 306.2832 111.79328 -3393.1093 -2937.5363 -538.09897 11443.539 + 5970 307.30185 111.82107 -3394.0358 -2936.9476 -543.40687 11443.581 + 5980 307.6486 111.83772 -3394.5906 -2936.9866 -544.03194 11443.613 + 5990 306.45289 111.76567 -3392.1889 -2936.3634 -501.60286 11443.634 + 6000 304.69067 111.75913 -3391.9711 -2938.7668 -477.72322 11443.646 + 6010 303.41492 111.68577 -3389.5255 -2938.2188 -428.48886 11443.649 + 6020 301.89535 111.65184 -3388.3945 -2939.3481 -389.86777 11443.643 + 6030 298.63654 111.64033 -3388.011 -2943.8117 -348.20631 11443.63 + 6040 294.47919 111.57044 -3385.6813 -2947.6658 -283.83188 11443.61 + 6050 292.52809 111.54906 -3384.9686 -2949.8552 -265.08019 11443.585 + 6060 294.92215 111.588 -3386.2668 -2947.5924 -315.3257 11443.555 + 6070 300.73863 111.68378 -3389.4592 -2942.1333 -424.20854 11443.518 + 6080 306.92649 111.77783 -3392.5944 -2936.0645 -540.38385 11443.473 + 6090 310.90826 111.8393 -3394.6433 -2932.1908 -624.50414 11443.418 + 6100 311.80005 111.79014 -3393.0048 -2929.2259 -633.56098 11443.351 + 6110 310.3043 111.74081 -3391.3603 -2929.8062 -611.76548 11443.273 + 6120 308.07003 111.68717 -3389.5723 -2931.3415 -570.29181 11443.182 + 6130 305.36747 111.6771 -3389.2367 -2935.0257 -533.05282 11443.08 + 6140 302.37094 111.56741 -3385.5804 -2935.8265 -453.07064 11442.969 + 6150 301.2837 111.57241 -3385.7469 -2937.6103 -434.8878 11442.848 + 6160 303.51697 111.57727 -3385.909 -2934.4505 -447.9957 11442.719 + 6170 307.90762 111.66138 -3388.7128 -2930.7236 -511.40408 11442.581 + 6180 311.36513 111.73009 -3391.003 -2927.871 -562.48856 11442.434 + 6190 311.11046 111.77654 -3392.5513 -2929.7981 -576.0018 11442.276 + 6200 307.36579 111.74922 -3391.6406 -2934.4573 
-532.82298 11442.106 + 6210 302.85217 111.65394 -3388.4647 -2937.9951 -456.15798 11441.927 + 6220 300.39571 111.6245 -3387.4832 -2940.6674 -420.2194 11441.738 + 6230 301.42682 111.6649 -3388.8299 -2940.4804 -438.20176 11441.541 + 6240 304.81996 111.69329 -3389.7762 -2936.3796 -470.9116 11441.336 + 6250 307.69783 111.75248 -3391.7494 -2934.0722 -513.49632 11441.121 + 6260 307.89364 111.78689 -3392.8962 -2934.9278 -529.39867 11440.897 + 6270 305.70706 111.74618 -3391.5392 -2936.8231 -502.82056 11440.662 + 6280 303.38026 111.73475 -3391.1582 -2939.903 -490.7223 11440.418 + 6290 302.61985 111.72971 -3390.9902 -2940.8661 -494.53635 11440.164 + 6300 302.7084 111.69536 -3389.8455 -2939.5897 -490.38666 11439.901 + 6310 301.36501 111.68857 -3389.6189 -2941.3613 -480.54303 11439.628 + 6320 298.10336 111.63797 -3387.9323 -2944.5262 -433.94487 11439.346 + 6330 295.05851 111.58315 -3386.105 -2947.2278 -384.61619 11439.055 + 6340 294.31978 111.55482 -3385.1608 -2947.3824 -363.45878 11438.757 + 6350 295.68865 111.56538 -3385.5125 -2945.6981 -376.43369 11438.451 + 6360 297.90732 111.59793 -3386.5978 -2943.4832 -409.55125 11438.138 + 6370 300.69878 111.63603 -3387.8678 -2940.6011 -452.87487 11437.818 + 6380 304.10074 111.67247 -3389.0823 -2936.7555 -498.36601 11437.488 + 6390 306.54537 111.71273 -3390.4245 -2934.4615 -532.6716 11437.149 + 6400 306.78573 111.76383 -3392.1276 -2935.807 -547.43007 11436.8 + 6410 305.90297 111.7252 -3390.8399 -2935.8325 -511.46257 11436.44 + 6420 305.75176 111.71851 -3390.6172 -2935.8346 -488.26838 11436.07 + 6430 307.37146 111.73731 -3391.2436 -2934.0519 -487.13144 11435.69 + 6440 310.30993 111.78202 -3392.7339 -2931.1714 -509.03092 11435.301 + 6450 313.13315 111.84854 -3394.9513 -2929.1894 -543.05637 11434.902 + 6460 314.44167 111.87415 -3395.805 -2928.0968 -553.92728 11434.492 + 6470 313.88042 111.85683 -3395.2275 -2928.3542 -535.07568 11434.072 + 6480 311.4787 111.77342 -3392.4475 -2929.1465 -476.20019 11433.641 + 6490 307.77733 111.70798 
-3390.2661 -2932.4707 -412.28298 11433.201 + 6500 304.23179 111.6451 -3388.17 -2935.6483 -348.52993 11432.753 + 6510 302.29418 111.60508 -3386.836 -2937.1963 -302.63747 11432.298 + 6520 301.84323 111.58974 -3386.3246 -2937.3556 -273.11907 11431.837 + 6530 301.45687 111.60312 -3386.7708 -2938.3765 -251.0312 11431.37 + 6540 300.04488 111.58367 -3386.1223 -2939.8283 -206.52249 11430.899 + 6550 297.50815 111.54553 -3384.851 -2942.3301 -148.33854 11430.423 + 6560 294.56197 111.50848 -3383.6161 -2945.4775 -95.393541 11429.944 + 6570 292.58997 111.45997 -3381.9991 -2946.7937 -58.303695 11429.463 + 6580 292.42266 111.46899 -3382.2995 -2947.343 -71.266456 11428.981 + 6590 293.77828 111.53084 -3384.3614 -2947.3885 -129.64139 11428.497 + 6600 296.7094 111.57539 -3385.8464 -2944.5136 -201.5605 11428.01 + 6610 301.93077 111.64221 -3388.0736 -2938.9744 -298.40912 11427.52 + 6620 309.07114 111.78449 -3392.8165 -2933.0966 -428.32681 11427.023 + 6630 315.44562 111.91026 -3397.0086 -2927.8071 -529.43373 11426.518 + 6640 318.07732 111.95808 -3398.6025 -2925.4866 -554.34859 11426.003 + 6650 316.04355 111.91246 -3397.0821 -2926.9912 -497.01773 11425.477 + 6660 311.40802 111.83547 -3394.5156 -2931.3198 -405.75476 11424.941 + 6670 307.80464 111.78361 -3392.7868 -2934.9508 -339.39882 11424.397 + 6680 307.93824 111.77419 -3392.4729 -2934.4381 -330.60736 11423.847 + 6690 311.12049 111.83535 -3394.5118 -2931.7437 -381.9866 11423.289 + 6700 313.3959 111.8578 -3395.26 -2929.1073 -415.82786 11422.724 + 6710 311.42766 111.83727 -3394.5755 -2931.3505 -399.07436 11422.151 + 6720 305.06312 111.80273 -3393.4242 -2939.6659 -335.42165 11421.569 + 6730 296.2198 111.68771 -3389.5902 -2948.9857 -214.14945 11420.982 + 6740 287.8049 111.56012 -3385.3373 -2957.2493 -89.231176 11420.389 + 6750 283.20398 111.51947 -3383.9824 -2962.738 -31.2551 11419.795 + 6760 284.8599 111.49977 -3383.3256 -2959.6181 -36.925431 11419.2 + 6770 292.33639 111.57273 -3385.7576 -2950.9294 -129.67617 11418.603 + 6780 302.33249 
111.72713 -3390.9045 -2941.2078 -277.30623 11418.004 + 6790 311.12604 111.88943 -3396.3144 -2933.538 -417.77032 11417.399 + 6800 316.4072 111.97367 -3399.1224 -2928.4906 -496.33753 11416.786 + 6810 318.20243 111.96052 -3398.684 -2925.382 -503.70116 11416.163 + 6820 318.12252 111.97536 -3399.1788 -2925.9956 -497.4755 11415.53 + 6830 317.05582 111.98171 -3399.3905 -2927.794 -470.43917 11414.888 + 6840 314.62091 111.89836 -3396.6119 -2928.6371 -390.87097 11414.236 + 6850 310.30033 111.85072 -3395.0239 -2933.4757 -307.50543 11413.577 + 6860 304.90471 111.81674 -3393.8915 -2940.3688 -221.15142 11412.911 + 6870 300.02128 111.70056 -3390.0188 -2943.7599 -111.84164 11412.241 + 6880 297.48866 111.65897 -3388.6323 -2946.1404 -54.820923 11411.568 + 6890 298.32064 111.66939 -3388.9798 -2945.2505 -49.991578 11410.894 + 6900 302.56547 111.69309 -3389.7698 -2939.7266 -80.175542 11410.218 + 6910 308.69898 111.77199 -3392.3996 -2933.2333 -145.07608 11409.541 + 6920 313.64045 111.83114 -3394.3715 -2927.8551 -190.60806 11408.86 + 6930 315.30156 111.85944 -3395.3146 -2926.3274 -195.83001 11408.176 + 6940 313.58225 111.83945 -3394.6484 -2928.2186 -153.29527 11407.487 + 6950 309.47627 111.79163 -3393.0542 -2932.7317 -82.07056 11406.796 + 6960 305.13244 111.70831 -3390.2769 -2936.4155 3.1917716 11406.102 + 6970 301.94189 111.64421 -3388.1403 -2939.0246 69.743776 11405.408 + 6980 299.91222 111.64672 -3388.224 -2942.1273 98.81739 11404.715 + 6990 298.87822 111.61448 -3387.1494 -2942.5907 129.8498 11404.024 + 7000 299.36836 111.63124 -3387.708 -2942.4203 128.86864 11403.334 + 7010 301.16006 111.721 -3390.6999 -2942.7471 90.111629 11402.647 + 7020 302.89773 111.73094 -3391.0313 -2940.4939 83.491142 11401.961 + 7030 303.28406 111.75398 -3391.7992 -2940.6872 85.563131 11401.276 + 7040 301.68437 111.75184 -3391.728 -2942.9954 113.97003 11400.592 + 7050 298.49021 111.66397 -3388.7991 -2944.8175 184.32172 11399.911 + 7060 295.12676 111.60883 -3386.9612 -2947.9825 235.61511 11399.232 + 7070 
293.82409 111.56424 -3385.4745 -2948.4334 256.21291 11398.558 + 7080 296.12438 111.61889 -3387.2964 -2946.8339 199.43853 11397.887 + 7090 301.98034 111.74855 -3391.6184 -2942.4455 80.490139 11397.221 + 7100 309.39575 111.88848 -3396.2826 -2936.0798 -53.969342 11396.555 + 7110 315.65648 112.00936 -3400.3119 -2930.7968 -163.65684 11395.888 + 7120 318.78446 112.09801 -3403.2669 -2929.0992 -223.5058 11395.218 + 7130 318.34034 112.09559 -3403.1863 -2929.6792 -207.427 11394.543 + 7140 314.90101 111.99596 -3399.8655 -2931.4741 -121.94178 11393.864 + 7150 310.12154 111.88196 -3396.0653 -2934.783 -20.75413 11393.182 + 7160 307.2451 111.80762 -3393.5873 -2936.5835 42.400763 11392.499 + 7170 308.92331 111.86437 -3395.4789 -2935.9789 9.4887812 11391.817 + 7180 314.63397 111.98047 -3399.3489 -2931.3547 -83.877296 11391.134 + 7190 320.99056 112.11151 -3403.7168 -2926.2677 -187.64519 11390.45 + 7200 324.25215 112.15947 -3405.3158 -2923.0153 -227.25827 11389.761 + 7210 322.17171 112.08697 -3402.899 -2923.693 -168.16759 11389.068 + 7220 315.41633 111.9642 -3398.8068 -2929.6489 -42.688455 11388.372 + 7230 306.75728 111.82565 -3394.1884 -2937.9101 109.49747 11387.675 + 7240 299.75192 111.75024 -3391.6746 -2945.8163 225.107 11386.979 + 7250 296.94242 111.64984 -3388.3279 -2946.6486 314.75258 11386.287 + 7260 298.36756 111.69087 -3389.6956 -2945.8964 311.69208 11385.6 + 7270 301.89974 111.71967 -3390.6556 -2941.6026 288.8049 11384.919 + 7280 305.0806 111.78993 -3392.9975 -2939.2133 245.45556 11384.244 + 7290 306.38781 111.8568 -3395.2265 -2939.4979 212.81786 11383.572 + 7300 305.99737 111.80866 -3393.6219 -2938.474 235.46369 11382.905 + 7310 305.13516 111.80801 -3393.6004 -2939.7349 246.59969 11382.241 + 7320 303.99678 111.83823 -3394.6078 -2942.4356 252.74041 11381.582 + 7330 301.90176 111.8407 -3394.69 -2945.6341 278.5201 11380.927 + 7340 299.63001 111.77717 -3392.5724 -2946.8954 327.23788 11380.277 + 7350 299.43806 111.77587 -3392.5291 -2947.1377 327.73821 11379.633 + 7360 302.65843 
111.859 -3395.3002 -2945.1187 260.82029 11378.995 + 7370 308.31051 111.9433 -3398.11 -2939.5215 168.59515 11378.361 + 7380 314.03248 112.02892 -3400.9642 -2933.8646 74.653662 11377.73 + 7390 317.48824 112.08096 -3402.6987 -2930.459 16.566072 11377.101 + 7400 317.73948 112.06396 -3402.1318 -2929.5184 21.699572 11376.471 + 7410 315.24994 112.01311 -3400.437 -2931.5266 76.853241 11375.842 + 7420 310.85429 111.93455 -3397.8184 -2935.4462 171.1985 11375.213 + 7430 305.61873 111.82631 -3394.2104 -2939.6257 290.14486 11374.588 + 7440 301.35723 111.76583 -3392.1944 -2943.9484 379.27336 11373.968 + 7450 300.47146 111.72207 -3390.7356 -2943.807 421.87304 11373.355 + 7460 304.47554 111.8031 -3393.4368 -2940.5525 361.85037 11372.749 + 7470 311.7176 111.93436 -3397.8119 -2934.1556 244.53058 11372.149 + 7480 317.95492 112.05706 -3401.902 -2928.9681 133.12663 11371.554 + 7490 319.59614 112.10332 -3403.444 -2928.069 90.19131 11370.961 + 7500 315.75908 112.02282 -3400.7608 -2931.0931 148.23486 11370.37 + 7510 307.84848 111.87461 -3395.8205 -2937.9192 273.50696 11369.781 + 7520 298.72376 111.7694 -3392.3132 -2947.9842 396.48778 11369.197 + 7530 291.69988 111.71886 -3390.6288 -2956.7473 481.33741 11368.62 + 7540 289.0237 111.69329 -3389.7764 -2959.8755 518.33236 11368.052 + 7550 290.59153 111.72647 -3390.8824 -2958.6495 493.82481 11367.494 + 7560 294.68547 111.80168 -3393.3893 -2955.067 428.51787 11366.944 + 7570 299.70592 111.86492 -3395.4973 -2949.7075 358.31761 11366.402 + 7580 304.67531 111.95905 -3398.6349 -2945.4534 276.31636 11365.867 + 7590 309.00426 112.02692 -3400.8973 -2941.2769 212.86255 11365.336 + 7600 311.46203 112.05986 -3401.9952 -2938.7191 182.85745 11364.81 + 7610 310.80941 112.07659 -3402.553 -2940.2476 187.69412 11364.287 + 7620 307.39673 112.03153 -3401.0511 -2943.8218 237.71094 11363.767 + 7630 304.20667 111.9863 -3399.5434 -2947.059 280.53065 11363.251 + 7640 305.5763 111.96982 -3398.994 -2944.4724 270.01404 11362.74 + 7650 312.61696 112.1191 -3403.97 
-2938.9759 146.91524 11362.234 + 7660 321.47271 112.27538 -3409.1794 -2931.0131 7.8785623 11361.73 + 7670 327.09684 112.33475 -3411.1584 -2924.6267 -58.964402 11361.226 + 7680 326.60985 112.35542 -3411.8474 -2926.04 -47.096325 11360.722 + 7690 319.839 112.23966 -3407.9886 -2932.2524 81.078704 11360.216 + 7700 309.3606 112.09366 -3403.1221 -2942.9717 257.80591 11359.711 + 7710 299.65746 111.94887 -3398.2956 -2952.5778 427.70347 11359.211 + 7720 293.94107 111.81144 -3393.7146 -2956.4996 554.34189 11358.719 + 7730 292.23183 111.76832 -3392.2774 -2957.6047 599.53259 11358.237 + 7740 292.71097 111.77349 -3392.4495 -2957.0642 595.07786 11357.766 + 7750 294.20634 111.82718 -3394.2393 -2956.6297 553.20449 11357.306 + 7760 296.55543 111.83991 -3394.6636 -2953.5598 516.99198 11356.856 + 7770 298.9295 111.88482 -3396.1606 -2951.5256 470.59494 11356.415 + 7780 300.43407 111.89968 -3396.6561 -2949.7832 447.8261 11355.983 + 7790 301.07367 111.93184 -3397.7281 -2949.9038 431.71753 11355.56 + 7800 300.78232 111.92888 -3397.6295 -2950.2386 440.63834 11355.144 + 7810 299.97033 111.90545 -3396.8482 -2950.6651 462.26893 11354.737 + 7820 300.12659 111.93765 -3397.9216 -2951.5061 451.95121 11354.338 + 7830 302.32759 112.00404 -3400.1348 -2950.4454 407.00272 11353.947 + 7840 305.83685 112.06482 -3402.1607 -2947.2516 348.55388 11353.564 + 7850 308.73688 112.08807 -3402.9358 -2943.7131 303.75437 11353.187 + 7860 310.26315 112.10836 -3403.6121 -2942.1192 268.22854 11352.816 + 7870 310.63418 112.10157 -3403.3858 -2941.341 251.69536 11352.449 + 7880 310.41189 112.10082 -3403.3606 -2941.6464 241.51835 11352.088 + 7890 309.39052 112.09866 -3403.2885 -2943.0936 246.42901 11351.73 + 7900 306.59047 112.06698 -3402.2326 -2946.2025 287.86462 11351.377 + 7910 301.63054 111.94804 -3398.268 -2949.6155 389.05512 11351.03 + 7920 295.95642 111.84481 -3394.8271 -2954.6144 496.59032 11350.689 + 7930 291.22766 111.82041 -3394.0137 -2960.8346 568.74095 11350.358 + 7940 288.59834 111.81126 -3393.7088 
-2964.4407 616.2564 11350.037 + 7950 288.72142 111.81623 -3393.8743 -2964.4231 630.30495 11349.728 + 7960 291.35083 111.91427 -3397.1423 -2963.78 580.24297 11349.43 + 7970 295.27853 111.95913 -3398.6375 -2959.4331 531.0658 11349.143 + 7980 299.70843 112.04406 -3401.4688 -2955.6752 453.09907 11348.866 + 7990 305.11528 112.1021 -3403.4034 -2949.5675 371.78987 11348.597 + 8000 311.28944 112.2009 -3406.6966 -2943.6772 268.88617 11348.336 + 8010 315.83904 112.30443 -3410.1478 -2940.3611 182.39209 11348.079 + 8020 316.16005 112.34148 -3411.3825 -2941.1184 163.23489 11347.826 + 8030 311.76679 112.29192 -3409.7308 -2946.0013 221.72591 11347.575 + 8040 304.7453 112.19447 -3406.4822 -2953.1967 323.23576 11347.329 + 8050 298.24403 112.11745 -3403.915 -2960.2996 410.31922 11347.089 + 8060 295.07606 112.06825 -3402.2749 -2963.3716 455.60842 11346.857 + 8070 295.79218 112.07203 -3402.4009 -2962.4325 444.55961 11346.633 + 8080 298.61892 112.13864 -3404.6212 -2960.4482 389.17131 11346.417 + 8090 300.76078 112.17027 -3405.6757 -2958.3169 353.46777 11346.209 + 8100 299.93421 112.1681 -3405.6032 -2959.4738 356.79664 11346.007 + 8110 295.99276 112.12178 -3404.0594 -2963.7926 404.20804 11345.812 + 8120 290.78013 112.07382 -3402.4607 -2969.9473 462.15621 11345.624 + 8130 286.96565 111.98196 -3399.3988 -2972.5591 520.62664 11345.446 + 8140 287.23986 111.98572 -3399.5241 -2972.2766 501.20178 11345.276 + 8150 292.93428 112.07983 -3402.6611 -2966.9435 396.85987 11345.116 + 8160 301.96698 112.24569 -3408.1897 -2959.0367 236.60381 11344.963 + 8170 309.5513 112.33249 -3411.083 -2950.6489 123.29773 11344.815 + 8180 311.98419 112.35703 -3411.9011 -2947.8482 86.075824 11344.669 + 8190 308.82749 112.28804 -3409.6013 -2950.2438 143.2382 11344.525 + 8200 302.26336 112.19754 -3406.5846 -2956.9908 246.8694 11344.383 + 8210 295.64787 112.11001 -3403.6671 -2963.9132 356.02362 11344.247 + 8220 291.25285 112.03966 -3401.322 -2968.1055 442.75149 11344.116 + 8230 289.1061 112.02243 -3400.7476 -2970.7241 
486.54313 11343.994 + 8240 289.08462 112.03649 -3401.2164 -2971.225 494.06168 11343.881 + 8250 291.38044 112.07723 -3402.5743 -2969.168 467.05559 11343.778 + 8260 294.68044 112.15441 -3405.147 -2966.8322 415.06511 11343.683 + 8270 296.69258 112.17858 -3405.9527 -2964.645 393.15632 11343.596 + 8280 296.62536 112.13628 -3404.5425 -2963.3348 414.74564 11343.516 + 8290 295.44616 112.17397 -3405.7989 -2966.3451 416.02766 11343.444 + 8300 294.27353 112.18403 -3406.1343 -2968.4247 428.02411 11343.38 + 8310 293.47291 112.17651 -3405.8838 -2969.3651 442.09035 11343.324 + 8320 293.20923 112.16604 -3405.5345 -2969.408 451.29146 11343.276 + 8330 293.37895 112.16265 -3405.4216 -2969.0426 453.54056 11343.237 + 8340 293.17434 112.17654 -3405.8848 -2969.8102 452.17679 11343.207 + 8350 291.35219 112.13175 -3404.3917 -2971.0274 486.27357 11343.185 + 8360 286.87666 112.08591 -3402.8637 -2976.1564 541.51548 11343.172 + 8370 279.90361 111.98636 -3399.5454 -2983.21 635.85067 11343.17 + 8380 273.13983 111.93354 -3397.7848 -2991.51 706.92172 11343.179 + 8390 270.55407 111.86327 -3395.4422 -2993.0136 748.69151 11343.202 + 8400 273.01304 111.88873 -3396.291 -2990.2048 712.6039 11343.239 + 8410 277.46032 111.95111 -3398.3703 -2985.6691 644.90535 11343.29 + 8420 280.98629 112.02173 -3400.7242 -2982.7784 578.18296 11343.353 + 8430 283.42861 112.07813 -3402.6045 -2981.0259 521.42184 11343.426 + 8440 286.72442 112.12112 -3404.0373 -2977.5564 459.53741 11343.51 + 8450 291.7267 112.2044 -3406.8132 -2972.8919 365.59913 11343.603 + 8460 296.5633 112.29276 -3409.7587 -2968.6432 269.22624 11343.702 + 8470 298.9194 112.3279 -3410.93 -2966.3101 213.58812 11343.807 + 8480 298.71888 112.31627 -3410.5424 -2966.2207 201.91582 11343.916 + 8490 297.17189 112.26233 -3408.7442 -2966.7235 226.26406 11344.03 + 8500 294.90301 112.22383 -3407.461 -2968.8151 259.18716 11344.147 + 8510 292.39286 112.18913 -3406.3042 -2971.392 296.96289 11344.27 + 8520 291.14853 112.17538 -3405.846 -2972.7846 317.20394 11344.398 + 
8530 291.91158 112.22667 -3407.5555 -2973.3592 296.58708 11344.533 + 8540 292.91813 112.20303 -3406.7678 -2971.0743 300.20133 11344.673 + 8550 292.41103 112.21716 -3407.2388 -2972.2995 296.55479 11344.819 + 8560 290.68114 112.17923 -3405.9744 -2973.6082 315.31139 11344.971 + 8570 289.55071 112.13935 -3404.6451 -2973.9604 319.78648 11345.129 + 8580 290.6424 112.19571 -3406.5237 -2974.2152 262.81263 11345.294 + 8590 293.21275 112.22539 -3407.5131 -2971.3814 202.44971 11345.463 + 8600 295.43347 112.30834 -3410.2779 -2970.8431 127.32619 11345.636 + 8610 296.92031 112.35132 -3411.7105 -2970.0641 79.917896 11345.813 + 8620 298.80424 112.35356 -3411.7855 -2967.3368 55.019334 11345.991 + 8630 301.09269 112.47699 -3415.8997 -2968.0471 -8.9530463 11346.17 + 8640 302.0151 112.47499 -3415.8331 -2966.6086 -7.2783877 11346.349 + 8650 300.48551 112.47536 -3415.8453 -2968.8959 14.896825 11346.529 + 8660 297.18394 112.39077 -3413.0258 -2970.9872 83.855865 11346.709 + 8670 293.06895 112.33588 -3411.1961 -2975.2782 143.368 11346.891 + 8680 289.20075 112.28907 -3409.6357 -2979.4715 190.45842 11347.076 + 8690 286.48191 112.27321 -3409.107 -2982.9869 208.66022 11347.265 + 8700 285.2568 112.22303 -3407.4342 -2983.1364 223.78377 11347.458 + 8710 284.66091 112.21085 -3407.0284 -2983.6169 221.89784 11347.655 + 8720 282.71291 112.18784 -3406.2612 -2985.7472 244.16641 11347.857 + 8730 277.88776 112.09313 -3403.1044 -2989.7675 327.27761 11348.064 + 8740 271.06737 111.98882 -3399.6275 -2996.4353 437.0719 11348.277 + 8750 265.18763 111.90797 -3396.9322 -3002.4857 531.24712 11348.499 + 8760 263.21239 111.84863 -3394.9545 -3003.446 579.13885 11348.731 + 8770 266.98443 111.86908 -3395.6361 -2998.517 540.52799 11348.974 + 8780 276.00421 112.05186 -3401.7288 -2991.1934 388.42612 11349.227 + 8790 287.04901 112.2291 -3407.6367 -2980.6731 215.77508 11349.489 + 8800 296.45048 112.39481 -3413.1605 -2972.2128 61.542232 11349.754 + 8810 303.84332 102.5009 -3416.6966 -2964.7527 -102.60393 11350.021 + 8820 
308.35593 102.54287 -3418.0955 -2959.4395 -156.26397 11350.286 + 8830 309.34553 102.56267 -3418.7555 -2958.6275 -169.44279 11350.548 + 8840 307.92936 102.57728 -3419.2427 -2961.2211 -159.36337 11350.808 + 8850 306.49168 102.51666 -3417.2219 -2961.3388 -120.8613 11351.065 + 8860 307.02003 102.50577 -3416.8591 -2960.19 -118.8501 11351.32 + 8870 308.78244 102.51765 -3417.255 -2957.9645 -139.87882 11351.573 + 8880 309.4166 102.52413 -3417.471 -2957.2372 -156.54316 11351.824 + 8890 308.17218 102.52172 -3417.3905 -2959.0077 -161.32511 11352.072 + 8900 306.82798 102.52569 -3417.5232 -2961.1398 -170.00394 11352.317 + 8910 306.92686 102.52414 -3417.4712 -2960.9407 -186.21426 11352.56 + 8920 308.0801 102.54255 -3418.0851 -2959.8393 -215.35285 11352.799 + 8930 309.07511 102.53491 -3417.8303 -2958.1045 -229.29742 11353.034 + 8940 308.83435 102.54793 -3418.2644 -2958.8967 -236.83009 11353.266 + 8950 306.20013 102.50473 -3416.8242 -2961.3747 -198.18845 11353.493 + 8960 299.78229 102.45513 -3415.1708 -2969.2674 -124.11711 11353.717 + 8970 290.40609 102.32891 -3410.9636 -2979.0066 1.3665885 11353.939 + 8980 282.5525 102.23389 -3407.7963 -2987.5209 99.179915 11354.161 + 8990 281.20504 102.18256 -3406.0854 -2987.8142 125.84255 11354.385 + 9000 287.0527 102.25645 -3408.5483 -2981.5791 46.683648 11354.612 + 9010 295.6651 102.36854 -3412.2846 -2972.5052 -70.693581 11354.84 + 9020 302.05074 102.44673 -3414.8911 -2965.6136 -159.48967 11355.067 + 9030 304.50107 102.50445 -3416.8148 -2963.8926 -211.01569 11355.291 + 9040 304.28774 102.51765 -3417.255 -2964.6501 -223.85983 11355.511 + 9050 302.28659 102.46413 -3415.471 -2965.8426 -196.35161 11355.728 + 9060 298.64342 102.41317 -3413.7725 -2969.563 -159.56094 11355.942 + 9070 294.89692 102.3567 -3411.8898 -2973.253 -123.95652 11356.152 + 9080 292.89751 102.31892 -3410.6307 -2974.9679 -113.05552 11356.36 + 9090 293.44894 102.39098 -3413.0327 -2976.5497 -164.79219 11356.567 + 9100 296.30539 102.43897 -3414.6325 -2973.9007 -222.07876 11356.77 + 
9110 300.45266 102.4672 -3415.5735 -2968.6729 -273.82557 11356.97 + 9120 303.24645 102.47481 -3415.8271 -2964.771 -295.97287 11357.165 + 9130 302.0034 102.50375 -3416.7918 -2967.5846 -285.82159 11357.354 + 9140 297.22892 102.43941 -3414.6469 -2972.5414 -207.07355 11357.538 + 9150 291.80756 102.39282 -3413.0939 -2979.0523 -128.58195 11357.719 + 9160 288.25245 102.34705 -3411.5685 -2982.8148 -67.13035 11357.898 + 9170 287.47436 102.34359 -3411.4531 -2983.8568 -50.741803 11358.076 + 9180 289.65745 102.36539 -3412.1798 -2981.3363 -76.008823 11358.253 + 9190 294.40993 102.43028 -3414.3425 -2976.4301 -145.63657 11358.428 + 9200 300.16761 102.47892 -3415.9641 -2969.4875 -218.94764 11358.601 + 9210 304.03105 102.53822 -3417.9405 -2965.7174 -279.64516 11358.771 + 9220 304.45288 102.53714 -3417.9046 -2965.054 -285.20667 11358.935 + 9230 302.34912 102.45421 -3415.1404 -2965.419 -234.46962 11359.094 + 9240 299.9859 102.40287 -3413.429 -2967.2227 -191.14447 11359.249 + 9250 299.26036 102.40403 -3413.4677 -2968.3406 -182.52388 11359.401 + 9260 300.61341 102.41715 -3413.905 -2966.7654 -201.16731 11359.549 + 9270 303.15524 102.4879 -3416.2632 -2965.3428 -259.88954 11359.693 + 9280 305.7968 102.55222 -3418.4075 -2963.5579 -324.73273 11359.833 + 9290 308.13692 102.54889 -3418.2962 -2959.9659 -365.60648 11359.967 + 9300 310.01596 102.56219 -3418.7396 -2957.6143 -409.04518 11360.094 + 9310 310.51245 102.5781 -3419.27 -2957.4063 -438.84588 11360.214 + 9320 308.74264 102.52394 -3417.4648 -2958.2335 -415.60354 11360.325 + 9330 304.79422 102.4761 -3415.8701 -2962.5118 -367.20347 11360.429 + 9340 299.98192 102.39538 -3413.1793 -2966.9789 -290.51501 11360.526 + 9350 296.40204 102.34525 -3411.5082 -2970.6326 -230.13543 11360.618 + 9360 295.69416 102.34673 -3411.5575 -2971.7349 -209.91046 11360.705 + 9370 297.70643 102.38972 -3412.9908 -2970.1751 -224.99069 11360.789 + 9380 299.82096 102.43428 -3414.4761 -2968.5151 -239.37907 11360.868 + 9390 298.72337 102.42593 -3414.1978 -2969.8694 
-206.21757 11360.943 + 9400 293.6059 102.34454 -3411.4846 -2974.768 -112.34303 11361.015 + 9410 287.13801 102.25842 -3408.614 -2981.5179 -5.7729342 11361.084 + 9420 282.8911 102.1548 -3405.1599 -2984.3808 87.012111 11361.153 + 9430 283.42692 102.17512 -3405.8373 -2984.2613 87.107479 11361.224 + 9440 289.72806 102.25543 -3408.5142 -2977.5657 10.6752 11361.297 + 9450 299.9244 102.41878 -3413.9592 -2967.8444 -133.86835 11361.37 + 9460 309.66009 102.532 -3417.7333 -2957.1374 -259.63954 11361.441 + 9470 315.28802 102.61817 -3420.6055 -2951.6385 -343.43136 11361.507 + 9480 315.52197 102.64969 -3421.6563 -2952.3412 -362.61942 11361.567 + 9490 311.57319 102.59769 -3419.923 -2956.4815 -315.2172 11361.62 + 9500 306.2565 102.4815 -3416.0499 -2960.5165 -230.75414 11361.667 + 9510 301.77507 102.40965 -3413.655 -2964.7874 -168.80118 11361.711 + 9520 298.06371 102.36942 -3412.3141 -2968.9669 -127.60351 11361.75 + 9530 294.76822 102.34731 -3411.5771 -2973.1317 -102.52302 11361.788 + 9540 292.90584 102.28385 -3409.4618 -2973.7865 -77.156962 11361.824 + 9550 293.16449 102.2921 -3409.7367 -2973.6767 -95.839672 11361.858 + 9560 294.55295 102.32549 -3410.8498 -2972.7246 -130.00817 11361.89 + 9570 295.8006 102.34483 -3411.4945 -2971.5135 -152.72227 11361.921 + 9580 296.56754 102.34105 -3411.3684 -2970.2466 -158.45379 11361.948 + 9590 297.31038 102.32507 -3410.8355 -2968.6089 -157.7167 11361.972 + 9600 298.77172 102.30026 -3410.0086 -2965.6083 -160.9185 11361.994 + 9610 301.97013 102.34529 -3411.5097 -2962.352 -206.50568 11362.013 + 9620 306.75609 102.42638 -3414.2128 -2957.9364 -278.49373 11362.027 + 9630 310.90991 102.52954 -3417.6512 -2955.1963 -353.7554 11362.037 + 9640 312.31799 102.61062 -3420.354 -2955.8047 -397.46392 11362.04 + 9650 310.56146 102.5724 -3419.0801 -2957.1435 -366.33137 11362.036 + 9660 306.52496 102.52312 -3417.4373 -2961.5047 -309.52269 11362.025 + 9670 301.5321 102.45538 -3415.1793 -2966.6732 -237.6061 11362.008 + 9680 296.45601 102.39829 -3413.2764 -2972.3206 
-171.66047 11361.986 + 9690 291.72252 102.30306 -3410.1021 -2976.187 -97.600507 11361.962 + 9700 287.60936 102.25271 -3408.4236 -2980.6264 -50.45167 11361.936 + 9710 284.48914 102.20566 -3406.8552 -2983.6992 -17.860652 11361.908 + 9720 283.50169 102.17913 -3405.971 -2984.2837 -14.443294 11361.881 + 9730 286.7196 102.21649 -3407.2163 -2980.7426 -68.48769 11361.852 + 9740 294.47386 102.35054 -3411.6845 -2973.677 -189.93928 11361.823 + 9750 303.04659 102.46162 -3415.3875 -2964.6286 -302.19943 11361.79 + 9760 307.57479 102.51124 -3417.0413 -2959.5471 -355.30344 11361.752 + 9770 306.33697 102.48869 -3416.2898 -2960.6367 -335.18312 11361.707 + 9780 301.59673 102.4207 -3414.0233 -2965.421 -271.49738 11361.655 + 9790 297.79054 102.41425 -3413.8083 -2970.8674 -241.91326 11361.599 + 9800 298.13818 102.4199 -3413.9967 -2970.5387 -252.57086 11361.537 + 9810 302.00013 102.47911 -3415.9702 -2966.7679 -313.68954 11361.471 + 9820 306.02146 102.5605 -3418.6833 -2963.4996 -383.20843 11361.399 + 9830 306.37399 102.56224 -3418.7413 -2963.0332 -388.56733 11361.32 + 9840 301.99104 102.49174 -3416.3914 -2967.2027 -323.29197 11361.234 + 9850 296.02715 102.3854 -3412.8467 -2972.5287 -226.75584 11361.141 + 9860 292.42188 102.30722 -3410.2407 -2975.2854 -155.99935 11361.044 + 9870 291.8673 102.30306 -3410.1021 -2975.9716 -137.71183 11360.944 + 9880 292.47183 102.29279 -3409.7597 -2974.73 -129.16333 11360.842 + 9890 292.67046 102.31184 -3410.3947 -2975.0696 -133.25336 11360.737 + 9900 292.93826 102.35955 -3411.9851 -2976.2616 -152.45548 11360.629 + 9910 292.98738 112.38031 -3412.677 -2976.8805 -98.388723 11360.519 + 9920 291.50325 112.33603 -3411.2009 -2977.612 -60.193026 11360.407 + 9930 289.16 112.27732 -3409.2439 -2979.1403 -8.3725785 11360.294 + 9940 287.58579 112.26303 -3408.7678 -2981.0057 18.664629 11360.18 + 9950 287.74855 112.25799 -3408.5998 -2980.5956 25.021437 11360.067 + 9960 289.38626 112.25385 -3408.4617 -2978.0215 15.413572 11359.954 + 9970 291.31111 112.27816 -3409.2721 
-2975.9689 -9.6707803 11359.842 + 9980 291.60283 112.29087 -3409.6956 -2975.9585 -17.695205 11359.729 + 9990 289.34786 112.2374 -3407.9134 -2977.5304 19.599433 11359.616 + 10000 286.02579 112.17791 -3405.9303 -2980.4887 67.188883 11359.503 +Loop time of 2.50279 on 1 procs for 10000 steps with 500 atoms -Performance: 265.041 ns/day, 0.091 hours/ns, 3067.606 timesteps/s -100.1% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 345.215 ns/day, 0.070 hours/ns, 3995.539 timesteps/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.5777 | 2.5777 | 2.5777 | 0.0 | 79.07 -Bond | 0.0006814 | 0.0006814 | 0.0006814 | 0.0 | 0.02 -Neigh | 0.019258 | 0.019258 | 0.019258 | 0.0 | 0.59 -Comm | 0.088224 | 0.088224 | 0.088224 | 0.0 | 2.71 -Output | 0.0010309 | 0.0010309 | 0.0010309 | 0.0 | 0.03 -Modify | 0.33808 | 0.33808 | 0.33808 | 0.0 | 10.37 -Other | | 0.2349 | | | 7.20 +Pair | 2.0544 | 2.0544 | 2.0544 | 0.0 | 82.08 +Bond | 0.00075364 | 0.00075364 | 0.00075364 | 0.0 | 0.03 +Neigh | 0.015478 | 0.015478 | 0.015478 | 0.0 | 0.62 +Comm | 0.07215 | 0.07215 | 0.07215 | 0.0 | 2.88 +Output | 0.0074148 | 0.0074148 | 0.0074148 | 0.0 | 0.30 +Modify | 0.23494 | 0.23494 | 0.23494 | 0.0 | 9.39 +Other | | 0.1177 | | | 4.70 Nlocal: 500 ave 500 max 500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -171,4 +1076,4 @@ Neighbor list builds = 20 Dangerous builds = 0 #write_data ${rep}/lj-out.data -Total wall time: 0:00:03 +Total wall time: 0:00:02 diff --git a/examples/USER/misc/grem/lj-temper/1/log.lammps.1 b/examples/USER/misc/grem/lj-temper/1/log.lammps.1 index 8b12a2c318b10c519328746581e2cf828e086afc..3241f17a5cca70ef71e219a6b95aa495c768aa61 100644 --- a/examples/USER/misc/grem/lj-temper/1/log.lammps.1 +++ b/examples/USER/misc/grem/lj-temper/1/log.lammps.1 @@ -16,10 +16,6 @@ read_data 1/lj.data #dump dump all xyz 1000 ${rep}/dump.xyz 
-thermo 100 -thermo_style custom step temp pe etotal press vol -timestep 1.0 - fix fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso ${press} ${press} 10000.0 @@ -27,135 +23,1044 @@ fix fxnpt all npt temp 300 300 1000.0 iso 0 ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso 0 0 10000.0 fix fxgREM all grem ${lambda} -.03 -30000 fxnpt fix fxgREM all grem 910 -.03 -30000 fxnpt + +thermo 10 +thermo_style custom step temp f_fxgREM pe etotal press vol thermo_modify press fxgREM_press +timestep 1.0 temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} temper/grem 10000 100 910 fxgREM fxnpt 10294 98392 Neighbor list info ... - 1 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 7 ghost atom cutoff = 7 - binsize = 3.5 -> bins = 7 7 7 -Memory usage per processor = 5.38334 Mbytes -Step Temp PotEng TotEng Press Volume - 0 288.55656 -3406.7669 -2977.5609 178.123 11354.789 - 100 292.15244 -3406.6138 -2972.0592 101.11822 11354.9 - 200 284.76856 -3403.8146 -2980.243 229.78409 11355.232 - 300 292.70849 -3408.8867 -2973.505 148.33374 11355.918 - 400 300.34739 -3407.488 -2960.744 182.63695 11357.028 - 500 285.96923 -3401.9081 -2976.5506 276.3915 11358.563 - 600 307.70286 -3413.8121 -2956.1274 -99.74504 11360.502 - 700 282.68114 -3402.3696 -2981.9028 215.37371 11362.639 - 800 296.16373 -3406.3812 -2965.8601 17.480161 11365.099 - 900 295.1242 -3403.489 -2964.5141 139.94644 11367.775 - 1000 298.65751 -3404.9437 -2960.7133 43.569751 11370.721 - 1100 299.9828 -3403.2527 -2957.051 1.5561287 11373.784 - 1200 283.05677 -3393.2885 -2972.263 279.02297 11376.992 - 1300 301.24616 -3401.9018 -2953.821 -61.001277 11380.464 - 1400 289.04947 -3395.7975 -2965.8583 92.997624 11383.831 - 1500 274.01283 -3384.3607 -2976.7874 455.32787 11387.235 - 1600 
302.26525 -3397.7531 -2948.1564 12.134543 11391.218 - 1700 293.79228 -3393.1889 -2956.1952 57.363507 11395.294 - 1800 306.98845 -3396.1039 -2939.4818 -135.9312 11399.326 - 1900 293.15719 -3386.5828 -2950.5337 132.42643 11403.179 - 2000 307.19197 -3395.5097 -2938.585 -161.2466 11406.936 - 2100 307.45189 -3392.5861 -2935.2747 -157.46524 11410.403 - 2200 300.88603 -3388.4914 -2940.9462 -28.501997 11413.598 - 2300 289.88236 -3384.6698 -2953.4918 122.58753 11416.723 - 2400 296.43305 -3388.0129 -2947.0912 -25.774318 11420.076 - 2500 317.23755 -3397.1902 -2925.3234 -459.4925 11423.59 - 2600 287.60187 -3378.3654 -2950.5794 184.57759 11426.853 - 2700 303.64909 -3386.2885 -2934.6335 -135.08152 11430.242 - 2800 302.47742 -3388.9859 -2939.0737 -323.13721 11433.517 - 2900 291.98004 -3377.25 -2942.9518 -47.677493 11436.421 - 3000 307.8161 -3390.9845 -2933.1314 -381.60025 11439.119 - 3100 279.57067 -3372.6802 -2956.8401 112.35716 11441.316 - 3200 288.77945 -3377.9064 -2948.3688 -56.270381 11443.418 - 3300 304.79453 -3385.7254 -2932.3667 -445.46244 11445.179 - 3400 292.93209 -3377.8456 -2942.1313 -194.7257 11446.284 - 3500 306.76065 -3386.1118 -2929.8286 -428.54219 11447.116 - 3600 309.27383 -3386.5092 -2926.4878 -396.29387 11447.442 - 3700 295.73613 -3380.6566 -2940.7715 -224.46949 11447.298 - 3800 286.51576 -3373.1466 -2946.9762 -38.200592 11446.876 - 3900 297.11081 -3379.8253 -2937.8955 -209.71089 11446.168 - 4000 298.6827 -3378.5589 -2934.291 -161.67235 11445.075 - 4100 296.79632 -3375.8545 -2934.3925 -3.2770185 11443.642 - 4200 293.39337 -3373.8298 -2937.4294 61.151041 11442.097 - 4300 305.32078 -3376.9036 -2922.7621 -35.097493 11440.456 - 4400 288.33476 -3367.9377 -2939.0616 323.25684 11438.693 - 4500 301.83114 -3379.5242 -2930.5732 44.950393 11437.061 - 4600 296.53718 -3370.7936 -2929.717 247.99542 11435.523 - 4700 313.14834 -3380.7524 -2914.9679 24.50122 11434.29 - 4800 316.57471 -3385.4942 -2914.6133 -89.103894 11433.295 - 4900 295.8169 -3373.4805 -2933.4753 186.45389 
11432.498 - 5000 302.82001 -3376.4769 -2926.0551 126.1166 11431.848 - 5100 293.31592 -3369.1186 -2932.8334 398.18962 11431.273 - 5200 300.8404 -3371.9981 -2924.5208 325.82663 11431.294 - 5300 288.65507 -3366.8598 -2937.5072 560.98385 11432.053 - 5400 300.15414 -3370.3662 -2923.9097 331.21385 11433.792 - 5500 293.62342 -3364.1197 -2927.3771 510.06055 11436.251 - 5600 293.52337 -3365.0972 -2928.5034 443.00006 11439.517 - 5700 314.46913 -3373.0792 -2905.3302 116.1735 11443.39 - 5800 316.07035 -3371.7131 -2901.5824 59.579025 11447.527 - 5900 302.07546 -3364.3048 -2914.9904 292.07764 11452.145 - 6000 298.16549 -3363.0572 -2919.5586 322.85603 11457.452 - 6100 299.62417 -3361.8594 -2916.1912 270.01732 11463.408 - 6200 307.26123 -3366.6924 -2909.6646 33.334477 11469.872 - 6300 301.85984 -3360.01 -2911.0164 152.48267 11476.656 - 6400 304.04037 -3359.0287 -2906.7917 85.558236 11483.787 - 6500 315.39938 -3361.3137 -2892.181 -27.949849 11491.188 - 6600 312.50182 -3354.0774 -2889.2547 14.380903 11498.754 - 6700 312.24745 -3354.727 -2890.2826 -11.564949 11506.565 - 6800 299.5486 -3344.6674 -2899.1115 140.00843 11514.587 - 6900 319.34442 -3353.0254 -2878.0248 -369.57931 11522.809 - 7000 312.5114 -3351.5646 -2886.7275 -338.9529 11530.444 - 7100 301.83696 -3339.7552 -2890.7956 -59.056671 11537.499 - 7200 317.79602 -3352.0354 -2879.3379 -476.34779 11544.273 - 7300 305.24957 -3341.8025 -2887.7669 -284.85596 11550.312 - 7400 327.39994 -3353.74 -2866.7574 -694.54924 11555.826 - 7500 320.25094 -3351.0448 -2874.6958 -667.61837 11560.396 - 7600 298.07438 -3332.5105 -2889.1475 -256.85225 11564.025 - 7700 310.50497 -3344.7831 -2882.9305 -657.61332 11566.855 - 7800 314.16294 -3347.8412 -2880.5476 -688.03112 11568.446 - 7900 300.40053 -3338.4536 -2891.6306 -476.3332 11568.775 - 8000 316.35903 -3348.7364 -2878.1763 -751.21864 11567.899 - 8100 308.957 -3347.7238 -2888.1737 -689.04336 11565.712 - 8200 296.70594 -3337.3321 -2896.0045 -354.23246 11562.212 - 8300 306.40458 -3348.666 -2892.9124 
-647.83868 11557.563 - 8400 314.95559 -3354.0671 -2885.5945 -672.95129 11551.603 - 8500 299.19386 -3343.5909 -2898.5628 -244.49812 11544.459 - 8600 313.97242 -3350.861 -2883.8508 -417.72283 11536.587 - 8700 308.40661 -3348.3697 -2889.6382 -234.46379 11527.955 - 8800 299.75365 -3345.1001 -2899.2392 30.349293 11518.756 - 8900 310.85222 -3354.562 -2892.1929 -112.13993 11509.361 - 9000 307.93603 -3354.4949 -2896.4634 -24.119632 11499.746 - 9100 294.88785 -3349.2164 -2910.593 357.76086 11489.968 - 9200 296.56133 -3350.8626 -2909.7501 415.59418 11480.36 - 9300 304.6863 -3357.1682 -2903.9704 367.05129 11471.041 - 9400 314.87579 -3363.7046 -2895.3507 267.77411 11462.364 - 9500 295.39002 -3353.7081 -2914.3378 677.18586 11454.261 - 9600 310.14825 -3364.8576 -2903.5356 492.49166 11447.14 - 9700 302.90439 -3360.306 -2909.7587 620.88798 11441.001 - 9800 308.43227 -3363.4547 -2904.6851 678.75194 11435.989 - 9900 297.04137 -3358.0021 -2916.1755 801.45219 11432.184 - 10000 306.41271 -3360.5568 -2904.7911 825.76031 11429.68 -Loop time of 3.25989 on 1 procs for 10000 steps with 500 atoms + binsize = 3.5, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.729 | 5.729 | 5.729 Mbytes +Step Temp f_fxgREM PotEng TotEng Press Volume + 0 288.55656 112.20301 -3406.7669 -2977.5609 178.123 11354.789 + 10 288.31142 112.19886 -3406.6287 -2977.7873 169.48583 11354.791 + 20 289.8223 112.2004 -3406.6801 -2975.5914 143.0151 11354.796 + 30 291.70551 112.20808 -3406.9361 -2973.0463 110.8113 11354.804 + 40 292.3991 112.19188 -3406.3961 -2971.4746 97.5904 11354.814 + 50 292.5823 112.21058 -3407.0192 -2971.8253 77.444998 11354.825 + 60 293.9295 112.21708 -3407.2359 -2970.038 57.79557 11354.839 + 70 296.04201 112.2877 -3409.5899 -2969.2498 13.811528 11354.853 + 80 296.75736 112.29156 -3409.7185 -2968.3144 
11.76625 11354.868 + 90 295.13392 112.24842 -3408.2806 -2969.2912 48.889984 11354.884 + 100 292.15244 112.19841 -3406.6138 -2972.0592 101.11822 11354.9 + 110 289.12916 112.15164 -3405.0545 -2974.9968 152.31767 11354.918 + 120 287.27612 112.08096 -3402.6986 -2975.3971 202.34047 11354.94 + 130 287.42354 112.0831 -3402.7701 -2975.2494 207.659 11354.965 + 140 289.21301 112.14415 -3404.805 -2974.6226 176.72962 11354.993 + 150 291.22691 112.22079 -3407.3598 -2974.1819 138.20681 11355.026 + 160 291.8086 112.23001 -3407.667 -2973.6239 136.41994 11355.061 + 170 290.14945 112.24531 -3408.1771 -2976.6018 147.28873 11355.098 + 180 287.06496 112.19729 -3406.5762 -2979.5889 189.13996 11355.139 + 190 284.67717 112.11349 -3403.7829 -2980.3472 235.97079 11355.183 + 200 284.76856 112.11444 -3403.8146 -2980.243 229.78409 11355.232 + 210 287.30412 112.19165 -3406.3882 -2979.0451 178.2025 11355.285 + 220 290.24387 112.2716 -3409.0532 -2977.3375 128.2392 11355.341 + 230 291.75312 112.26846 -3408.9488 -2974.9881 127.5165 11355.4 + 240 291.56602 112.24089 -3408.0298 -2974.3475 156.54095 11355.461 + 250 289.54014 112.18263 -3406.0876 -2975.4186 212.94237 11355.526 + 260 286.70194 112.15809 -3405.2696 -2978.8222 257.79759 11355.594 + 270 285.294 112.15348 -3405.1159 -2980.7627 275.81206 11355.668 + 280 286.55908 112.15001 -3405.0003 -2978.7654 263.50914 11355.747 + 290 289.68555 112.21705 -3407.235 -2976.3497 203.07914 11355.83 + 300 292.70849 112.2666 -3408.8867 -2973.505 148.33374 11355.918 + 310 294.49815 112.29546 -3409.8488 -2971.8051 116.25484 11356.008 + 320 295.01917 112.26366 -3408.7887 -2969.97 128.97587 11356.101 + 330 293.62796 112.22757 -3407.5857 -2970.8364 172.03075 11356.196 + 340 289.75335 112.17364 -3405.7881 -2974.802 251.12304 11356.295 + 350 284.97643 112.09285 -3403.0949 -2979.214 349.19648 11356.399 + 360 281.8078 112.02317 -3400.7724 -2981.6047 424.30214 11356.509 + 370 281.86417 111.99526 -3399.842 -2980.5904 446.31739 11356.627 + 380 285.76759 112.05907 -3401.969 
-2976.9114 388.90905 11356.754 + 390 292.89931 112.11358 -3403.7861 -2968.1206 299.89217 11356.888 + 400 300.34739 112.22464 -3407.488 -2960.744 182.63695 11357.028 + 410 304.41494 112.31486 -3410.4953 -2957.7011 100.39899 11357.171 + 420 303.85492 112.30406 -3410.1355 -2958.1743 95.619711 11357.317 + 430 300.81739 112.25584 -3408.528 -2961.0849 126.6436 11357.464 + 440 298.33667 112.26429 -3408.8098 -2965.0566 131.90634 11357.614 + 450 297.59531 112.2703 -3409.0098 -2966.3594 123.7656 11357.766 + 460 297.63297 112.29317 -3409.7725 -2967.066 102.31468 11357.921 + 470 297.08461 112.30627 -3410.2092 -2968.3183 91.899247 11358.078 + 480 294.93467 112.27288 -3409.096 -2970.403 117.26815 11358.237 + 490 290.85555 112.17178 -3405.726 -2973.1004 189.72375 11358.398 + 500 285.96923 112.05724 -3401.9081 -2976.5506 276.3915 11358.563 + 510 282.89047 111.98875 -3399.6252 -2978.847 332.0296 11358.733 + 520 282.00698 111.95278 -3398.4259 -2978.9619 356.61549 11358.909 + 530 281.59566 111.97984 -3399.3281 -2980.4759 347.9844 11359.092 + 540 282.13143 112.00287 -3400.0957 -2980.4466 327.18332 11359.282 + 550 285.29816 112.05396 -3401.7988 -2977.4394 270.12617 11359.477 + 560 290.61836 112.13691 -3404.5637 -2972.2909 180.78896 11359.678 + 570 296.03555 112.19783 -3406.5945 -2966.264 98.242808 11359.883 + 580 300.53871 112.26318 -3408.7725 -2961.744 23.345649 11360.089 + 590 304.57492 112.35224 -3411.7414 -2958.7093 -51.135366 11360.296 + 600 307.70286 112.41436 -3413.8121 -2956.1274 -99.74504 11360.502 + 610 307.4453 112.41196 -3413.7319 -2956.4303 -89.528722 11360.707 + 620 302.60338 112.36055 -3412.0183 -2961.9187 -20.95705 11360.909 + 630 295.45279 112.24699 -3408.2329 -2968.7692 88.92139 11361.112 + 640 288.99864 112.14528 -3404.8425 -2974.9789 184.507 11361.316 + 650 284.34769 112.06977 -3402.3256 -2979.3799 248.31274 11361.524 + 660 281.61709 111.99077 -3399.6924 -2980.8083 291.11773 11361.737 + 670 280.81464 112.02279 -3400.7596 -2983.0691 272.78278 11361.955 + 680 
281.12808 112.05752 -3401.9172 -2983.7605 245.57676 11362.178 + 690 281.62922 112.06372 -3402.1239 -2983.2218 230.96812 11362.406 + 700 282.68114 112.07109 -3402.3696 -2981.9028 215.37371 11362.639 + 710 285.68075 112.10301 -3403.4336 -2978.5051 179.25783 11362.875 + 720 291.09563 112.17909 -3405.9695 -2972.9868 110.88608 11363.115 + 730 296.71165 112.25454 -3408.4848 -2967.1487 43.449083 11363.358 + 740 298.95953 112.34075 -3411.3584 -2966.6788 -0.16353109 11363.601 + 750 296.12026 112.26709 -3408.903 -2968.4466 55.376662 11363.845 + 760 290.41552 112.14957 -3404.9858 -2973.0147 143.51434 11364.089 + 770 286.15427 112.06806 -3402.2686 -2976.6358 197.31982 11364.337 + 780 286.44173 112.09388 -3403.1293 -2977.0689 166.80088 11364.588 + 790 291.05969 112.11693 -3403.8977 -2970.9685 101.12001 11364.842 + 800 296.16373 112.19144 -3406.3812 -2965.8601 17.480161 11365.099 + 810 297.57076 112.16867 -3405.6224 -2963.0084 10.893497 11365.356 + 820 295.03676 112.10776 -3403.5921 -2964.7473 62.199066 11365.613 + 830 291.33925 112.06297 -3402.0989 -2968.7538 126.16879 11365.871 + 840 288.62338 112.02665 -3400.8882 -2971.5828 181.67525 11366.132 + 850 287.67016 111.98421 -3399.4736 -2971.5861 220.14947 11366.397 + 860 288.98767 112.00317 -3400.1058 -2970.2585 209.25845 11366.666 + 870 292.17389 112.03349 -3401.1163 -2966.5298 174.42425 11366.939 + 880 295.40208 112.08024 -3402.6746 -2963.2864 133.4123 11367.215 + 890 296.65783 112.12162 -3404.0539 -2962.7979 113.59974 11367.494 + 900 295.1242 112.10467 -3403.489 -2964.5141 139.94644 11367.775 + 910 291.92281 112.00467 -3400.1557 -2965.9426 209.05038 11368.059 + 920 289.32856 111.97002 -3399.0008 -2968.6465 240.69528 11368.347 + 930 289.67692 111.96061 -3398.6872 -2967.8147 228.61897 11368.639 + 940 294.3022 112.0659 -3402.1968 -2964.4446 130.39654 11368.936 + 950 301.51105 112.13134 -3404.378 -2955.9032 25.61018 11369.235 + 960 306.83743 112.27981 -3409.327 -2952.9296 -91.059043 11369.535 + 970 307.4444 112.29416 -3409.8054 
-2952.5051 -109.84508 11369.833 + 980 304.50616 112.27686 -3409.2287 -2956.2989 -78.391305 11370.13 + 990 301.06082 112.21176 -3407.0587 -2959.2536 -15.295458 11370.425 + 1000 298.65751 112.14831 -3404.9437 -2960.7133 43.569751 11370.721 + 1010 297.03945 112.14146 -3404.7155 -2962.8918 73.28153 11371.017 + 1020 295.59833 112.09804 -3403.268 -2963.5879 110.62196 11371.314 + 1030 293.93194 112.05872 -3401.9573 -2964.7558 139.816 11371.614 + 1040 292.24705 112.00732 -3400.2439 -2965.5486 164.01425 11371.917 + 1050 291.34252 111.98463 -3399.4876 -2966.1376 163.57877 11372.222 + 1060 291.75453 112.00753 -3400.2511 -2966.2883 132.24681 11372.531 + 1070 293.50063 112.03285 -3401.0949 -2964.535 90.509052 11372.842 + 1080 296.32578 112.0879 -3402.93 -2962.1679 34.42726 11373.156 + 1090 298.98455 112.08885 -3402.9618 -2958.2449 8.5704615 11373.47 + 1100 299.9828 112.09758 -3403.2527 -2957.051 1.5561287 11373.784 + 1110 298.93539 112.0604 -3402.0133 -2957.3696 34.967375 11374.099 + 1120 296.7305 112.04723 -3401.5744 -2960.2103 69.990098 11374.414 + 1130 295.27674 112.02291 -3400.7635 -2961.5617 100.03863 11374.731 + 1140 296.45601 112.02269 -3400.7563 -2959.8005 95.241061 11375.05 + 1150 299.73047 112.0732 -3402.44 -2956.6136 50.324323 11375.37 + 1160 302.03965 112.15096 -3405.0321 -2955.771 0.470984 11375.692 + 1170 300.54713 112.07455 -3402.485 -2955.4439 40.510086 11376.014 + 1180 294.8817 111.98784 -3399.5948 -2960.9806 118.6072 11376.337 + 1190 287.72897 111.90408 -3396.8027 -2968.8277 206.40841 11376.662 + 1200 283.05677 111.79866 -3393.2885 -2972.263 279.02297 11376.992 + 1210 282.91094 111.79618 -3393.206 -2972.3975 272.12528 11377.326 + 1220 285.66291 111.85359 -3395.1197 -2970.2178 215.47532 11377.666 + 1230 288.36949 111.92597 -3397.5325 -2968.6047 152.40126 11378.01 + 1240 289.67758 111.95124 -3398.3748 -2967.5013 118.13181 11378.357 + 1250 291.00888 111.94924 -3398.308 -2965.4544 96.526587 11378.707 + 1260 293.84005 111.99979 -3399.9931 -2962.9283 46.221913 
11379.058 + 1270 297.63013 112.03864 -3401.288 -2958.5858 -4.0302383 11379.41 + 1280 300.44491 112.06113 -3402.0378 -2955.1488 -38.37951 11379.762 + 1290 300.95481 112.06094 -3402.0312 -2954.3838 -48.631626 11380.114 + 1300 301.24616 112.05705 -3401.9018 -2953.821 -61.001277 11380.464 + 1310 305.01154 112.12689 -3404.2298 -2950.5482 -131.36483 11380.814 + 1320 311.60599 112.19261 -3406.4203 -2942.93 -220.63077 11381.161 + 1330 316.32472 112.31781 -3410.5936 -2940.0846 -310.56676 11381.504 + 1340 315.12652 112.28889 -3409.6296 -2940.9028 -285.69651 11381.842 + 1350 308.72437 112.185 -3406.1667 -2946.9626 -182.76942 11382.174 + 1360 300.42386 112.07734 -3402.5779 -2955.7202 -57.265006 11382.503 + 1370 291.86699 111.91447 -3397.149 -2963.019 88.315244 11382.831 + 1380 285.09338 111.81702 -3393.9007 -2969.8459 183.43851 11383.161 + 1390 283.26305 111.79401 -3393.1335 -2971.8012 194.92821 11383.494 + 1400 289.04947 111.87392 -3395.7975 -2965.8583 92.997624 11383.831 + 1410 298.69036 122.03915 -3401.305 -2957.0257 -21.917871 11384.17 + 1420 305.72142 122.18925 -3406.3083 -2951.5709 -158.60828 11384.508 + 1430 306.18416 122.18438 -3406.1461 -2950.7203 -168.54358 11384.845 + 1440 300.22148 122.07081 -3402.3602 -2955.8035 -71.469632 11385.178 + 1450 291.42314 121.93755 -3397.9182 -2964.4484 66.749781 11385.51 + 1460 283.4744 121.77675 -3392.5582 -2970.9115 214.18411 11385.843 + 1470 277.92785 121.69509 -3389.8362 -2976.4396 312.70165 11386.181 + 1480 274.55484 121.59371 -3386.457 -2978.0775 399.10464 11386.525 + 1490 273.23059 121.55309 -3385.103 -2978.6932 441.31809 11386.876 + 1500 274.01283 121.53082 -3384.3607 -2976.7874 455.32787 11387.235 + 1510 275.76419 121.54172 -3384.7239 -2974.5456 445.30959 11387.604 + 1520 277.35277 121.55749 -3385.2497 -2972.7084 430.6344 11387.981 + 1530 279.24904 121.57869 -3385.9563 -2970.5945 407.78618 11388.366 + 1540 282.0952 121.64066 -3388.022 -2968.4268 357.15314 11388.759 + 1550 285.68636 121.69629 -3389.8764 -2964.9396 297.54558 
11389.159 + 1560 289.73011 121.77973 -3392.6576 -2961.706 217.83566 11389.565 + 1570 294.53429 121.84139 -3394.7129 -2956.6154 138.00068 11389.976 + 1580 299.54983 121.91472 -3397.1573 -2951.5997 54.361106 11390.389 + 1590 302.73665 121.94695 -3398.2315 -2947.9337 6.7766333 11390.803 + 1600 302.26525 121.93259 -3397.7531 -2948.1564 12.134543 11391.218 + 1610 299.58012 111.87448 -3395.8158 -2950.2131 -8.8182286 11391.632 + 1620 298.30785 111.86386 -3395.462 -2951.7517 -4.1802827 11392.047 + 1630 299.96638 111.89816 -3396.6052 -2950.4279 -44.628518 11392.461 + 1640 304.39437 111.97855 -3399.285 -2946.5214 -126.01637 11392.875 + 1650 309.41084 112.08456 -3402.8186 -2942.5934 -218.22304 11393.287 + 1660 311.88453 112.09696 -3403.2319 -2939.3273 -244.85935 11393.695 + 1670 309.3293 112.06489 -3402.1629 -2942.059 -205.20034 11394.098 + 1680 302.67297 111.94894 -3398.2981 -2948.095 -95.057083 11394.497 + 1690 296.14217 111.83547 -3394.5157 -2954.0266 13.5455 11394.896 + 1700 293.79228 111.79567 -3393.1889 -2956.1952 57.363507 11395.294 + 1710 295.7862 111.80846 -3393.6152 -2953.6556 41.473788 11395.694 + 1720 298.77061 111.83771 -3394.5904 -2950.1918 7.2109878 11396.094 + 1730 299.47475 111.8605 -3395.35 -2949.904 -7.6906478 11396.495 + 1740 297.37447 111.82886 -3394.2953 -2951.9733 20.940471 11396.896 + 1750 294.56326 111.74995 -3391.6648 -2953.5243 74.927297 11397.297 + 1760 293.2422 111.71562 -3390.5206 -2954.345 98.812622 11397.7 + 1770 294.28814 111.697 -3389.9001 -2952.1688 91.747187 11398.105 + 1780 297.53099 111.75044 -3391.6812 -2949.1264 32.671888 11398.512 + 1790 301.97003 111.8238 -3394.1267 -2944.9692 -50.514508 11398.919 + 1800 306.98845 111.88312 -3396.1039 -2939.4818 -135.9312 11399.326 + 1810 311.96833 111.99708 -3399.9027 -2935.8735 -239.01551 11399.73 + 1820 315.20101 112.03074 -3401.0247 -2932.1871 -291.50322 11400.129 + 1830 315.71772 112.04293 -3401.431 -2931.8248 -304.51268 11400.524 + 1840 314.4024 112.00492 -3400.1641 -2932.5143 -272.21999 
11400.913 + 1850 312.76857 111.95783 -3398.5945 -2933.3749 -221.95593 11401.296 + 1860 310.42516 111.93857 -3397.9522 -2936.2183 -171.45063 11401.676 + 1870 306.27985 111.86967 -3395.6558 -2940.0877 -89.686333 11402.052 + 1880 300.77741 111.79815 -3393.2717 -2945.8881 -3.1165245 11402.427 + 1890 295.78471 111.6485 -3388.2835 -2948.3262 98.501625 11402.802 + 1900 293.15719 111.59748 -3386.5828 -2950.5337 132.42643 11403.179 + 1910 293.23718 111.56387 -3385.4623 -2949.2942 128.6136 11403.558 + 1920 295.7208 111.6254 -3387.5134 -2947.6511 63.786379 11403.94 + 1930 300.35286 111.72266 -3390.7552 -2944.0031 -31.396452 11404.323 + 1940 305.69582 111.8239 -3394.1299 -2939.4306 -129.74185 11404.706 + 1950 309.6976 111.91076 -3397.0253 -2936.3736 -206.25068 11405.087 + 1960 311.1315 111.93134 -3397.7114 -2934.9268 -229.05583 11405.463 + 1970 309.68047 111.86844 -3395.6148 -2934.9886 -191.25444 11405.836 + 1980 306.80624 111.84112 -3394.7041 -2938.3531 -153.85545 11406.205 + 1990 305.41903 111.81899 -3393.9663 -2939.6786 -131.34084 11406.572 + 2000 307.19197 111.86529 -3395.5097 -2938.585 -161.2466 11406.936 + 2010 310.43882 111.94413 -3398.1378 -2936.3836 -215.82109 11407.296 + 2020 312.07892 111.94103 -3398.0344 -2933.8407 -225.84464 11407.654 + 2030 310.71069 111.91952 -3397.3173 -2935.1587 -205.54292 11408.007 + 2040 307.40377 111.87766 -3395.922 -2938.6822 -164.12641 11408.356 + 2050 304.30365 111.78807 -3392.9356 -2940.307 -109.59091 11408.702 + 2060 303.15019 111.75068 -3391.6895 -2940.7765 -94.333282 11409.047 + 2070 304.14735 111.76888 -3392.2961 -2939.8999 -118.906 11409.389 + 2080 305.81644 111.77067 -3392.3558 -2937.477 -140.82484 11409.73 + 2090 306.81107 111.74544 -3391.5147 -2935.1565 -142.32093 11410.068 + 2100 307.45189 111.77758 -3392.5861 -2935.2747 -157.46524 11410.403 + 2110 308.74871 111.79708 -3393.2359 -2933.9956 -169.22025 11410.736 + 2120 310.79003 111.79358 -3393.1193 -2930.8427 -177.2892 11411.065 + 2130 312.5027 111.80031 -3393.3438 -2928.5197 
-188.73968 11411.392 + 2140 312.85497 111.81614 -3393.8713 -2928.5233 -194.35187 11411.715 + 2150 311.65069 111.83764 -3394.5881 -2931.0313 -189.40791 11412.034 + 2160 309.04678 111.79649 -3393.2164 -2933.5327 -148.18309 11412.35 + 2170 305.18266 111.71088 -3390.3628 -2936.4267 -79.718603 11412.663 + 2180 301.21656 111.63946 -3387.9822 -2939.9454 -17.844582 11412.975 + 2190 299.32602 111.61843 -3387.281 -2942.0563 4.3619189 11413.286 + 2200 300.88603 111.65474 -3388.4914 -2940.9462 -28.501997 11413.598 + 2210 303.76461 121.7445 -3391.4833 -2939.6564 -37.623171 11413.91 + 2220 305.30342 121.78666 -3392.8885 -2938.7728 -76.274224 11414.221 + 2230 304.88731 121.77372 -3392.4575 -2938.9607 -74.35779 11414.531 + 2240 302.34675 121.71839 -3390.6132 -2940.8953 -34.97964 11414.84 + 2250 297.88305 121.62829 -3387.6096 -2944.5311 35.668522 11415.149 + 2260 292.66199 121.54826 -3384.942 -2949.6294 108.43938 11415.458 + 2270 288.78505 121.4887 -3382.9567 -2953.4108 160.29761 11415.77 + 2280 287.75561 121.48855 -3382.9518 -2954.9371 164.14686 11416.084 + 2290 288.94866 121.5206 -3384.02 -2954.2308 138.14089 11416.402 + 2300 289.88236 121.54009 -3384.6698 -2953.4918 122.58753 11416.723 + 2310 288.68545 121.52101 -3384.0336 -2954.6358 143.38342 11417.046 + 2320 286.25612 121.45596 -3381.8654 -2956.0811 194.24971 11417.373 + 2330 285.14493 121.41872 -3380.624 -2956.4925 223.33975 11417.703 + 2340 286.87399 121.45251 -3381.7504 -2955.0471 197.40401 11418.037 + 2350 291.06298 121.53866 -3384.6219 -2951.6877 124.05566 11418.375 + 2360 295.86539 121.62405 -3387.4682 -2947.3909 38.195254 11418.716 + 2370 299.44663 121.70312 -3390.1039 -2944.6997 -40.176145 11419.057 + 2380 300.98476 121.73741 -3391.2471 -2943.5551 -84.646371 11419.398 + 2390 300.1448 121.71291 -3390.4303 -2943.9877 -81.47692 11419.738 + 2400 296.43305 121.64039 -3388.0129 -2947.0912 -25.774318 11420.076 + 2410 289.6685 121.51271 -3383.7572 -2952.8972 85.456317 11420.414 + 2420 281.3448 121.35314 -3378.4379 -2959.9588 
224.77811 11420.754 + 2430 274.65414 121.20745 -3373.5816 -2965.0544 340.41959 11421.098 + 2440 272.91796 121.15509 -3371.8364 -2965.8917 367.03173 11421.448 + 2450 278.02901 121.25771 -3375.2568 -2961.7098 262.99585 11421.806 + 2460 289.21861 121.45828 -3381.9427 -2951.752 59.555643 11422.169 + 2470 302.7222 121.74044 -3391.3479 -2941.0716 -200.80466 11422.533 + 2480 313.76557 121.90733 -3396.9111 -2930.2086 -394.16203 11422.893 + 2490 318.93328 121.97571 -3399.1903 -2924.8012 -488.60654 11423.246 + 2500 317.23755 121.91571 -3397.1902 -2925.3234 -459.4925 11423.59 + 2510 310.32712 121.82445 -3394.1482 -2932.5601 -356.98884 11423.926 + 2520 301.48415 121.67082 -3389.0273 -2940.5925 -203.1254 11424.255 + 2530 294.52902 121.54369 -3384.7897 -2946.7001 -70.901724 11424.58 + 2540 291.84747 121.48223 -3382.7409 -2948.6399 0.95112369 11424.903 + 2550 292.8544 121.49927 -3383.309 -2947.7103 8.9958029 11425.227 + 2560 295.32516 121.53481 -3384.4938 -2945.22 -4.4652777 11425.551 + 2570 296.70748 121.53885 -3384.6282 -2943.2983 2.2055059 11425.875 + 2580 295.40417 121.50616 -3383.5388 -2944.1475 43.528038 11426.2 + 2590 291.72518 121.43843 -3381.2809 -2947.3618 113.16074 11426.525 + 2600 287.60187 121.35096 -3378.3654 -2950.5794 184.57759 11426.853 + 2610 285.75661 121.38349 -3379.4498 -2954.4085 181.52836 11427.184 + 2620 287.43585 121.38894 -3379.6313 -2952.0923 152.50846 11427.519 + 2630 291.37945 121.45503 -3381.8343 -2948.4294 80.249921 11427.857 + 2640 294.74827 121.53041 -3384.3469 -2945.9312 11.896562 11428.197 + 2650 295.64551 121.50583 -3383.5276 -2943.7773 6.4682246 11428.537 + 2660 294.74104 121.47628 -3382.5428 -2944.1379 21.935775 11428.877 + 2670 294.33031 121.46756 -3382.2519 -2944.4578 25.890763 11429.218 + 2680 296.11878 121.4512 -3381.7068 -2941.2525 10.893357 11429.56 + 2690 299.83398 121.51703 -3383.9011 -2937.9208 -57.854278 11429.902 + 2700 303.64909 121.58866 -3386.2885 -2934.6335 -135.08152 11430.242 + 2710 305.14074 121.62617 -3387.5389 -2933.6652 
-182.08029 11430.581 + 2720 302.94886 121.62862 -3387.6207 -2937.0072 -185.1792 11430.916 + 2730 298.14993 121.59589 -3386.5297 -2943.0543 -150.34737 11431.248 + 2740 292.67914 121.5115 -3383.7166 -2948.3786 -87.831473 11431.577 + 2750 288.2257 121.43775 -3381.2584 -2952.5445 -39.60642 11431.905 + 2760 286.9577 121.43735 -3381.2449 -2954.417 -47.633663 11432.232 + 2770 289.68172 121.48535 -3382.8451 -2951.9655 -108.76492 11432.558 + 2780 294.68316 121.58242 -3386.0806 -2947.7617 -207.11325 11432.882 + 2790 299.51755 121.6252 -3387.5066 -2941.997 -277.61993 11433.202 + 2800 302.47742 121.66958 -3388.9859 -2939.0737 -323.13721 11433.517 + 2810 302.72635 121.66873 -3388.9576 -2938.6751 -317.30645 11433.826 + 2820 300.3436 121.58487 -3386.1625 -2939.4241 -250.22549 11434.13 + 2830 297.08273 121.51277 -3383.7591 -2941.8711 -178.83831 11434.428 + 2840 295.57979 121.46067 -3382.0223 -2942.3697 -135.17705 11434.724 + 2850 297.39057 121.48034 -3382.6781 -2940.3322 -155.107 11435.016 + 2860 301.45166 121.52493 -3384.1642 -2935.7777 -210.88302 11435.306 + 2870 304.91672 121.62195 -3387.3984 -2933.8579 -283.51201 11435.592 + 2880 304.72225 121.58958 -3386.3193 -2933.068 -271.90273 11435.873 + 2890 299.62705 121.51597 -3383.8655 -2938.193 -197.43931 11436.149 + 2900 291.98004 121.3175 -3377.25 -2942.9518 -47.677493 11436.421 + 2910 285.64674 121.22979 -3374.3263 -2949.4485 50.88047 11436.692 + 2920 282.86712 121.21804 -3373.9348 -2953.1914 87.233096 11436.964 + 2930 283.67958 121.21576 -3373.8587 -2951.9068 82.604585 11437.238 + 2940 287.21352 121.3099 -3376.9966 -2949.7883 10.623844 11437.514 + 2950 292.14611 121.40553 -3380.1844 -2945.6392 -79.265 11437.79 + 2960 296.89016 121.51751 -3383.9171 -2942.3155 -175.19536 11438.064 + 2970 300.12916 121.57467 -3385.8225 -2939.4031 -236.77422 11438.336 + 2980 302.20838 121.63464 -3387.8214 -2938.3093 -287.40904 11438.603 + 2990 304.64073 121.70098 -3390.0326 -2936.9026 -339.84237 11438.864 + 3000 307.8161 121.72954 -3390.9845 
-2933.1314 -381.60025 11439.119 + 3010 310.72947 121.77316 -3392.4387 -2930.2521 -426.58264 11439.367 + 3020 312.53958 121.7889 -3392.9633 -2928.0844 -450.04935 11439.608 + 3030 312.49094 121.79073 -3393.0242 -2928.2176 -449.02983 11439.839 + 3040 309.73908 121.7397 -3391.3234 -2930.61 -400.9218 11440.062 + 3050 304.05498 121.64761 -3388.2537 -2935.9949 -307.99255 11440.278 + 3060 296.18807 121.53195 -3384.3984 -2943.8411 -185.36974 11440.488 + 3070 287.85771 121.39226 -3379.7422 -2951.5756 -51.099571 11440.695 + 3080 281.51145 121.24122 -3374.7073 -2955.9804 68.773795 11440.9 + 3090 278.68968 121.22085 -3374.0283 -2959.4986 105.01282 11441.107 + 3100 279.57067 121.18041 -3372.6802 -2956.8401 112.35716 11441.316 + 3110 283.23891 121.24147 -3374.7155 -2953.4191 55.294964 11441.528 + 3120 287.0302 121.32051 -3377.3502 -2950.4146 -10.351676 11441.74 + 3130 288.75914 121.35045 -3378.3482 -2948.8409 -39.087682 11441.952 + 3140 288.86822 121.37873 -3379.291 -2949.6214 -51.515881 11442.164 + 3150 289.15543 121.36284 -3378.7615 -2948.6647 -45.538755 11442.375 + 3160 290.01869 121.39289 -3379.7629 -2948.3821 -60.140383 11442.586 + 3170 290.3588 121.38183 -3379.3944 -2947.5077 -55.744768 11442.795 + 3180 289.4469 121.36003 -3378.6676 -2948.1372 -41.38054 11443.003 + 3190 288.17301 121.32373 -3377.4578 -2948.8223 -26.918463 11443.211 + 3200 288.77945 121.33719 -3377.9064 -2948.3688 -56.270381 11443.418 + 3210 292.61319 121.37731 -3379.2438 -2944.0038 -130.93347 11443.625 + 3220 298.72162 121.46642 -3382.2139 -2937.8881 -247.75665 11443.829 + 3230 304.29801 121.56165 -3385.3882 -2932.768 -363.76171 11444.028 + 3240 307.30603 121.58676 -3386.2253 -2929.1309 -430.08567 11444.22 + 3250 308.46328 121.64558 -3388.1861 -2929.3704 -489.21976 11444.405 + 3260 309.45149 121.68162 -3389.3873 -2929.1017 -531.49647 11444.58 + 3270 310.53561 121.68778 -3389.5928 -2927.6946 -555.3418 11444.746 + 3280 310.86344 121.68443 -3389.4811 -2927.0953 -559.37189 11444.9 + 3290 309.20309 121.65295 
-3388.4315 -2928.5154 -526.9055 11445.045 + 3300 304.79453 121.57176 -3385.7254 -2932.3667 -445.46244 11445.179 + 3310 298.22424 121.47929 -3382.643 -2939.057 -337.28653 11445.305 + 3320 291.59323 121.38969 -3379.6564 -2945.9335 -229.85854 11445.424 + 3330 287.05779 121.35392 -3378.4639 -2951.4871 -163.36345 11445.539 + 3340 284.87461 121.28566 -3376.1887 -2952.4593 -108.38359 11445.651 + 3350 283.95926 121.27571 -3375.857 -2953.4891 -91.349874 11445.761 + 3360 283.69509 121.23795 -3374.5984 -2952.6235 -74.997711 11445.869 + 3370 284.68525 121.2386 -3374.6199 -2951.1721 -85.999729 11445.975 + 3380 287.19722 121.24684 -3374.8948 -2947.7107 -112.9909 11446.081 + 3390 290.57885 121.30632 -3376.8773 -2944.6633 -165.26284 11446.184 + 3400 292.93209 121.33537 -3377.8456 -2942.1313 -194.7257 11446.284 + 3410 291.76142 121.31269 -3377.0898 -2943.1168 -172.6622 11446.38 + 3420 286.88546 121.25064 -3375.0212 -2948.3008 -105.31015 11446.473 + 3430 282.35446 121.16335 -3372.1116 -2952.1307 -34.843492 11446.565 + 3440 282.89328 121.17145 -3372.3818 -2951.5995 -46.727521 11446.655 + 3450 288.98985 121.20861 -3373.6204 -2943.7699 -119.26274 11446.745 + 3460 297.07668 121.3646 -3378.82 -2936.941 -257.29499 11446.833 + 3470 303.62926 121.49689 -3383.2298 -2931.6043 -374.65079 11446.915 + 3480 307.53501 121.57346 -3385.782 -2928.347 -444.57094 11446.991 + 3490 308.82784 121.6384 -3387.9465 -2928.5885 -478.59915 11447.058 + 3500 306.76065 121.58336 -3386.1118 -2929.8286 -428.54219 11447.116 + 3510 301.59019 121.46045 -3382.0149 -2933.4223 -320.67255 11447.166 + 3520 296.6128 121.39244 -3379.748 -2938.5589 -231.82779 11447.21 + 3530 294.04622 121.35625 -3378.5416 -2941.1701 -174.62678 11447.25 + 3540 292.92581 121.33429 -3377.8097 -2942.1048 -139.27627 11447.286 + 3550 292.55142 121.34157 -3378.0522 -2942.9042 -127.55412 11447.32 + 3560 293.27364 121.32125 -3377.3749 -2941.1526 -121.96709 11447.351 + 3570 295.91725 121.34354 -3378.1182 -2937.9637 -156.7775 11447.38 + 3580 300.81071 
121.43009 -3381.0031 -2933.5699 -240.11843 11447.406 + 3590 306.25194 121.53849 -3384.6164 -2929.0899 -338.32794 11447.427 + 3600 309.27383 121.59528 -3386.5092 -2926.4878 -396.29387 11447.442 + 3610 308.14806 121.52966 -3384.3219 -2925.975 -370.94221 11447.45 + 3620 303.70915 121.5107 -3383.6899 -2931.9455 -332.81969 11447.45 + 3630 298.10851 121.41167 -3380.389 -2936.9752 -250.53854 11447.445 + 3640 292.91537 121.32403 -3377.4675 -2941.7781 -174.96568 11447.434 + 3650 289.83094 121.28248 -3376.0825 -2944.981 -136.01651 11447.42 + 3660 290.33941 121.31757 -3377.2523 -2945.3944 -158.30815 11447.404 + 3670 293.74139 121.38911 -3379.6371 -2942.7191 -217.62554 11447.384 + 3680 296.888 121.43234 -3381.0781 -2939.4797 -260.39443 11447.36 + 3690 297.61413 121.43968 -3381.3225 -2938.644 -262.15069 11447.332 + 3700 295.73613 121.4197 -3380.6566 -2940.7715 -224.46949 11447.298 + 3710 292.15784 121.32961 -3377.6536 -2943.091 -142.08819 11447.26 + 3720 288.70489 121.26152 -3375.384 -2945.9574 -68.863176 11447.22 + 3730 287.02462 121.21756 -3373.9187 -2946.9913 -23.511213 11447.178 + 3740 287.33794 121.17794 -3372.5979 -2945.2045 -0.76652367 11447.136 + 3750 288.41045 121.21903 -3373.9676 -2944.9789 -20.664794 11447.094 + 3760 289.32124 121.25713 -3375.2377 -2944.8943 -43.830154 11447.052 + 3770 289.74427 121.23914 -3374.6381 -2943.6655 -44.839405 11447.009 + 3780 289.16352 121.22661 -3374.2202 -2944.1114 -43.361367 11446.965 + 3790 287.63185 121.23242 -3374.4141 -2946.5835 -44.84352 11446.921 + 3800 286.51576 121.1944 -3373.1466 -2946.9762 -38.200592 11446.876 + 3810 288.60461 121.22991 -3374.3304 -2945.0529 -89.234348 11446.829 + 3820 295.02935 121.36335 -3378.7782 -2939.9444 -213.80273 11446.781 + 3830 302.96572 121.48477 -3382.8257 -2932.1871 -342.25226 11446.73 + 3840 307.8583 121.57087 -3385.6957 -2927.7798 -424.41413 11446.671 + 3850 307.82912 121.57589 -3385.8629 -2927.9904 -425.63611 11446.605 + 3860 304.82856 121.55478 -3385.1595 -2931.7501 -384.43244 11446.53 + 3870 
302.13834 121.51176 -3383.7252 -2934.3173 -332.70406 11446.448 + 3880 300.98997 121.45725 -3381.9085 -2934.2087 -287.91701 11446.36 + 3890 299.84578 121.41474 -3380.4914 -2934.4935 -249.7476 11446.267 + 3900 297.11081 121.39476 -3379.8253 -2937.8955 -209.71089 11446.168 + 3910 293.23391 121.32658 -3377.5525 -2941.3893 -143.37072 11446.066 + 3920 289.7596 121.24199 -3374.733 -2943.7375 -76.199915 11445.961 + 3930 288.21644 121.18826 -3372.9421 -2944.242 -36.754134 11445.854 + 3940 288.999 121.16072 -3372.0241 -2942.16 -25.308345 11445.746 + 3950 291.24154 121.21076 -3373.6921 -2940.4924 -55.387764 11445.638 + 3960 293.71042 121.25295 -3375.0984 -2938.2265 -84.359934 11445.53 + 3970 295.34442 121.28792 -3376.264 -2936.9616 -105.24072 11445.419 + 3980 295.93386 121.29223 -3376.4078 -2936.2286 -108.7094 11445.307 + 3990 296.45686 121.29136 -3376.3788 -2935.4216 -114.1245 11445.192 + 4000 298.6827 121.35677 -3378.5589 -2934.291 -161.67235 11445.075 + 4010 303.1746 121.46699 -3382.233 -2931.2838 -244.93297 11444.955 + 4020 308.48148 121.53283 -3384.4277 -2925.5848 -314.33597 11444.831 + 4030 312.42541 121.58582 -3386.1939 -2921.4848 -359.69109 11444.7 + 4040 313.52134 121.57954 -3385.9845 -2919.6453 -349.93551 11444.563 + 4050 311.68092 121.53592 -3384.5305 -2920.9288 -295.48459 11444.419 + 4060 308.20934 121.44718 -3381.5727 -2923.1347 -208.57588 11444.269 + 4070 304.77563 121.375 -3379.1665 -2925.8359 -131.16799 11444.115 + 4080 302.05526 121.35918 -3378.6393 -2929.355 -87.273655 11443.959 + 4090 299.63707 121.31729 -3377.2431 -2931.5557 -43.491299 11443.801 + 4100 296.79632 121.27564 -3375.8545 -2934.3925 -3.2770185 11443.642 + 4110 293.2432 121.17384 -3372.4614 -2936.2844 61.177737 11443.483 + 4120 289.78281 121.10529 -3370.1765 -2939.1465 108.30347 11443.326 + 4130 288.60354 121.0897 -3369.6567 -2940.3808 115.47491 11443.17 + 4140 291.02012 121.13095 -3371.0318 -2938.1614 72.630448 11443.016 + 4150 295.58835 121.30186 -3376.7286 -2937.0633 -32.088689 11442.864 + 
4160 298.35127 121.32968 -3377.6561 -2933.8812 -59.581582 11442.711 + 4170 296.65089 121.3084 -3376.9468 -2935.7011 -26.59023 11442.557 + 4180 292.60517 121.22909 -3374.3029 -2939.0749 49.147273 11442.402 + 4190 290.72236 121.18097 -3372.6989 -2940.2714 92.84254 11442.249 + 4200 293.39337 121.2149 -3373.8298 -2937.4294 61.151041 11442.097 + 4210 299.35417 121.35549 -3378.5162 -2933.2496 -46.284045 11441.947 + 4220 305.09367 121.44871 -3381.6238 -2927.8201 -140.05291 11441.795 + 4230 308.0747 121.47239 -3382.4132 -2924.1754 -186.36253 11441.641 + 4240 308.90737 121.46118 -3382.0393 -2922.563 -198.50555 11441.484 + 4250 309.77103 121.47822 -3382.6075 -2921.8465 -214.7363 11441.323 + 4260 311.30404 121.49167 -3383.0557 -2920.0145 -226.04473 11441.158 + 4270 312.2144 121.50966 -3383.6553 -2919.26 -227.49904 11440.988 + 4280 311.5543 121.47564 -3382.5215 -2919.1081 -191.54355 11440.814 + 4290 309.15349 121.42597 -3380.8656 -2921.0232 -132.12913 11440.636 + 4300 305.32078 121.30711 -3376.9036 -2922.7621 -35.097493 11440.456 + 4310 301.83829 121.24986 -3374.9955 -2926.0339 30.953582 11440.275 + 4320 301.49874 121.22969 -3374.323 -2925.8664 52.573117 11440.095 + 4330 304.60655 121.29935 -3376.6451 -2923.566 8.3635111 11439.915 + 4340 308.27454 121.36723 -3378.9076 -2920.3726 -39.64981 11439.736 + 4350 308.68128 121.41579 -3380.5262 -2921.3862 -48.546743 11439.556 + 4360 303.46646 121.29968 -3376.6561 -2925.2728 58.560156 11439.375 + 4370 293.69001 121.10941 -3370.3137 -2933.4721 234.60284 11439.195 + 4380 284.38997 120.96747 -3365.5822 -2942.5736 383.84733 11439.02 + 4390 281.82066 120.90729 -3363.5763 -2944.3895 435.13504 11438.852 + 4400 288.33476 121.03813 -3367.9377 -2939.0616 323.25684 11438.693 + 4410 300.64159 121.26555 -3375.5182 -2928.3366 112.53463 11438.539 + 4420 313.30705 121.51947 -3383.9825 -2917.962 -117.28122 11438.388 + 4430 321.64199 121.66393 -3388.7976 -2910.3795 -265.89993 11438.234 + 4440 323.27558 121.67928 -3389.3095 -2908.4616 -298.8234 11438.076 
+ 4450 318.82205 121.61215 -3387.0718 -2912.8481 -238.21166 11437.912 + 4460 311.57096 121.48283 -3382.7609 -2919.3227 -121.94167 11437.743 + 4470 305.29805 121.37931 -3379.3104 -2925.2027 -19.249637 11437.572 + 4480 302.20701 121.34516 -3378.1721 -2928.6621 31.608536 11437.401 + 4490 301.86539 121.37251 -3379.0838 -2930.0819 36.6133 11437.231 + 4500 301.83114 121.38573 -3379.5242 -2930.5732 44.950393 11437.061 + 4510 300.69354 121.37501 -3379.1671 -2931.9083 74.35439 11436.891 + 4520 299.07515 121.31618 -3377.2061 -2932.3544 130.20148 11436.724 + 4530 297.77448 121.26315 -3375.4383 -2932.5213 183.17993 11436.559 + 4540 296.70489 121.22611 -3374.2037 -2932.8777 224.84418 11436.397 + 4550 296.0746 121.19692 -3373.2307 -2932.8421 251.41148 11436.24 + 4560 296.75595 121.17951 -3372.6504 -2931.2484 254.39696 11436.087 + 4570 298.20494 121.20362 -3373.4541 -2929.8968 228.97573 11435.94 + 4580 298.6187 121.18925 -3372.975 -2928.8023 223.85684 11435.796 + 4590 297.36567 121.13694 -3371.2312 -2928.9223 245.45204 11435.657 + 4600 296.53718 121.12381 -3370.7936 -2929.717 247.99542 11435.523 + 4610 298.85806 121.17928 -3372.6428 -2928.1141 200.35424 11435.394 + 4620 304.1064 121.27181 -3375.7272 -2923.3919 116.86048 11435.268 + 4630 309.59172 121.37659 -3379.2198 -2918.7256 29.790192 11435.145 + 4640 312.59335 121.44841 -3381.6138 -2916.6549 -19.674381 11435.022 + 4650 311.95155 121.44647 -3381.5489 -2917.5446 -8.4711918 11434.899 + 4660 309.72085 121.40013 -3380.0043 -2919.318 32.49946 11434.776 + 4670 309.6271 121.38064 -3379.3547 -2918.8078 45.684052 11434.654 + 4680 311.87712 121.3996 -3379.9867 -2916.0931 24.664385 11434.532 + 4690 313.69797 121.48573 -3382.8577 -2916.2557 -16.083599 11434.411 + 4700 313.14834 121.42257 -3380.7524 -2914.9679 24.50122 11434.29 + 4710 310.1134 121.3699 -3378.9965 -2917.7263 88.005351 11434.17 + 4720 305.06916 121.29286 -3376.4286 -2922.6613 180.46822 11434.051 + 4730 299.24301 121.18206 -3372.7352 -2927.6339 289.16437 11433.935 + 4740 
294.66495 121.09388 -3369.7962 -2931.5044 369.81124 11433.825 + 4750 293.24652 121.04681 -3368.2268 -2932.0449 397.00251 11433.722 + 4760 295.42269 121.03296 -3367.7655 -2928.3466 370.60416 11433.627 + 4770 300.18593 121.13662 -3371.2205 -2924.7167 269.94247 11433.539 + 4780 306.46538 121.27952 -3375.9839 -2920.1399 139.09706 11433.456 + 4790 312.83484 121.41628 -3380.5426 -2915.2245 12.858572 11433.375 + 4800 316.57471 121.56483 -3385.4942 -2914.6133 -89.103894 11433.295 + 4810 315.4019 121.53381 -3384.4602 -2915.3238 -75.973052 11433.214 + 4820 310.38842 121.45712 -3381.9041 -2920.2248 -9.5670482 11433.131 + 4830 304.99978 121.40535 -3380.1784 -2926.5144 52.039039 11433.048 + 4840 301.97087 121.32927 -3377.6424 -2928.4837 100.58872 11432.965 + 4850 302.07526 121.37683 -3379.2276 -2929.9136 71.968838 11432.885 + 4860 304.24312 121.39066 -3379.6888 -2927.1503 38.71679 11432.806 + 4870 306.25878 121.39389 -3379.7963 -2924.2596 14.992947 11432.728 + 4880 305.61864 121.39646 -3379.882 -2925.2974 20.445466 11432.65 + 4890 301.38718 121.29476 -3376.4919 -2928.2013 99.826783 11432.573 + 4900 295.8169 121.20441 -3373.4805 -2933.4753 186.45389 11432.498 + 4910 292.7373 121.11172 -3370.3906 -2934.966 250.13371 11432.426 + 4920 294.73813 121.14607 -3371.5356 -2933.135 218.42 11432.359 + 4930 301.33504 121.22391 -3374.1305 -2925.9174 128.39026 11432.296 + 4940 309.57915 121.40646 -3380.2152 -2919.7397 -15.005956 11432.235 + 4950 316.02716 121.5054 -3383.5134 -2913.447 -106.73682 11432.175 + 4960 318.58788 121.52234 -3384.0782 -2910.2028 -126.86261 11432.112 + 4970 316.77858 121.51162 -3383.7208 -2912.5366 -94.346249 11432.047 + 4980 311.71707 121.45409 -3381.8029 -2918.1474 -15.927158 11431.98 + 4990 305.94637 121.31673 -3377.2242 -2922.1522 92.455269 11431.913 + 5000 302.82001 121.29431 -3376.4769 -2926.0551 126.1166 11431.848 + 5010 303.96795 121.34904 -3378.3014 -2926.1721 87.846907 11431.785 + 5020 308.04191 121.43693 -3381.2309 -2923.0419 11.191027 11431.724 + 5030 
311.80602 121.49557 -3383.1858 -2919.3979 -48.531152 11431.663 + 5040 313.15103 121.50266 -3383.422 -2917.6336 -58.634435 11431.602 + 5050 311.39813 121.44984 -3381.6613 -2918.4802 -6.1522599 11431.539 + 5060 306.37406 121.32618 -3377.5395 -2921.8313 112.49768 11431.476 + 5070 299.0361 121.18456 -3372.8185 -2928.025 261.3789 11431.416 + 5080 292.26711 121.07308 -3369.1028 -2934.3776 388.69845 11431.361 + 5090 289.84189 121.0202 -3367.3401 -2936.2222 445.54983 11431.313 + 5100 293.31592 121.07356 -3369.1186 -2932.8334 398.18962 11431.273 + 5110 298.03568 131.14395 -3371.4649 -2928.1594 380.41194 11431.242 + 5120 298.8909 131.1512 -3371.7066 -2927.1291 359.0829 11431.218 + 5130 295.86625 131.09052 -3369.6841 -2929.6055 396.21627 11431.201 + 5140 292.57084 131.08778 -3369.5926 -2934.4156 414.31492 11431.192 + 5150 291.935 131.06196 -3368.7319 -2934.5007 420.62522 11431.191 + 5160 293.76964 131.10398 -3370.1327 -2933.1727 382.1961 11431.198 + 5170 296.45636 131.13495 -3371.165 -2930.2087 346.19977 11431.212 + 5180 298.56648 131.17442 -3372.4806 -2928.3856 318.06614 11431.233 + 5190 299.91367 131.17522 -3372.5074 -2926.4086 315.88913 11431.26 + 5200 300.8404 131.15994 -3371.9981 -2924.5208 325.82663 11431.294 + 5210 301.33431 131.19142 -3373.0472 -2924.8352 321.12915 11431.333 + 5220 301.37267 131.21398 -3373.7992 -2925.5302 323.17565 11431.379 + 5230 300.47162 131.24585 -3374.8616 -2927.9328 330.30384 11431.431 + 5240 297.78262 131.18355 -3372.7851 -2929.856 390.12177 11431.49 + 5250 293.37938 131.09184 -3369.7279 -2933.3483 476.89365 11431.557 + 5260 288.5776 131.00823 -3366.941 -2937.7037 563.36303 11431.632 + 5270 284.85195 130.92195 -3364.0651 -2940.3694 638.59201 11431.719 + 5280 283.2497 130.88487 -3362.8289 -2941.5165 669.97431 11431.818 + 5290 284.60451 130.92108 -3364.036 -2940.7084 639.87555 11431.929 + 5300 288.65507 131.00579 -3366.8598 -2937.5072 560.98385 11432.053 + 5310 293.64397 131.09686 -3369.8954 -2933.1222 468.06919 11432.188 + 5320 297.23935 
131.18843 -3372.9477 -2930.8267 389.23781 11432.332 + 5330 297.92189 131.16784 -3372.2613 -2929.1251 383.78723 11432.484 + 5340 295.88364 131.12963 -3370.9878 -2930.8833 414.06812 11432.643 + 5350 292.652 131.06027 -3368.6755 -2933.3779 469.72267 11432.81 + 5360 289.96002 131.00939 -3366.9796 -2935.6861 512.24248 11432.987 + 5370 288.82334 130.9809 -3366.0299 -2936.427 526.52852 11433.174 + 5380 289.919 130.9997 -3366.6568 -2935.4242 496.25641 11433.37 + 5390 293.90776 131.02352 -3367.4506 -2930.2851 434.654 11433.577 + 5400 300.15414 131.11099 -3370.3662 -2923.9097 331.21385 11433.792 + 5410 305.95085 131.20156 -3373.3855 -2918.3068 238.80358 11434.013 + 5420 308.54184 131.2532 -3375.1067 -2916.1741 199.18497 11434.239 + 5430 307.50215 131.234 -3374.4665 -2917.0804 225.68274 11434.47 + 5440 304.76698 131.15408 -3371.8028 -2918.485 291.69611 11434.705 + 5450 302.82614 131.15682 -3371.8941 -2921.4631 316.57324 11434.945 + 5460 302.32493 131.19557 -3373.1858 -2923.5003 312.63356 11435.192 + 5470 301.65321 131.14414 -3371.4714 -2922.7851 343.08644 11435.446 + 5480 299.35845 131.08072 -3369.3573 -2924.0843 392.21574 11435.706 + 5490 296.00745 131.00849 -3366.9496 -2926.661 454.06321 11435.974 + 5500 293.62342 130.92359 -3364.1197 -2927.3771 510.06055 11436.251 + 5510 293.90877 130.91837 -3363.9458 -2926.7787 508.66227 11436.538 + 5520 296.3502 130.97873 -3365.9576 -2925.1592 459.09636 11436.834 + 5530 298.51077 131.01379 -3367.1262 -2923.1141 417.98617 11437.14 + 5540 298.65249 130.99768 -3366.5894 -2922.3664 413.06782 11437.454 + 5550 297.06517 130.999 -3366.6334 -2924.7715 416.90716 11437.777 + 5560 295.08318 130.969 -3365.6334 -2926.7195 436.38677 11438.107 + 5570 293.80199 130.96447 -3365.4822 -2928.474 441.75621 11438.447 + 5580 293.22853 130.93024 -3364.3413 -2928.186 453.43962 11438.794 + 5590 293.18573 130.95367 -3365.1223 -2929.0308 442.42097 11439.151 + 5600 293.52337 130.95292 -3365.0972 -2928.5034 443.00006 11439.517 + 5610 295.33417 121.00992 -3366.9973 
-2927.7101 351.5285 11439.891 + 5620 299.87371 121.08043 -3369.3477 -2923.3083 285.46544 11440.271 + 5630 306.58927 121.17036 -3372.3453 -2916.317 188.69256 11440.658 + 5640 314.42614 121.31992 -3377.3307 -2909.6456 57.183028 11441.048 + 5650 321.43602 121.40726 -3380.2419 -2902.1302 -42.548803 11441.44 + 5660 325.3986 121.46329 -3382.1097 -2898.1039 -99.841592 11441.831 + 5670 325.18238 121.40038 -3380.0127 -2896.3286 -71.006159 11442.221 + 5680 321.9195 121.31804 -3377.2681 -2898.4372 -5.1476385 11442.61 + 5690 317.90338 121.2499 -3374.9967 -2902.1395 62.122047 11442.999 + 5700 314.46913 121.19238 -3373.0792 -2905.3302 116.1735 11443.39 + 5710 312.0068 121.13668 -3371.2227 -2907.1362 153.84412 11443.783 + 5720 310.45453 121.08952 -3369.6506 -2907.873 176.39037 11444.179 + 5730 308.36253 121.03267 -3367.7558 -2909.0899 206.22332 11444.579 + 5740 304.60008 120.99709 -3366.5696 -2913.5001 242.78863 11444.984 + 5750 300.25164 120.94314 -3364.7714 -2918.1698 293.49781 11445.393 + 5760 297.60365 120.88426 -3362.8087 -2920.1458 333.2608 11445.809 + 5770 298.53552 120.92527 -3364.1757 -2920.1267 304.19324 11446.231 + 5780 303.73079 120.97188 -3365.7295 -2913.953 237.62234 11446.659 + 5790 310.9535 121.10237 -3370.0789 -2907.5592 123.84955 11447.091 + 5800 316.07035 121.15139 -3371.7131 -2901.5824 59.579025 11447.527 + 5810 316.2165 131.20522 -3373.5073 -2903.1592 102.78686 11447.964 + 5820 312.18519 131.11861 -3370.6203 -2906.2684 181.38185 11448.405 + 5830 307.10624 131.01565 -3367.1882 -2910.3909 276.69732 11448.849 + 5840 303.60071 130.95136 -3365.0452 -2913.4622 339.92983 11449.298 + 5850 302.2033 130.91633 -3363.8776 -2914.3731 366.28048 11449.755 + 5860 301.96551 130.92749 -3364.2498 -2915.099 357.45155 11450.219 + 5870 301.95773 130.91949 -3363.9829 -2914.8437 347.80746 11450.69 + 5880 302.06817 130.9215 -3364.0499 -2914.7464 329.56923 11451.168 + 5890 302.31056 130.91423 -3363.8075 -2914.1435 312.64449 11451.653 + 5900 302.07546 130.92914 -3364.3048 -2914.9904 
292.07764 11452.145 + 5910 300.74146 130.89503 -3363.1676 -2915.8375 302.13817 11452.642 + 5920 298.65522 130.8226 -3360.7534 -2916.5264 338.91993 11453.146 + 5930 296.73447 130.79282 -3359.7605 -2918.3905 364.39998 11453.656 + 5940 295.5406 130.76927 -3358.9757 -2919.3814 386.55917 11454.175 + 5950 294.99299 130.7504 -3358.3465 -2919.5668 404.75575 11454.701 + 5960 294.86662 130.73956 -3357.9854 -2919.3936 417.11766 11455.235 + 5970 295.22448 130.7815 -3359.3833 -2920.2593 402.47864 11455.778 + 5980 296.24485 130.84017 -3361.339 -2920.6973 371.68362 11456.328 + 5990 297.4302 130.88615 -3362.8716 -2920.4667 340.05175 11456.887 + 6000 298.16549 130.89171 -3363.0572 -2919.5586 322.85603 11457.452 + 6010 299.06272 130.92006 -3364.0021 -2919.1689 292.49092 11458.024 + 6020 300.89808 130.9232 -3364.1067 -2916.5437 264.07987 11458.602 + 6030 302.77522 130.95465 -3365.1548 -2914.7996 226.55047 11459.186 + 6040 303.5262 130.91672 -3363.8908 -2912.4185 228.98333 11459.775 + 6050 303.30873 130.90327 -3363.4423 -2912.2936 233.22005 11460.368 + 6060 303.26058 130.89702 -3363.234 -2912.1568 234.71794 11460.967 + 6070 303.60235 130.93774 -3364.5913 -2913.0059 214.97622 11461.57 + 6080 303.31006 130.94245 -3364.7485 -2913.5978 213.25763 11462.178 + 6090 301.69962 130.94114 -3364.7045 -2915.9492 223.86866 11462.79 + 6100 299.62417 130.85578 -3361.8594 -2916.1912 270.01732 11463.408 + 6110 298.48832 130.84396 -3361.4655 -2917.4867 282.50313 11464.031 + 6120 298.08727 130.81875 -3360.6249 -2917.2427 295.05061 11464.66 + 6130 297.74282 130.83008 -3361.0028 -2918.1329 292.15838 11465.295 + 6140 297.96892 130.84061 -3361.3538 -2918.1476 281.25591 11465.936 + 6150 299.67332 130.89951 -3363.3169 -2917.5756 233.93496 11466.583 + 6160 302.55507 130.93786 -3364.5954 -2914.5677 177.32388 11467.235 + 6170 305.59573 130.98704 -3366.2347 -2911.6842 110.30761 11467.891 + 6180 307.58081 130.99982 -3366.6608 -2909.1576 65.120245 11468.549 + 6190 307.86772 131.01886 -3367.2954 -2909.3655 34.468656 
11469.21 + 6200 307.26123 131.00077 -3366.6924 -2909.6646 33.334477 11469.872 + 6210 306.16434 130.9416 -3364.7199 -2909.3236 62.148082 11470.535 + 6220 304.20525 130.86167 -3362.0558 -2909.5736 114.04889 11471.199 + 6230 301.49671 130.85003 -3361.6677 -2913.2142 150.58908 11471.867 + 6240 298.81254 130.8113 -3360.3767 -2915.9157 196.6026 11472.538 + 6250 297.52315 130.76808 -3358.9361 -2916.3929 228.1392 11473.213 + 6260 298.38488 130.79824 -3359.9415 -2916.1166 208.20037 11473.893 + 6270 300.41135 130.79182 -3359.7274 -2912.8883 189.438 11474.578 + 6280 301.78229 130.79227 -3359.7424 -2910.8641 172.11102 11475.267 + 6290 301.97901 130.78161 -3359.3868 -2910.2159 167.30802 11475.96 + 6300 301.85984 130.8003 -3360.01 -2911.0164 152.48267 11476.656 + 6310 302.31117 130.83004 -3361.0015 -2911.3365 126.9104 11477.356 + 6320 303.23471 130.81364 -3360.4545 -2909.4159 113.54785 11478.06 + 6330 303.79528 130.78622 -3359.5408 -2907.6683 107.41234 11478.765 + 6340 303.42145 130.76428 -3358.8092 -2907.4928 108.86921 11479.474 + 6350 302.25307 130.74659 -3358.2196 -2908.6411 118.18869 11480.185 + 6360 300.99721 130.72327 -3357.4424 -2909.7319 132.727 11480.9 + 6370 300.52902 130.72315 -3357.4383 -2910.4241 133.3151 11481.617 + 6380 301.36639 130.70502 -3356.8341 -2908.5744 131.9067 11482.337 + 6390 302.92162 130.75993 -3358.6645 -2908.0915 98.083321 11483.061 + 6400 304.04037 130.77086 -3359.0287 -2906.7917 85.558236 11483.787 + 6410 304.08127 130.7884 -3359.6132 -2907.3153 79.763619 11484.516 + 6420 303.50603 130.76237 -3358.7457 -2907.3035 95.71087 11485.247 + 6430 303.07158 130.67992 -3355.9975 -2905.2015 132.28109 11485.98 + 6440 302.84297 130.69628 -3356.5426 -2906.0866 130.08786 11486.716 + 6450 302.71585 130.65926 -3355.3087 -2905.0418 145.92658 11487.456 + 6460 304.01534 130.65 -3355.0001 -2902.8003 136.47269 11488.199 + 6470 308.426 130.72489 -3357.4963 -2898.736 68.173149 11488.945 + 6480 314.43141 130.85052 -3361.684 -2893.9911 -30.856651 11489.693 + 6490 317.62943 
130.90173 -3363.391 -2890.9413 -74.928409 11490.44 + 6500 315.39938 130.83941 -3361.3137 -2892.181 -27.949849 11491.188 + 6510 309.65119 130.73642 -3357.8807 -2897.298 62.20165 11491.935 + 6520 304.82403 130.62405 -3354.1351 -2900.7324 140.94596 11492.685 + 6530 303.59714 130.60156 -3353.3853 -2901.8075 145.91118 11493.438 + 6540 305.17559 130.61386 -3353.7952 -2899.8696 106.99363 11494.194 + 6550 307.5524 130.62985 -3354.3284 -2896.8675 57.293813 11494.952 + 6560 310.03758 130.67155 -3355.7184 -2894.561 -0.40912979 11495.713 + 6570 312.78351 130.71833 -3357.2778 -2892.036 -53.357835 11496.474 + 6580 314.83451 130.73194 -3357.7314 -2889.4389 -75.958118 11497.235 + 6590 314.91084 130.69925 -3356.6415 -2888.2355 -53.152164 11497.995 + 6600 312.50182 130.62232 -3354.0774 -2889.2547 14.380903 11498.754 + 6610 308.03746 130.5294 -3350.98 -2892.7977 106.90623 11499.515 + 6620 302.905 130.41978 -3347.326 -2896.7778 206.54809 11500.279 + 6630 299.05706 130.31171 -3343.7237 -2898.8989 285.55706 11501.047 + 6640 297.83331 130.29019 -3343.0063 -2900.0018 299.61584 11501.821 + 6650 299.39366 130.32655 -3344.2183 -2898.893 261.07875 11502.602 + 6660 302.64071 130.39169 -3346.3898 -2896.2347 195.22353 11503.388 + 6670 305.77268 130.48224 -3349.4081 -2894.5944 122.675 11504.178 + 6680 307.8656 130.52095 -3350.6984 -2892.7717 82.138825 11504.972 + 6690 309.66859 130.58564 -3352.8547 -2892.2462 36.760951 11505.768 + 6700 312.24745 130.64181 -3354.727 -2890.2826 -11.564949 11506.565 + 6710 315.24083 130.69825 -3356.6084 -2887.7116 -64.274848 11507.363 + 6720 317.12359 130.78052 -3359.3507 -2887.6534 -118.49441 11508.16 + 6730 316.51973 130.75926 -3358.642 -2887.8429 -111.57791 11508.955 + 6740 312.69603 130.67552 -3355.8508 -2890.7392 -49.633088 11509.749 + 6750 305.61898 130.49734 -3349.9113 -2895.3262 80.049765 11510.543 + 6760 296.65404 130.31845 -3343.9484 -2902.698 227.37851 11511.339 + 6770 289.18044 130.18068 -3339.356 -2909.222 342.98042 11512.14 + 6780 287.04575 130.09535 
-3336.5115 -2909.5527 384.56708 11512.948 + 6790 291.4753 130.20012 -3340.0039 -2906.4565 288.55085 11513.765 + 6800 299.5486 130.34002 -3344.6674 -2899.1115 140.00843 11514.587 + 6810 306.68526 130.46176 -3348.7252 -2892.5541 1.8985385 11515.413 + 6820 310.02442 130.52558 -3350.8525 -2889.7147 -82.459263 11516.24 + 6830 309.76295 130.46826 -3348.942 -2888.1931 -87.190244 11517.066 + 6840 307.6045 130.40975 -3346.9918 -2889.4535 -72.124057 11517.891 + 6850 305.64839 130.3829 -3346.0966 -2891.4678 -68.676332 11518.715 + 6860 305.58845 130.36139 -3345.3796 -2890.84 -83.222792 11519.539 + 6870 308.26421 130.42857 -3347.6192 -2889.0995 -152.89628 11520.362 + 6880 312.55597 130.50343 -3350.1142 -2885.2109 -238.35853 11521.182 + 6890 316.50496 130.56773 -3352.2575 -2881.4804 -316.44062 11521.998 + 6900 319.34442 130.59076 -3353.0254 -2878.0248 -369.57931 11522.809 + 6910 322.21292 130.65794 -3355.2646 -2875.9973 -439.12634 11523.613 + 6920 325.64667 130.72438 -3357.4792 -2873.1045 -510.42908 11524.41 + 6930 327.67586 130.80306 -3360.1019 -2872.7089 -570.05967 11525.197 + 6940 326.76948 130.82873 -3360.9575 -2874.9127 -580.81931 11525.974 + 6950 323.93928 130.77369 -3359.1229 -2877.2878 -539.25831 11526.74 + 6960 320.32953 130.70775 -3356.925 -2880.4591 -483.49198 11527.497 + 6970 316.67419 130.64904 -3354.968 -2883.9391 -428.29681 11528.244 + 6980 313.8414 130.56316 -3352.1054 -2885.2901 -368.45433 11528.984 + 6990 312.43265 130.5603 -3352.0099 -2887.29 -350.29486 11529.717 + 7000 312.5114 130.54694 -3351.5646 -2886.7275 -338.9529 11530.444 + 7010 313.46202 130.5506 -3351.6868 -2885.4358 -338.41678 11531.165 + 7020 314.03111 130.54297 -3351.4324 -2884.335 -326.21454 11531.88 + 7030 313.18298 130.49221 -3349.7403 -2883.9043 -280.83129 11532.59 + 7040 310.27018 130.44489 -3348.163 -2886.6596 -217.53981 11533.294 + 7050 305.91733 130.29877 -3343.2923 -2888.2634 -105.39533 11533.995 + 7060 301.41959 130.20536 -3340.1788 -2891.84 -18.186143 11534.694 + 7070 298.17424 
130.14022 -3338.0073 -2894.4957 37.787659 11535.394 + 7080 297.3149 130.12531 -3337.5102 -2895.2769 43.794726 11536.094 + 7090 298.92601 130.11484 -3337.1613 -2892.5315 17.967238 11536.796 + 7100 301.83696 130.19266 -3339.7552 -2890.7956 -59.056671 11537.499 + 7110 304.94913 130.28629 -3342.8764 -2889.2877 -149.01523 11538.202 + 7120 308.32311 130.29424 -3343.1414 -2884.5341 -210.51803 11538.902 + 7130 311.92637 130.40139 -3346.7129 -2882.7461 -311.70753 11539.599 + 7140 315.22631 130.47904 -3349.3013 -2880.4261 -395.76191 11540.29 + 7150 316.96261 130.49882 -3349.9608 -2878.5029 -438.31945 11540.974 + 7160 316.255 130.51463 -3350.4878 -2880.0825 -451.94916 11541.65 + 7170 314.91375 130.4985 -3349.9498 -2881.5395 -440.25631 11542.318 + 7180 315.3125 130.52019 -3350.6731 -2881.6696 -449.95947 11542.978 + 7190 316.90732 130.53757 -3351.2524 -2879.8768 -464.87528 11543.629 + 7200 317.79602 130.56106 -3352.0354 -2879.3379 -476.34779 11544.273 + 7210 316.72825 130.51898 -3350.6328 -2879.5235 -448.7442 11544.907 + 7220 313.92107 130.46301 -3348.7669 -2881.8331 -404.76929 11545.533 + 7230 310.84006 130.41375 -3347.1249 -2884.7739 -365.75674 11546.152 + 7240 308.94947 130.37186 -3345.7285 -2886.1896 -343.12739 11546.764 + 7250 308.23359 130.34101 -3344.7004 -2886.2263 -335.82281 11547.37 + 7260 307.27195 130.30143 -3343.3811 -2886.3374 -321.96815 11547.969 + 7270 305.65792 130.29665 -3343.2216 -2888.5787 -312.81782 11548.563 + 7280 304.63707 130.2955 -3343.1834 -2890.0589 -305.12823 11549.152 + 7290 304.68904 130.28363 -3342.7876 -2889.5857 -298.36076 11549.735 + 7300 305.24957 130.25408 -3341.8025 -2887.7669 -284.85596 11550.312 + 7310 305.35736 130.24731 -3341.577 -2887.3811 -272.5344 11550.885 + 7320 304.37179 130.18394 -3339.4646 -2886.7346 -229.05924 11551.453 + 7330 302.70991 130.09167 -3336.389 -2886.131 -173.14556 11552.017 + 7340 301.99132 130.08086 -3336.0286 -2886.8394 -165.51696 11552.578 + 7350 303.95302 130.11797 -3337.2655 -2885.1584 -211.1136 11553.136 + 
7360 308.76582 130.2301 -3341.0035 -2881.7377 -319.1359 11553.69 + 7370 315.16088 130.35319 -3345.1064 -2876.3285 -448.97789 11554.238 + 7380 321.65556 130.44287 -3348.0956 -2869.6573 -563.21693 11554.779 + 7390 326.3254 130.58012 -3352.6705 -2867.2862 -670.10763 11555.308 + 7400 327.39994 130.6122 -3353.74 -2866.7574 -694.54924 11555.826 + 7410 324.96103 130.54057 -3351.3523 -2867.9974 -641.31404 11556.33 + 7420 320.96628 130.51789 -3350.5963 -2873.1832 -590.89118 11556.822 + 7430 316.84671 130.39818 -3346.606 -2875.3205 -506.20367 11557.303 + 7440 313.61331 130.33135 -3344.3783 -2877.9023 -456.35222 11557.774 + 7450 312.42365 130.31607 -3343.8691 -2879.1625 -452.42033 11558.237 + 7460 314.07546 130.39147 -3346.3822 -2879.2187 -515.04212 11558.691 + 7470 318.30062 130.43001 -3347.667 -2874.219 -588.1756 11559.135 + 7480 322.43835 130.55779 -3351.9264 -2872.3238 -690.09045 11559.569 + 7490 323.35732 130.57178 -3352.3926 -2871.4231 -711.87875 11559.989 + 7500 320.25094 130.53134 -3351.0448 -2874.6958 -667.61837 11560.396 + 7510 314.77472 130.44114 -3348.038 -2879.8345 -575.69874 11560.79 + 7520 309.35457 130.32059 -3344.0195 -2883.878 -468.18012 11561.173 + 7530 305.1503 130.19224 -3339.7412 -2885.8532 -368.21665 11561.547 + 7540 302.05466 130.12086 -3337.3621 -2888.0787 -302.0951 11561.914 + 7550 299.70449 130.08373 -3336.1243 -2890.3366 -259.17708 11562.276 + 7560 297.68595 130.02149 -3334.0498 -2891.2645 -214.88896 11562.633 + 7570 295.82581 129.99779 -3333.2597 -2893.2412 -192.72147 11562.986 + 7580 294.63186 129.96719 -3332.2398 -2893.9972 -180.71733 11563.336 + 7590 295.15347 129.92637 -3330.8791 -2891.8607 -187.07967 11563.682 + 7600 298.07438 129.97532 -3332.5105 -2889.1475 -256.85225 11564.025 + 7610 303.3963 130.12594 -3337.5312 -2886.2522 -394.11561 11564.363 + 7620 309.94279 130.23333 -3341.111 -2880.0945 -530.72625 11564.694 + 7630 315.59703 130.37882 -3345.9606 -2876.534 -672.72532 11565.015 + 7640 318.5323 130.44006 -3348.0021 -2874.2094 -753.99589 
11565.322 + 7650 318.96609 130.46077 -3348.6923 -2874.2545 -789.44796 11565.616 + 7660 318.56818 130.50096 -3350.0321 -2876.1861 -814.38809 11565.894 + 7670 318.11745 130.48498 -3349.4995 -2876.3239 -806.42398 11566.157 + 7680 316.38913 130.43636 -3347.8788 -2877.274 -768.3224 11566.404 + 7690 313.23025 130.36794 -3345.598 -2879.6917 -705.11799 11566.636 + 7700 310.50497 130.34349 -3344.7831 -2882.9305 -657.61332 11566.855 + 7710 308.74458 130.36547 -3345.5158 -2886.2816 -632.48628 11567.06 + 7720 306.31874 130.35879 -3345.293 -2889.6671 -590.15026 11567.254 + 7730 302.63455 130.28037 -3342.6791 -2892.5331 -512.50689 11567.437 + 7740 299.13612 130.21348 -3340.4493 -2895.507 -446.77052 11567.609 + 7750 298.0468 130.20509 -3340.1695 -2896.8475 -430.64475 11567.773 + 7760 300.33257 130.25021 -3341.6737 -2894.9518 -469.03026 11567.928 + 7770 304.852 130.33469 -3344.4896 -2891.0453 -545.78208 11568.075 + 7780 309.54234 130.39893 -3346.631 -2886.2103 -618.02309 11568.211 + 7790 312.65589 130.41687 -3347.2289 -2882.1769 -659.79078 11568.335 + 7800 314.16294 130.43524 -3347.8412 -2880.5476 -688.03112 11568.446 + 7810 315.31829 130.40739 -3346.913 -2877.901 -696.55138 11568.543 + 7820 316.96488 130.45619 -3348.5397 -2877.0785 -738.84359 11568.627 + 7830 318.75398 130.49994 -3349.998 -2875.8756 -779.52285 11568.697 + 7840 319.77663 130.51707 -3350.569 -2874.9255 -800.40544 11568.752 + 7850 319.36777 130.51828 -3350.6093 -2875.574 -797.67564 11568.79 + 7860 317.17471 130.47308 -3349.1027 -2877.3293 -756.66704 11568.814 + 7870 313.21321 130.38576 -3346.1919 -2880.311 -681.40229 11568.822 + 7880 308.13711 130.26908 -3342.3028 -2883.9722 -586.08726 11568.818 + 7890 303.26337 130.18398 -3339.466 -2888.3847 -509.27844 11568.801 + 7900 300.40053 130.15361 -3338.4536 -2891.6306 -476.3332 11568.775 + 7910 300.64546 130.14793 -3338.2644 -2891.077 -485.4729 11568.739 + 7920 303.73549 130.18944 -3339.6479 -2887.8644 -540.37616 11568.694 + 7930 307.94525 130.23687 -3341.2292 -2883.1839 
-605.56118 11568.638 + 7940 310.9357 130.30814 -3343.6047 -2881.1114 -664.04694 11568.571 + 7950 311.35198 130.32947 -3344.3158 -2881.2033 -674.05514 11568.49 + 7960 309.49123 130.28308 -3342.7692 -2882.4244 -633.58486 11568.397 + 7970 307.38871 130.2833 -3342.7767 -2885.5593 -608.17997 11568.291 + 7980 307.87615 130.29429 -3343.1429 -2885.2005 -612.32774 11568.172 + 7990 311.79629 130.37188 -3345.7293 -2881.956 -675.13747 11568.042 + 8000 316.35903 130.46209 -3348.7364 -2878.1763 -751.21864 11567.899 + 8010 317.9114 130.49792 -3349.9307 -2877.0616 -780.5499 11567.741 + 8020 315.36125 130.45414 -3348.4714 -2879.3955 -742.08936 11567.567 + 8030 310.51292 130.35005 -3345.0018 -2883.1374 -660.84883 11567.379 + 8040 306.37305 130.27949 -3342.6495 -2886.9428 -599.45224 11567.178 + 8050 304.74821 130.24194 -3341.3979 -2888.1081 -575.44349 11566.964 + 8060 305.6885 130.29752 -3343.2506 -2888.5621 -611.33998 11566.74 + 8070 307.90181 130.36203 -3345.4011 -2887.4205 -660.38956 11566.503 + 8080 309.65264 130.39791 -3346.5969 -2886.0121 -690.62735 11566.253 + 8090 309.8876 130.41229 -3347.0762 -2886.1419 -695.41724 11565.989 + 8100 308.957 130.43171 -3347.7238 -2888.1737 -689.04336 11565.712 + 8110 308.17928 130.44619 -3348.2065 -2889.8132 -681.96117 11565.421 + 8120 308.66692 130.43005 -3347.6683 -2888.5496 -676.92772 11565.116 + 8130 310.5806 130.46136 -3348.7119 -2886.7468 -702.60591 11564.798 + 8140 312.3917 130.50256 -3350.0855 -2885.4265 -729.06767 11564.466 + 8150 311.95117 130.45664 -3348.5548 -2884.5511 -698.65466 11564.119 + 8160 308.49527 130.38383 -3346.1278 -2887.2644 -625.7147 11563.759 + 8170 303.23966 130.2564 -3341.8801 -2890.8341 -511.54152 11563.386 + 8180 298.44017 130.18909 -3339.6364 -2895.7293 -422.77463 11563.003 + 8190 296.06366 130.13721 -3337.907 -2897.5347 -364.70418 11562.611 + 8200 296.70594 130.11996 -3337.3321 -2896.0045 -354.23246 11562.212 + 8210 299.69536 130.1719 -3339.0634 -2893.2893 -403.55102 11561.806 + 8220 304.36492 130.2643 
-3342.1435 -2889.4237 -495.06191 11561.391 + 8230 309.55745 130.37095 -3345.6982 -2885.255 -605.95716 11560.966 + 8240 314.20872 130.47906 -3349.3019 -2881.9402 -718.41638 11560.53 + 8250 318.15091 130.56168 -3352.0561 -2878.8307 -814.77273 11560.078 + 8260 321.12312 130.65728 -3355.2425 -2877.5962 -898.93263 11559.611 + 8270 321.66982 130.7098 -3356.9934 -2878.5339 -931.02404 11559.125 + 8280 318.46085 130.66568 -3355.5227 -2881.8363 -878.99875 11558.622 + 8290 312.43135 130.57858 -3352.6192 -2887.9013 -773.41314 11558.1 + 8300 306.40458 130.45998 -3348.666 -2892.9124 -647.83868 11557.563 + 8310 302.94328 130.34785 -3344.9282 -2894.323 -544.0133 11557.013 + 8320 302.59511 130.30796 -3343.5987 -2893.5114 -495.86026 11556.452 + 8330 304.23133 130.36573 -3345.5244 -2893.0034 -505.02584 11555.88 + 8340 305.90809 130.38089 -3346.0296 -2891.0145 -502.39341 11555.299 + 8350 305.72955 130.41531 -3347.1769 -2892.4274 -494.91942 11554.706 + 8360 303.28357 130.40573 -3346.8576 -2895.7463 -456.8076 11554.104 + 8370 300.74663 130.3509 -3345.0301 -2897.6922 -410.54356 11553.492 + 8380 301.57655 130.3338 -3344.46 -2895.8877 -417.58525 11552.872 + 8390 306.90576 130.45142 -3348.3805 -2891.8815 -522.19474 11552.243 + 8400 314.95559 130.62201 -3354.0671 -2885.5945 -672.95129 11551.603 + 8410 322.52915 130.76789 -3358.9298 -2879.1921 -806.15989 11550.949 + 8420 326.70356 130.79669 -3359.8898 -2873.943 -853.49719 11550.28 + 8430 325.86539 130.81875 -3360.6249 -2875.9248 -839.39056 11549.593 + 8440 320.36955 130.75811 -3358.6037 -2882.0783 -740.50095 11548.889 + 8450 312.13374 130.59697 -3353.2322 -2888.9569 -572.02982 11548.171 + 8460 303.56932 130.39801 -3346.6004 -2895.0641 -385.89579 11547.441 + 8470 296.90512 130.24783 -3341.5942 -2899.9703 -240.46004 11546.702 + 8480 293.77692 130.19769 -3339.923 -2902.9521 -172.82377 11545.959 + 8490 294.80777 130.21627 -3340.5423 -2902.0381 -177.25707 11545.211 + 8500 299.19386 130.30773 -3343.5909 -2898.5628 -244.49812 11544.459 + 8510 
304.30835 130.39369 -3346.4562 -2893.8206 -318.42171 11543.701 + 8520 307.44288 130.47292 -3349.0974 -2891.7994 -370.93937 11542.937 + 8530 307.57632 130.46633 -3348.8776 -2891.3812 -360.54989 11542.166 + 8540 305.42692 130.44697 -3348.2324 -2893.933 -322.52787 11541.386 + 8550 302.58852 130.39844 -3346.6146 -2896.5371 -267.69373 11540.6 + 8560 300.77806 130.31852 -3343.9505 -2896.5659 -212.86111 11539.808 + 8570 301.03811 130.33004 -3344.3346 -2896.5632 -216.39366 11539.011 + 8580 303.74041 130.36923 -3345.6411 -2893.8503 -257.65203 11538.209 + 8590 308.53908 130.44132 -3348.0439 -2889.1153 -332.98252 11537.402 + 8600 313.97242 130.52583 -3350.861 -2883.8508 -417.72283 11536.587 + 8610 317.91608 130.60918 -3353.6393 -2880.7632 -485.1295 11535.764 + 8620 318.94261 130.65175 -3355.0582 -2880.6552 -507.0389 11534.931 + 8630 316.96263 130.62808 -3354.2692 -2882.8113 -474.65877 11534.087 + 8640 313.01031 130.54241 -3351.4137 -2885.8346 -402.50149 11533.233 + 8650 309.27854 130.48076 -3349.3586 -2889.3302 -343.10319 11532.371 + 8660 307.82276 130.4515 -3348.3833 -2890.5203 -315.42206 11531.502 + 8670 308.79112 130.46928 -3348.9759 -2889.6725 -322.48383 11530.625 + 8680 310.39282 130.47181 -3349.0602 -2887.3744 -320.37632 11529.742 + 8690 310.51329 130.47322 -3349.1073 -2887.2423 -294.76763 11528.852 + 8700 308.40661 130.45109 -3348.3697 -2889.6382 -234.46379 11527.955 + 8710 305.34501 130.38279 -3346.093 -2891.9154 -147.52575 11527.053 + 8720 303.75649 130.38645 -3346.2149 -2894.4002 -105.73896 11526.148 + 8730 305.48254 130.39122 -3346.3741 -2891.992 -103.59347 11525.24 + 8740 310.23218 130.47843 -3349.2808 -2887.834 -169.89042 11524.329 + 8750 315.31584 130.57627 -3352.5422 -2883.5338 -250.22326 11523.414 + 8760 317.62432 130.64399 -3354.7998 -2882.3577 -296.937 11522.494 + 8770 316.12584 130.64965 -3354.9884 -2884.7752 -281.13412 11521.567 + 8780 311.60466 130.58471 -3352.8236 -2889.3352 -205.06342 11520.634 + 8790 305.83974 130.50355 -3350.1183 -2895.2049 -103.51352 
11519.696 + 8800 299.75365 130.353 -3345.1001 -2899.2392 30.349293 11518.756 + 8810 294.5909 130.29133 -3343.0443 -2904.8627 119.59174 11517.816 + 8820 292.39763 130.27921 -3342.6403 -2907.721 156.76383 11516.877 + 8830 294.31309 130.27958 -3342.6527 -2904.8843 144.14266 11515.94 + 8840 299.30815 130.37132 -3345.7106 -2900.5124 62.774833 11515.006 + 8850 304.94374 130.49928 -3349.976 -2896.3953 -39.87108 11514.072 + 8860 308.98179 130.60825 -3353.6085 -2894.0215 -119.67316 11513.136 + 8870 310.89466 130.63651 -3354.5503 -2892.118 -145.94636 11512.198 + 8880 311.51979 130.64374 -3354.7913 -2891.4292 -146.9069 11511.256 + 8890 311.52356 130.63495 -3354.4982 -2891.1305 -131.34067 11510.31 + 8900 310.85222 130.63686 -3354.562 -2892.1929 -112.13993 11509.361 + 8910 309.45345 130.63643 -3354.5478 -2894.2592 -86.671073 11508.409 + 8920 307.76632 130.63035 -3354.345 -2896.5659 -61.229289 11507.455 + 8930 306.95469 130.61368 -3353.7892 -2897.2174 -44.389424 11506.499 + 8940 307.76662 130.58913 -3352.9709 -2895.1914 -42.374302 11505.541 + 8950 309.75038 130.64256 -3354.7522 -2894.0219 -80.043519 11504.582 + 8960 311.46723 130.70274 -3356.758 -2893.4741 -115.31 11503.62 + 8970 311.77421 130.73161 -3357.7203 -2893.9798 -123.03851 11502.656 + 8980 310.72758 130.7262 -3357.5401 -2895.3564 -102.69928 11501.688 + 8990 309.06656 130.6734 -3355.78 -2896.0669 -57.569398 11500.718 + 9000 307.93603 130.63485 -3354.4949 -2896.4634 -24.119632 11499.746 + 9010 308.95031 130.64193 -3354.7311 -2895.191 -27.855513 11498.772 + 9020 312.36284 130.73119 -3357.7062 -2893.0902 -81.512018 11497.797 + 9030 316.09258 130.81497 -3360.4991 -2890.3353 -128.53279 11496.82 + 9040 317.1486 130.81922 -3360.6407 -2888.9062 -112.32697 11495.839 + 9050 314.06577 130.80206 -3360.0688 -2892.9197 -42.754526 11494.856 + 9060 307.68843 130.64981 -3354.9936 -2897.3304 109.97315 11493.871 + 9070 300.83128 130.52372 -3350.7906 -2903.3269 253.81005 11492.887 + 9080 296.16719 130.41261 -3347.0871 -2906.5609 363.63538 
11491.908 + 9090 294.36767 130.39249 -3346.4163 -2908.5667 399.0105 11490.934 + 9100 294.88785 130.47649 -3349.2164 -2910.593 357.76086 11489.968 + 9110 297.55158 130.55085 -3351.695 -2909.1096 287.80721 11489.008 + 9120 302.41681 130.63249 -3354.4165 -2904.5944 187.27654 11488.052 + 9130 309.065 130.78054 -3359.3513 -2899.6405 45.535304 11487.1 + 9140 316.30304 130.89293 -3363.0978 -2892.621 -82.363239 11486.147 + 9150 323.12981 131.02922 -3367.6405 -2887.0094 -202.05958 11485.193 + 9160 327.69033 131.10975 -3370.3251 -2882.9106 -261.18639 11484.233 + 9170 327.16158 131.07938 -3369.3127 -2882.6846 -213.10207 11483.269 + 9180 320.24169 130.94414 -3364.8046 -2888.4694 -53.230871 11482.299 + 9190 308.7206 130.71555 -3357.185 -2897.9865 188.94769 11481.328 + 9200 296.56133 130.52588 -3350.8626 -2909.7501 415.59418 11480.36 + 9210 288.90861 130.36453 -3345.4844 -2915.7547 574.98237 11479.399 + 9220 289.45484 130.37013 -3345.6708 -2915.1287 578.71512 11478.448 + 9230 296.6305 130.50834 -3350.2781 -2909.0627 456.85323 11477.508 + 9240 305.44198 130.67462 -3355.8208 -2901.499 300.24673 11476.575 + 9250 312.41829 130.80159 -3360.0531 -2895.3546 175.12559 11475.647 + 9260 316.48615 130.8722 -3362.4068 -2891.6576 105.50438 11474.723 + 9270 317.77572 130.88484 -3362.8281 -2890.1608 96.183546 11473.8 + 9280 316.31209 130.89129 -3363.0431 -2892.5529 128.03009 11472.877 + 9290 311.79107 130.8224 -3360.7468 -2896.9812 226.66872 11471.957 + 9300 304.6863 130.71505 -3357.1682 -2903.9704 367.05129 11471.041 + 9310 297.51311 130.55893 -3351.9643 -2909.4361 523.27623 11470.131 + 9320 293.64692 130.49073 -3349.691 -2912.9135 608.78374 11469.23 + 9330 294.26987 130.50569 -3350.1896 -2912.4855 612.94872 11468.34 + 9340 297.79784 130.55732 -3351.9108 -2908.9591 568.23655 11467.462 + 9350 302.06161 130.67461 -3355.8205 -2906.5267 487.27345 11466.593 + 9360 305.65979 130.75139 -3358.3797 -2903.734 426.52958 11465.733 + 9370 308.13529 130.75508 -3358.5027 -2900.1748 404.16449 11464.881 + 
9380 309.79498 130.79821 -3359.9404 -2899.1439 371.59399 11464.036 + 9390 311.83462 130.83297 -3361.0991 -2897.2688 334.00848 11463.197 + 9400 314.87579 130.91114 -3363.7046 -2895.3507 267.77411 11462.364 + 9410 317.77773 130.9827 -3366.09 -2893.4197 202.58548 11461.536 + 9420 319.5121 130.99995 -3366.665 -2891.415 169.30437 11460.711 + 9430 319.78186 131.02436 -3367.4788 -2891.8275 152.44783 11459.888 + 9440 318.0284 131.033 -3367.7667 -2894.7236 168.17497 11459.068 + 9450 314.07956 130.99487 -3366.4958 -2899.3263 230.89701 11458.251 + 9460 308.88675 130.89308 -3363.1027 -2903.6571 337.65267 11457.437 + 9470 303.86886 130.8065 -3360.2168 -2908.2349 442.66844 11456.63 + 9480 300.09247 130.73217 -3357.7389 -2911.374 534.21071 11455.83 + 9490 297.63992 130.67176 -3355.7255 -2913.0086 608.38651 11455.04 + 9500 295.39002 130.61124 -3353.7081 -2914.3378 677.18586 11454.261 + 9510 292.9949 130.5596 -3351.9868 -2916.1791 737.41686 11453.495 + 9520 292.11259 130.51759 -3350.5864 -2916.091 773.3085 11452.742 + 9530 294.4661 130.57388 -3352.4626 -2914.4666 738.24886 11452.003 + 9540 299.38922 130.71571 -3357.1904 -2911.8716 644.91949 11451.278 + 9550 304.61534 130.80826 -3360.2752 -2907.183 567.12796 11450.564 + 9560 308.2118 130.86129 -3362.0431 -2903.6014 519.63194 11449.861 + 9570 309.67974 130.9321 -3364.4033 -2903.7782 485.4507 11449.167 + 9580 309.61632 130.9455 -3364.85 -2904.3192 487.11561 11448.482 + 9590 309.328 130.919 -3363.9668 -2903.8648 505.27085 11447.806 + 9600 310.14825 130.94573 -3364.8576 -2903.5356 492.49166 11447.14 + 9610 311.91025 130.97111 -3365.7037 -2901.7609 472.4479 11446.481 + 9620 312.52224 130.98603 -3366.2011 -2901.348 466.68927 11445.832 + 9630 310.61312 130.9715 -3365.7167 -2903.7032 494.10973 11445.191 + 9640 307.17584 130.94291 -3364.7637 -2907.8629 539.71782 11444.559 + 9650 304.09736 130.88185 -3362.7285 -2910.4067 592.50642 11443.937 + 9660 302.4362 130.79714 -3359.9048 -2910.0539 639.90038 11443.326 + 9670 301.86504 130.76119 
-3358.7064 -2909.705 656.77165 11442.727 + 9680 301.77435 130.77397 -3359.1325 -2910.266 650.22315 11442.14 + 9690 301.90553 130.76734 -3358.9114 -2909.8498 648.64441 11441.564 + 9700 302.90439 130.80918 -3360.306 -2909.7587 620.88798 11441.001 + 9710 305.54876 130.85542 -3361.8473 -2907.3667 578.03014 11440.449 + 9720 309.28161 130.91899 -3363.9664 -2903.9334 523.63499 11439.908 + 9730 311.72538 130.97724 -3365.9079 -2902.2401 490.08238 11439.377 + 9740 310.48175 130.94398 -3364.7994 -2902.9814 532.58841 11438.854 + 9750 305.27899 130.86509 -3362.1697 -2908.0903 634.12318 11438.342 + 9760 298.8736 130.7234 -3357.4467 -2912.8949 771.70021 11437.842 + 9770 295.63662 130.65505 -3355.1682 -2915.4311 849.35391 11437.355 + 9780 298.18574 130.69524 -3356.5078 -2912.9792 829.70901 11436.885 + 9790 304.03087 130.84699 -3361.5664 -2909.3435 732.38636 11436.43 + 9800 308.43227 130.90364 -3363.4547 -2904.6851 678.75194 11435.989 + 9810 309.389 130.94449 -3364.8163 -2904.6236 655.73171 11435.56 + 9820 309.20478 130.94735 -3364.9117 -2904.993 650.04196 11435.144 + 9830 311.58621 130.9963 -3366.5432 -2903.0824 597.89887 11434.74 + 9840 316.80613 131.07312 -3369.104 -2897.8789 507.30369 11434.347 + 9850 320.92896 131.18393 -3372.7976 -2895.4401 414.42821 11433.963 + 9860 320.01362 131.12947 -3370.9825 -2894.9865 433.4292 11433.587 + 9870 313.81089 131.05079 -3368.3598 -2901.5899 515.78327 11433.22 + 9880 305.53941 130.90419 -3363.4731 -2909.0064 649.28439 11432.862 + 9890 299.21519 130.7995 -3359.9832 -2914.9233 753.05794 11432.516 + 9900 297.04137 130.74006 -3358.0021 -2916.1755 801.45219 11432.184 + 9910 298.82519 130.76151 -3358.7171 -2914.2373 781.78912 11431.867 + 9920 302.18487 130.79397 -3359.7989 -2910.3219 743.67238 11431.564 + 9930 304.1639 130.83896 -3361.2986 -2908.8779 716.25452 11431.276 + 9940 303.39638 130.86995 -3362.3318 -2911.0527 726.22258 11431.001 + 9950 300.5113 130.78869 -3359.623 -2912.6352 804.30803 11430.741 + 9960 297.71143 130.74853 -3358.2844 
-2915.4612 867.5875 11430.495 + 9970 297.2169 130.70973 -3356.991 -2914.9034 908.28216 11430.266 + 9980 299.38293 130.71469 -3357.1564 -2911.847 904.21977 11430.054 + 9990 303.00811 130.7639 -3358.7965 -2908.0949 865.72383 11429.859 + 10000 306.41271 130.8167 -3360.5568 -2904.7911 825.76031 11429.68 +Loop time of 2.50278 on 1 procs for 10000 steps with 500 atoms -Performance: 265.040 ns/day, 0.091 hours/ns, 3067.589 timesteps/s -100.0% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 345.216 ns/day, 0.070 hours/ns, 3995.560 timesteps/s +100.0% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.688 | 2.688 | 2.688 | 0.0 | 82.46 -Bond | 0.00086093 | 0.00086093 | 0.00086093 | 0.0 | 0.03 -Neigh | 0.03159 | 0.03159 | 0.03159 | 0.0 | 0.97 -Comm | 0.093323 | 0.093323 | 0.093323 | 0.0 | 2.86 -Output | 0.0014281 | 0.0014281 | 0.0014281 | 0.0 | 0.04 -Modify | 0.35664 | 0.35664 | 0.35664 | 0.0 | 10.94 -Other | | 0.0881 | | | 2.70 +Pair | 2.0969 | 2.0969 | 2.0969 | 0.0 | 83.78 +Bond | 0.0007844 | 0.0007844 | 0.0007844 | 0.0 | 0.03 +Neigh | 0.026091 | 0.026091 | 0.026091 | 0.0 | 1.04 +Comm | 0.076576 | 0.076576 | 0.076576 | 0.0 | 3.06 +Output | 0.0075617 | 0.0075617 | 0.0075617 | 0.0 | 0.30 +Modify | 0.2441 | 0.2441 | 0.2441 | 0.0 | 9.75 +Other | | 0.05081 | | | 2.03 Nlocal: 500 ave 500 max 500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -171,4 +1076,4 @@ Neighbor list builds = 32 Dangerous builds = 0 #write_data ${rep}/lj-out.data -Total wall time: 0:00:03 +Total wall time: 0:00:02 diff --git a/examples/USER/misc/grem/lj-temper/2/log.lammps.2 b/examples/USER/misc/grem/lj-temper/2/log.lammps.2 index cc4fd71a2403a58db90544d1df4afead9ed3a228..4d3076a406e8416a16ce9e5209fa635954897bf3 100644 --- a/examples/USER/misc/grem/lj-temper/2/log.lammps.2 +++ b/examples/USER/misc/grem/lj-temper/2/log.lammps.2 @@ -16,10 +16,6 @@ read_data 
2/lj.data #dump dump all xyz 1000 ${rep}/dump.xyz -thermo 100 -thermo_style custom step temp pe etotal press vol -timestep 1.0 - fix fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso ${press} ${press} 10000.0 @@ -27,135 +23,1044 @@ fix fxnpt all npt temp 300 300 1000.0 iso 0 ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso 0 0 10000.0 fix fxgREM all grem ${lambda} -.03 -30000 fxnpt fix fxgREM all grem 920 -.03 -30000 fxnpt + +thermo 10 +thermo_style custom step temp f_fxgREM pe etotal press vol thermo_modify press fxgREM_press +timestep 1.0 temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} temper/grem 10000 100 920 fxgREM fxnpt 10294 98392 Neighbor list info ... - 1 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 7 ghost atom cutoff = 7 - binsize = 3.5 -> bins = 7 7 7 -Memory usage per processor = 5.3797 Mbytes -Step Temp PotEng TotEng Press Volume - 0 303.5147 -3379.1918 -2927.7367 939.06157 11374.376 - 100 318.12855 -3389.1972 -2916.0051 732.24211 11375.313 - 200 300.55209 -3378.8051 -2931.7566 879.17024 11377.778 - 300 303.98225 -3382.1398 -2929.9893 688.68264 11381.592 - 400 315.07191 -3384.8092 -2916.1636 538.20119 11386.698 - 500 309.36576 -3382.7455 -2922.5874 478.65718 11393.035 - 600 310.56557 -3382.7619 -2920.8192 337.42905 11400.274 - 700 304.59438 -3382.4412 -2929.3802 343.41809 11408.201 - 800 307.37357 -3384.554 -2927.3591 165.57164 11416.925 - 900 316.86746 -3390.4013 -2919.085 -181.01119 11426.176 - 1000 299.18538 -3381.9504 -2936.9349 -39.701634 11435.48 - 1100 300.12698 -3382.5392 -2936.1231 -186.50374 11444.773 - 1200 296.14975 -3382.3095 -2941.8092 -328.47519 11453.747 - 1300 290.48513 -3379.3388 -2947.2642 -379.01155 11462.196 - 1400 307.12598 -3390.0083 -2933.1817 -784.07577 11469.94 - 1500 
292.27399 -3382.948 -2948.2126 -757.50578 11476.37 - 1600 298.4373 -3386.9034 -2943.0005 -916.53485 11481.206 - 1700 284.99283 -3380.9051 -2956.9999 -761.53168 11484.379 - 1800 296.30929 -3387.9389 -2947.2013 -939.04947 11486.001 - 1900 274.95571 -3376.1525 -2967.1767 -557.57811 11485.93 - 2000 286.6494 -3387.2487 -2960.8795 -874.00502 11484.534 - 2100 301.45624 -3391.8264 -2943.4331 -1081.5931 11481.54 - 2200 294.31641 -3388.9801 -2951.2068 -874.50856 11476.941 - 2300 308.19066 -3397.6354 -2939.2252 -1180.813 11470.81 - 2400 294.64485 -3394.5337 -2956.2718 -968.71497 11462.705 - 2500 298.56162 -3398.2204 -2954.1326 -933.7456 11452.815 - 2600 294.06753 -3394.6475 -2957.2443 -699.18137 11441.168 - 2700 293.43299 -3392.0706 -2955.6113 -469.64296 11428.099 - 2800 294.09756 -3396.3977 -2958.9499 -245.0764 11414.04 - 2900 299.63675 -3400.8857 -2955.1988 -183.76834 11399.474 - 3000 289.06398 -3398.5529 -2968.5921 99.751425 11384.569 - 3100 307.44213 -3408.7747 -2951.4778 -8.3031637 11369.696 - 3200 302.98498 -3406.3856 -2955.7184 182.98744 11355.058 - 3300 291.9362 -3402.8973 -2968.6643 497.72457 11340.867 - 3400 295.54663 -3410.1026 -2970.4993 587.20344 11327.555 - 3500 304.27556 -3418.4127 -2965.8259 574.20245 11315.364 - 3600 303.72416 -3418.133 -2966.3663 573.08992 11304.186 - 3700 309.84914 -3426.5382 -2965.6611 418.30717 11294.016 - 3800 317.00037 -3430.4681 -2958.9541 467.30843 11284.802 - 3900 297.49383 -3424.0999 -2981.6004 779.41312 11276.818 - 4000 299.31354 -3421.8877 -2976.6815 903.93135 11270.237 - 4100 294.42695 -3423.8465 -2985.9087 1006.7998 11265.197 - 4200 300.01777 -3428.3819 -2982.1283 922.16275 11261.965 - 4300 292.77221 -3426.1312 -2990.6547 959.47941 11260.451 - 4400 300.87895 -3431.0308 -2983.4961 790.75992 11260.593 - 4500 290.78158 -3422.5048 -2989.9892 1001.9136 11262.315 - 4600 297.70252 -3430.615 -2987.8051 741.34784 11265.627 - 4700 303.5284 -3430.3888 -2978.9133 665.85341 11270.32 - 4800 291.67229 -3426.7342 -2992.8938 674.67006 11276.471 
- 4900 297.00112 -3426.7467 -2984.98 532.96983 11283.928 - 5000 301.01123 -3430.4774 -2982.746 304.12386 11292.466 - 5100 302.29794 -3430.5071 -2980.8618 154.274 11301.776 - 5200 298.81333 -3428.9592 -2984.497 71.617996 11311.612 - 5300 287.74054 -3421.9947 -2994.0024 126.94546 11321.814 - 5400 299.8741 -3430.5222 -2984.4822 -283.50833 11332.317 - 5500 285.30311 -3421.1025 -2996.7357 -152.79946 11342.614 - 5600 293.4497 -3423.1142 -2986.63 -384.69324 11352.571 - 5700 292.90437 -3421.8788 -2986.2057 -385.78559 11361.964 - 5800 295.75086 -3421.8274 -2981.9204 -538.6221 11370.715 - 5900 285.54471 -3417.8008 -2993.0746 -584.6133 11378.554 - 6000 279.42138 -3415.0373 -2999.4192 -496.44729 11385.309 - 6100 292.98635 -3420.2915 -2984.4965 -807.7782 11390.881 - 6200 285.18497 -3413.9471 -2989.7561 -720.47221 11394.96 - 6300 296.76693 -3419.0502 -2977.6318 -883.05972 11397.626 - 6400 288.53322 -3415.8922 -2986.7209 -743.81572 11398.671 - 6500 301.89212 -3418.3134 -2969.2718 -760.06071 11398.197 - 6600 295.43874 -3414.401 -2974.9583 -595.49387 11396.453 - 6700 307.77134 -3414.4111 -2956.6245 -619.65168 11393.607 - 6800 307.77881 -3415.132 -2957.3343 -598.73379 11389.666 - 6900 299.95571 -3411.6113 -2965.4499 -414.64515 11384.775 - 7000 310.48581 -3418.9557 -2957.1316 -554.8734 11379.061 - 7100 297.78265 -3411.5928 -2968.6637 -204.17877 11372.511 - 7200 308.16321 -3414.8796 -2956.5102 -168.32518 11365.403 - 7300 305.49867 -3410.8638 -2956.4577 -63.088242 11357.993 - 7400 302.44796 -3410.8142 -2960.9458 62.696434 11350.522 - 7500 312.15684 -3419.0214 -2954.7118 27.395999 11343.127 - 7600 307.34561 -3414.5741 -2957.4208 257.94209 11335.847 - 7700 304.79016 -3411.896 -2958.5437 470.94979 11328.999 - 7800 311.79257 -3417.7252 -2953.9574 385.82094 11322.846 - 7900 316.24138 -3419.8749 -2949.4899 332.38484 11317.485 - 8000 315.73781 -3421.0721 -2951.436 300.24782 11312.939 - 8100 303.38025 -3416.1142 -2964.8591 623.97214 11309.099 - 8200 313.53945 -3418.4381 -2952.072 462.62665 
11306.275 - 8300 312.90183 -3417.7579 -2952.3402 535.34718 11304.458 - 8400 302.89319 -3412.1607 -2961.63 754.08676 11303.725 - 8500 321.89652 -3422.325 -2943.5284 382.37676 11304.219 - 8600 308.02194 -3419.4464 -2961.2871 471.90335 11305.76 - 8700 310.12893 -3419.2625 -2957.9692 389.78683 11308.17 - 8800 306.85617 -3418.0084 -2961.5831 425.37989 11311.375 - 8900 287.92832 -3407.5424 -2979.2708 659.46236 11315.622 - 9000 278.06342 -3403.7402 -2990.142 757.54946 11321.103 - 9100 290.97422 -3412.7395 -2979.9374 406.79415 11327.735 - 9200 285.14294 -3408.7977 -2984.6692 329.51839 11335.252 - 9300 280.67249 -3406.8005 -2989.3214 234.83036 11343.32 - 9400 289.42707 -3416.0941 -2985.5933 -28.179186 11351.699 - 9500 285.62232 -3407.4208 -2982.5793 10.204022 11360.293 - 9600 285.51121 -3409.1935 -2984.5173 -102.42078 11368.884 - 9700 280.01648 -3404.052 -2987.5487 -91.259523 11377.233 - 9800 289.42587 -3410.2875 -2979.7884 -383.72706 11385.424 - 9900 291.01583 -3408.3435 -2975.4795 -514.96551 11393.129 - 10000 290.66211 -3409.5137 -2977.1759 -667.48493 11399.792 -Loop time of 3.25989 on 1 procs for 10000 steps with 500 atoms + binsize = 3.5, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.725 | 5.725 | 5.725 Mbytes +Step Temp f_fxgREM PotEng TotEng Press Volume + 0 303.5147 121.37576 -3379.1918 -2927.7367 939.06157 11374.376 + 10 302.01819 121.39208 -3379.7359 -2930.5067 955.34812 11374.385 + 20 299.66358 121.35742 -3378.5808 -2932.854 998.03963 11374.412 + 30 297.90005 121.29795 -3376.5984 -2933.4946 1044.5312 11374.459 + 40 297.80982 121.28845 -3376.2818 -2933.3122 1054.8591 11374.525 + 50 299.23154 121.29836 -3376.6121 -2931.5279 1041.3766 11374.611 + 60 302.03518 121.3687 -3378.9567 -2929.7023 989.46264 11374.717 + 70 306.89734 121.46445 -3382.1482 -2925.6616 
909.21372 11374.842 + 80 312.96036 121.61757 -3387.2525 -2921.7477 798.74753 11374.984 + 90 317.17373 121.67419 -3389.1395 -2917.3676 741.54767 11375.141 + 100 318.12855 121.67592 -3389.1972 -2916.0051 732.24211 11375.313 + 110 317.40874 121.63553 -3387.8509 -2915.7295 752.10626 11375.498 + 120 317.26883 121.64121 -3388.0404 -2916.1271 747.01078 11375.698 + 130 318.44405 121.66687 -3388.8957 -2915.2343 716.45621 11375.912 + 140 320.49327 121.72022 -3390.6741 -2913.9647 662.18511 11376.14 + 150 321.57523 121.72069 -3390.6897 -2912.3709 631.6531 11376.38 + 160 319.8381 121.66628 -3388.8758 -2913.1409 645.43574 11376.633 + 170 314.72914 121.61398 -3387.1325 -2918.9968 690.12596 11376.897 + 180 307.88565 121.47522 -3382.5075 -2924.5509 787.49791 11377.175 + 190 302.4306 121.38539 -3379.5129 -2929.6703 860.06931 11377.468 + 200 300.55209 121.36415 -3378.8051 -2931.7566 879.17024 11377.778 + 210 302.1658 121.35469 -3378.4897 -2929.0409 861.25419 11378.104 + 220 306.03052 121.42929 -3380.9764 -2925.7792 784.32025 11378.447 + 230 311.24302 121.51255 -3383.7517 -2920.8013 683.50702 11378.804 + 240 317.19613 121.63306 -3387.7686 -2915.9634 557.05248 11379.175 + 250 321.84027 121.77732 -3392.5772 -2913.8642 433.86307 11379.556 + 260 322.49145 121.77838 -3392.6128 -2912.9312 405.00168 11379.945 + 270 319.15693 121.73888 -3391.296 -2916.5742 434.93244 11380.343 + 280 313.97609 121.65023 -3388.341 -2921.3254 510.85834 11380.749 + 290 308.48891 121.54438 -3384.8125 -2925.9586 605.15694 11381.164 + 300 303.98225 121.46419 -3382.1398 -2929.9893 688.68264 11381.592 + 310 301.16194 121.35538 -3378.5127 -2930.5572 772.56756 11382.032 + 320 299.57734 121.33603 -3377.8676 -2932.269 811.10407 11382.487 + 330 298.20583 121.29686 -3376.5619 -2933.0033 850.91749 11382.958 + 340 296.68231 121.27869 -3375.9562 -2934.6638 875.27503 11383.445 + 350 296.07804 121.2376 -3374.5866 -2934.193 890.91889 11383.949 + 360 297.85202 121.28674 -3376.2248 -2933.1925 843.92836 11384.469 + 370 302.07191 
121.34854 -3378.2848 -2928.9758 765.82512 11385.006 + 380 307.49834 121.42425 -3380.8082 -2923.4278 672.28582 11385.557 + 390 312.23703 121.47974 -3382.6579 -2918.229 595.95834 11386.122 + 400 315.07191 121.54428 -3384.8092 -2916.1636 538.20119 11386.698 + 410 315.94513 121.55364 -3385.1213 -2915.1768 524.5897 11387.284 + 420 315.30599 121.57916 -3385.9721 -2916.9783 521.24858 11387.881 + 430 313.40144 121.54636 -3384.8785 -2918.7176 551.21708 11388.488 + 440 310.63766 121.51 -3383.6667 -2921.6167 585.62129 11389.105 + 450 308.5878 121.47864 -3382.6214 -2923.6204 604.44349 11389.733 + 460 308.95854 121.50288 -3383.4294 -2923.877 573.35315 11390.374 + 470 311.48972 121.52682 -3384.2274 -2920.91 518.66227 11391.025 + 480 313.53991 121.55393 -3385.131 -2918.7641 464.40873 11391.686 + 490 312.7438 121.56121 -3385.3737 -2920.191 442.86257 11392.356 + 500 309.36576 121.48237 -3382.7455 -2922.5874 478.65718 11393.035 + 510 306.02972 121.44378 -3381.4592 -2926.2632 503.73832 11393.723 + 520 304.91443 121.42569 -3380.8562 -2927.3191 503.69667 11394.421 + 530 306.90624 121.44801 -3381.6004 -2925.1006 461.88876 11395.128 + 540 310.95566 121.50359 -3383.453 -2920.93 389.09174 11395.845 + 550 314.97025 121.58624 -3386.2079 -2917.7136 305.58596 11396.569 + 560 317.27856 121.58689 -3386.2296 -2914.3018 271.64243 11397.299 + 570 316.65742 121.58979 -3386.3265 -2915.3226 265.5502 11398.034 + 580 313.57724 121.55613 -3385.2042 -2918.7818 296.4929 11398.775 + 590 310.76616 121.50776 -3383.592 -2921.3508 332.13482 11399.521 + 600 310.56557 121.48286 -3382.7619 -2920.8192 337.42905 11400.274 + 610 312.25189 121.52378 -3384.1259 -2919.6749 301.12723 11401.034 + 620 313.24348 121.52065 -3384.0218 -2918.0959 287.28351 11401.799 + 630 311.62555 121.50992 -3383.6639 -2920.1445 300.88185 11402.57 + 640 306.89182 121.41129 -3380.3762 -2923.8979 376.45557 11403.347 + 650 300.52127 121.30298 -3376.7659 -2929.7633 469.88722 11404.132 + 660 295.39559 121.24088 -3374.6961 -2935.3176 534.41022 
11404.925 + 670 293.59119 121.20717 -3373.5724 -2936.8778 557.79376 11405.729 + 680 295.36949 121.2459 -3374.8633 -2935.5236 520.47729 11406.543 + 690 299.69654 121.34257 -3378.0857 -2932.3098 439.24953 11407.368 + 700 304.59438 121.47324 -3382.4412 -2929.3802 343.41809 11408.201 + 710 307.96318 121.55668 -3385.2225 -2927.1506 284.402 11409.041 + 720 308.35357 121.56263 -3385.4211 -2926.7686 286.21174 11409.886 + 730 305.33918 121.48732 -3382.9107 -2928.7418 350.49893 11410.738 + 740 300.03501 121.40623 -3380.2078 -2933.9285 432.74134 11411.596 + 750 295.12581 121.33235 -3377.7448 -2938.7676 498.71123 11412.463 + 760 293.47318 121.31724 -3377.2413 -2940.7222 500.40178 11413.339 + 770 296.15733 121.36068 -3378.6892 -2938.1776 430.22536 11414.225 + 780 301.50769 121.4365 -3381.2166 -2932.7467 318.64474 11415.119 + 790 306.13175 121.49937 -3383.3122 -2927.9645 219.00268 11416.02 + 800 307.37357 121.53662 -3384.554 -2927.3591 165.57164 11416.925 + 810 304.85262 121.4546 -3381.8199 -2928.3748 199.62236 11417.834 + 820 300.60164 121.32558 -3377.5193 -2930.3971 273.18602 11418.746 + 830 297.77967 121.28163 -3376.0544 -2933.1297 303.65329 11419.664 + 840 298.49364 121.32122 -3377.374 -2933.3873 268.86417 11420.588 + 850 302.57668 121.41376 -3380.4586 -2930.3987 179.21637 11421.517 + 860 308.14872 121.51752 -3383.9175 -2925.5696 66.802979 11422.449 + 870 313.06449 121.57803 -3385.9345 -2920.2748 -27.644002 11423.383 + 880 316.30535 121.70702 -3390.2339 -2919.7537 -134.25313 11424.317 + 890 317.74593 121.72456 -3390.8186 -2918.1956 -177.66609 11425.248 + 900 316.86746 121.71204 -3390.4013 -2919.085 -181.01119 11426.176 + 910 312.83916 121.63073 -3387.691 -2922.3665 -122.44293 11427.101 + 920 306.04955 121.52724 -3384.2412 -2929.0157 -26.251061 11428.024 + 930 298.70537 121.40199 -3380.0665 -2935.7648 85.194278 11428.946 + 940 293.16469 121.24629 -3374.8762 -2938.8159 191.47072 11429.87 + 950 290.52469 121.25346 -3375.1154 -2942.9819 203.84747 11430.798 + 960 291.09669 
121.26724 -3375.5747 -2942.5904 178.59821 11431.73 + 970 293.99694 121.32293 -3377.4311 -2940.1329 112.43402 11432.665 + 980 297.31974 121.40213 -3380.071 -2937.8304 32.375746 11433.603 + 990 299.27867 121.46862 -3382.2872 -2937.1329 -28.673784 11434.542 + 1000 299.18538 121.45851 -3381.9504 -2936.9349 -39.701634 11435.48 + 1010 298.23484 121.43293 -3381.0978 -2937.4961 -33.404095 11436.418 + 1020 298.25068 121.43673 -3381.2242 -2937.5989 -43.442228 11437.355 + 1030 300.04566 121.44897 -3381.6325 -2935.3373 -71.253925 11438.291 + 1040 303.03618 121.49472 -3383.1573 -2932.4139 -123.1949 11439.227 + 1050 305.62588 121.56886 -3385.6285 -2931.0332 -184.60439 11440.16 + 1060 306.07572 121.55697 -3385.2324 -2929.968 -198.95981 11441.09 + 1070 304.58306 121.53369 -3384.4562 -2931.412 -196.21389 11442.016 + 1080 303.06947 121.52182 -3384.0606 -2933.2678 -198.57704 11442.939 + 1090 302.02905 121.49888 -3383.296 -2934.0506 -197.95326 11443.858 + 1100 300.12698 121.47618 -3382.5392 -2936.1231 -186.50374 11444.773 + 1110 297.09858 121.42122 -3380.7073 -2938.7957 -150.78343 11445.685 + 1120 294.34115 121.37517 -3379.1724 -2941.3623 -122.02811 11446.594 + 1130 293.53424 121.35643 -3378.5476 -2941.9377 -123.07716 11447.501 + 1140 294.65948 121.37256 -3379.0854 -2940.8018 -157.77335 11448.406 + 1150 296.41713 121.39221 -3379.7404 -2938.8424 -203.59962 11449.308 + 1160 297.79086 121.44045 -3381.3482 -2938.4069 -259.83749 11450.206 + 1170 298.3438 121.43053 -3381.0176 -2937.2538 -285.02956 11451.1 + 1180 297.93842 121.42704 -3380.9013 -2937.7405 -299.60577 11451.988 + 1190 297.07582 121.44281 -3381.4271 -2939.5494 -313.37642 11452.871 + 1200 296.14975 121.46928 -3382.3095 -2941.8092 -328.47519 11453.747 + 1210 294.6538 121.41447 -3380.4823 -2942.2071 -307.74563 11454.618 + 1220 292.18812 121.38165 -3379.3884 -2944.7807 -288.80549 11455.483 + 1230 289.29587 121.34108 -3378.0359 -2947.7302 -265.02817 11456.343 + 1240 287.36784 121.30996 -3376.9988 -2949.5609 -253.90577 11457.197 + 
1250 287.9862 121.3445 -3378.15 -2949.7924 -288.61773 11458.047 + 1260 290.67163 121.38871 -3379.6236 -2947.2715 -343.01375 11458.891 + 1270 293.05086 121.45117 -3381.7057 -2945.8148 -398.8239 11459.729 + 1280 293.3269 121.47951 -3382.6504 -2946.3489 -421.98111 11460.559 + 1290 291.94616 121.43846 -3381.2819 -2947.0341 -404.2175 11461.381 + 1300 290.48513 121.38016 -3379.3388 -2947.2642 -379.01155 11462.196 + 1310 289.5049 121.38184 -3379.3947 -2948.7781 -380.76243 11463.004 + 1320 288.29266 121.33196 -3377.7319 -2948.9185 -361.06431 11463.804 + 1330 286.50603 121.30718 -3376.906 -2950.75 -346.08019 11464.598 + 1340 284.97315 121.283 -3376.1 -2952.2241 -334.73457 11465.385 + 1350 285.0942 121.29648 -3376.5494 -2952.4934 -353.9694 11466.166 + 1360 287.69797 121.3114 -3377.0467 -2949.1178 -398.93385 11466.94 + 1370 292.61732 121.4076 -3380.2532 -2945.0072 -497.14612 11467.706 + 1380 298.30665 121.52966 -3384.3222 -2940.6136 -612.2068 11468.463 + 1390 303.26834 121.61106 -3387.0353 -2935.9466 -704.13502 11469.208 + 1400 307.12598 121.70025 -3390.0083 -2933.1817 -784.07577 11469.94 + 1410 310.72443 111.7597 -3391.99 -2929.811 -903.86882 11470.656 + 1420 313.08093 111.8142 -3393.8066 -2928.1225 -949.86849 11471.355 + 1430 311.92564 111.78664 -3392.8879 -2928.9221 -935.79524 11472.035 + 1440 306.83176 111.65817 -3388.6057 -2932.2167 -853.53977 11472.698 + 1450 299.93438 111.55501 -3385.1671 -2939.0374 -768.71514 11473.345 + 1460 294.08607 111.47326 -3382.4419 -2945.0112 -703.16346 11473.976 + 1470 291.16992 111.42836 -3380.9452 -2947.852 -676.4699 11474.595 + 1480 290.9146 111.41574 -3380.5247 -2947.8112 -685.08466 11475.2 + 1490 291.6478 111.44708 -3381.5692 -2947.7652 -719.96734 11475.792 + 1500 292.27399 111.48844 -3382.948 -2948.2126 -757.50578 11476.37 + 1510 292.86392 111.49368 -3383.1228 -2947.5099 -780.5594 11476.933 + 1520 294.51586 111.45732 -3381.9108 -2943.8408 -796.1939 11477.482 + 1530 298.66244 111.51768 -3383.9226 -2939.6848 -867.25871 11478.016 + 1540 
305.12499 111.6403 -3388.0099 -2934.1596 -977.38849 11478.532 + 1550 311.42743 111.76444 -3392.148 -2928.9233 -1082.1854 11479.03 + 1560 314.77279 111.82781 -3394.2604 -2926.0598 -1134.5441 11479.508 + 1570 313.56967 111.85893 -3395.2978 -2928.8866 -1134.3408 11479.963 + 1580 308.49422 111.77435 -3392.4784 -2933.6166 -1058.3039 11480.397 + 1590 302.44777 111.67376 -3389.1252 -2939.2571 -970.9529 11480.811 + 1600 298.4373 111.6071 -3386.9034 -2943.0005 -916.53485 11481.206 + 1610 295.87369 121.55327 -3385.1092 -2945.0195 -817.89925 11481.583 + 1620 291.72057 121.47052 -3382.3506 -2948.4384 -752.40186 11481.945 + 1630 286.42562 121.4184 -3380.6132 -2954.5768 -688.64573 11482.293 + 1640 282.3291 121.37996 -3379.3318 -2959.3887 -639.06203 11482.627 + 1650 280.69222 121.33456 -3377.8185 -2960.3101 -606.99734 11482.949 + 1660 280.73343 121.33917 -3377.9722 -2960.4025 -610.51545 11483.26 + 1670 281.20601 121.37523 -3379.1744 -2960.9017 -636.26727 11483.558 + 1680 281.60706 121.38227 -3379.4092 -2960.54 -658.93964 11483.845 + 1690 282.43713 121.3773 -3379.2432 -2959.1394 -689.31476 11484.119 + 1700 284.99283 121.42715 -3380.9051 -2956.9999 -761.53168 11484.379 + 1710 289.89742 121.53526 -3384.5086 -2953.3082 -877.31243 11484.625 + 1720 296.08582 121.6352 -3387.84 -2947.4348 -995.24633 11484.854 + 1730 301.07885 121.72156 -3390.7188 -2942.8868 -1085.6318 11485.064 + 1740 302.15032 121.73698 -3391.2328 -2941.8071 -1099.7073 11485.253 + 1750 298.07233 121.65919 -3388.6396 -2945.2796 -1020.8589 11485.421 + 1760 290.93001 121.53778 -3384.5926 -2951.8563 -892.10072 11485.569 + 1770 285.26086 121.43907 -3381.3025 -2956.9986 -786.22861 11485.7 + 1780 284.88806 121.44393 -3381.4644 -2957.715 -771.2769 11485.816 + 1790 289.64063 121.48617 -3382.8723 -2952.0538 -822.87904 11485.916 + 1800 296.30929 121.63817 -3387.9389 -2947.2013 -939.04947 11486.001 + 1810 301.13771 121.7255 -3390.85 -2942.9304 -1015.958 11486.068 + 1820 301.47473 121.71962 -3390.654 -2942.2332 -1015.8598 11486.115 
+ 1830 297.01319 121.63483 -3387.8276 -2946.043 -940.39404 11486.143 + 1840 289.90986 121.50232 -3383.4108 -2952.1919 -821.5028 11486.153 + 1850 283.25206 121.42911 -3380.9702 -2959.6543 -726.30851 11486.147 + 1860 278.66258 121.30252 -3376.7507 -2962.2613 -628.04206 11486.126 + 1870 276.10297 121.2717 -3375.7234 -2965.0412 -584.08784 11486.094 + 1880 274.89473 121.24934 -3374.9781 -2966.093 -555.60277 11486.05 + 1890 274.54298 121.29759 -3376.5863 -2968.2244 -563.00854 11485.995 + 1900 274.95571 121.28457 -3376.1525 -2967.1767 -557.57811 11485.93 + 1910 276.40533 121.29814 -3376.6048 -2965.4728 -577.78348 11485.854 + 1920 279.06323 121.37291 -3379.097 -2964.0116 -639.4629 11485.767 + 1930 282.47767 121.45348 -3381.7827 -2961.6186 -715.80565 11485.667 + 1940 285.48772 121.49126 -3383.042 -2958.4007 -775.98055 11485.553 + 1950 287.34844 121.52401 -3384.1337 -2956.7247 -823.07479 11485.425 + 1960 288.21792 121.54289 -3384.763 -2956.0607 -852.59982 11485.281 + 1970 288.98139 121.59131 -3386.3769 -2956.539 -886.92028 11485.12 + 1980 289.73397 121.61585 -3387.1949 -2956.2376 -905.93976 11484.942 + 1990 289.27373 121.6461 -3388.2033 -2957.9305 -911.54108 11484.747 + 2000 286.6494 121.61746 -3387.2487 -2960.8795 -874.00502 11484.534 + 2010 282.84677 121.53185 -3384.3952 -2963.682 -804.18466 11484.304 + 2020 279.86052 121.45311 -3381.7704 -2965.4991 -745.93595 11484.059 + 2030 279.04789 121.44124 -3381.3748 -2966.3122 -735.05376 11483.799 + 2040 280.2917 121.45065 -3381.6883 -2964.7756 -755.12459 11483.525 + 2050 282.94775 121.46183 -3382.0611 -2961.1977 -793.11303 11483.236 + 2060 286.43496 121.49593 -3383.1977 -2957.1474 -849.1452 11482.932 + 2070 290.31058 121.53707 -3384.569 -2952.754 -910.65909 11482.612 + 2080 294.64195 121.59833 -3386.6111 -2948.3535 -979.17141 11482.274 + 2090 299.01064 121.69984 -3389.9948 -2945.2392 -1053.971 11481.917 + 2100 301.45624 121.75479 -3391.8264 -2943.4331 -1081.5931 11481.54 + 2110 299.33318 121.6876 -3389.5868 -2944.3514 -1011.2385 
11481.143 + 2120 291.86971 121.55923 -3385.3075 -2951.1734 -864.31793 11480.725 + 2130 281.47448 121.38978 -3379.6592 -2960.9873 -675.87884 11480.292 + 2140 272.37144 121.27133 -3375.7109 -2970.5791 -524.70277 11479.845 + 2150 268.35365 121.21294 -3373.7648 -2974.6091 -451.81594 11479.387 + 2160 270.64645 121.26049 -3375.3496 -2972.7835 -486.39362 11478.921 + 2170 277.31677 121.36729 -3378.9095 -2966.4219 -591.385 11478.445 + 2180 285.30388 121.502 -3383.3999 -2959.032 -722.74702 11477.958 + 2190 291.79802 121.60187 -3386.7289 -2952.7014 -825.424 11477.457 + 2200 294.31641 121.6694 -3388.9801 -2951.2068 -874.50856 11476.941 + 2210 292.77892 111.65778 -3388.5928 -2953.1063 -913.32777 11476.407 + 2220 289.58413 111.60686 -3386.8954 -2956.161 -866.51756 11475.855 + 2230 287.17962 111.57295 -3385.7649 -2958.607 -834.6994 11475.287 + 2240 287.43428 111.56342 -3385.4472 -2957.9105 -835.77909 11474.702 + 2250 290.43451 111.64681 -3388.227 -2956.2277 -896.43797 11474.102 + 2260 295.02086 111.75269 -3391.7563 -2952.9352 -980.6381 11473.484 + 2270 299.94988 111.80908 -3393.6358 -2947.4831 -1049.9623 11472.847 + 2280 304.16444 111.89709 -3396.5695 -2944.148 -1125.4267 11472.19 + 2290 306.92223 111.92938 -3397.6461 -2941.1226 -1167.0169 11471.511 + 2300 308.19066 111.92906 -3397.6354 -2939.2252 -1180.813 11470.81 + 2310 307.9191 111.94658 -3398.2194 -2940.2131 -1183.0998 11470.086 + 2320 305.32453 111.91365 -3397.1217 -2942.9746 -1140.9068 11469.339 + 2330 299.66922 111.84951 -3394.9838 -2949.2485 -1058.1081 11468.57 + 2340 291.79682 111.71689 -3390.5631 -2956.5375 -930.71925 11467.781 + 2350 284.43341 111.59284 -3386.4278 -2963.3547 -812.77226 11466.974 + 2360 280.82979 111.55085 -3385.0285 -2967.3154 -759.9031 11466.151 + 2370 282.30679 111.63265 -3387.755 -2967.845 -798.6093 11465.314 + 2380 287.03267 111.68954 -3389.6514 -2962.712 -856.47314 11464.461 + 2390 291.89673 111.73694 -3391.2312 -2957.0569 -911.66863 11463.592 + 2400 294.64485 111.83601 -3394.5337 -2956.2718 
-968.71497 11462.705 + 2410 295.42926 111.86696 -3395.5654 -2956.1368 -982.65805 11461.799 + 2420 295.98859 111.86265 -3395.4218 -2955.1612 -979.87039 11460.875 + 2430 297.10217 111.88733 -3396.2442 -2954.3273 -990.2616 11459.932 + 2440 297.6178 111.89027 -3396.3423 -2953.6584 -986.07962 11458.97 + 2450 296.80823 111.89873 -3396.6242 -2955.1445 -971.31857 11457.988 + 2460 295.36269 111.85715 -3395.2382 -2955.9086 -927.73803 11456.989 + 2470 293.88534 111.85049 -3395.0163 -2957.8842 -895.48012 11455.971 + 2480 293.29695 111.8665 -3395.5502 -2959.2932 -881.6792 11454.936 + 2490 294.91662 111.86008 -3395.3361 -2956.67 -880.22849 11453.884 + 2500 298.56162 111.94661 -3398.2204 -2954.1326 -933.7456 11452.815 + 2510 302.46148 111.99409 -3399.803 -2949.9145 -975.50709 11451.728 + 2520 305.07752 112.02817 -3400.939 -2947.1593 -999.67287 11450.623 + 2530 306.10114 112.06735 -3402.2452 -2946.9429 -1006.4053 11449.498 + 2540 304.7862 112.03594 -3401.198 -2947.8516 -960.71627 11448.354 + 2550 300.29287 111.97927 -3399.3089 -2952.646 -877.11928 11447.192 + 2560 293.70338 111.83638 -3394.5461 -2957.6845 -744.87487 11446.013 + 2570 287.95381 111.73123 -3391.041 -2962.7316 -637.4158 11444.82 + 2580 285.77483 111.71296 -3390.432 -2965.3636 -597.36102 11443.614 + 2590 288.15131 111.74672 -3391.5574 -2962.9541 -620.6375 11442.397 + 2600 294.06753 111.83943 -3394.6475 -2957.2443 -699.18137 11441.168 + 2610 300.57217 111.95397 -3398.4658 -2951.3874 -789.85436 11439.925 + 2620 304.17259 112.00843 -3400.281 -2947.8474 -830.91952 11438.668 + 2630 303.97571 112.00362 -3400.1207 -2947.9799 -813.39826 11437.395 + 2640 301.9695 111.98053 -3399.3509 -2950.1942 -767.20275 11436.106 + 2650 300.36263 111.92461 -3397.4869 -2950.7202 -706.37935 11434.803 + 2660 299.63894 111.91099 -3397.033 -2951.3428 -667.88368 11433.486 + 2670 299.01226 111.90304 -3396.7681 -2952.01 -634.22798 11432.156 + 2680 297.68606 111.88733 -3396.2445 -2953.459 -593.96429 11430.815 + 2690 295.45836 111.82898 -3394.2993 
-2954.8274 -532.41527 11429.462 + 2700 293.43299 111.76212 -3392.0706 -2955.6113 -469.64296 11428.099 + 2710 293.42006 111.74452 -3391.4841 -2955.044 -438.50365 11426.727 + 2720 295.8166 111.76706 -3392.2353 -2952.2306 -435.93616 11425.347 + 2730 298.77381 111.85815 -3395.2716 -2950.8681 -457.56066 11423.958 + 2740 300.02022 111.89712 -3396.5706 -2950.3132 -441.84628 11422.561 + 2750 298.80022 111.88713 -3396.2378 -2951.7951 -387.48096 11421.156 + 2760 295.95928 111.85406 -3395.1354 -2954.9184 -314.42527 11419.743 + 2770 293.14463 111.84911 -3394.9704 -2958.94 -259.67878 11418.324 + 2780 291.86902 111.82634 -3394.2112 -2960.0782 -220.19157 11416.9 + 2790 292.44361 111.84619 -3394.8731 -2959.8854 -220.47563 11415.472 + 2800 294.09756 111.89193 -3396.3977 -2958.9499 -245.0764 11414.04 + 2810 296.23007 111.88252 -3396.0841 -2955.4644 -252.2928 11412.604 + 2820 298.34896 111.91633 -3397.2111 -2953.4396 -271.90436 11411.162 + 2830 300.05456 111.93158 -3397.7192 -2951.4108 -275.3478 11409.716 + 2840 300.89125 111.94416 -3398.1387 -2950.5858 -264.31154 11408.264 + 2850 300.24842 111.92823 -3397.6075 -2951.0108 -227.16929 11406.808 + 2860 298.71648 111.91724 -3397.2414 -2952.9233 -185.26385 11405.347 + 2870 297.79873 111.92046 -3397.3485 -2954.3955 -156.48484 11403.883 + 2880 298.06838 111.95921 -3398.6404 -2955.2863 -157.05585 11402.417 + 2890 298.98148 112.00093 -3400.0311 -2955.3188 -171.4369 11400.947 + 2900 299.63675 112.02657 -3400.8857 -2955.1988 -183.76834 11399.474 + 2910 299.56489 112.06378 -3402.1262 -2956.5461 -198.1572 11397.998 + 2920 298.86408 112.05112 -3401.7039 -2957.1662 -190.39123 11396.519 + 2930 298.61141 112.03417 -3401.1389 -2956.9771 -184.9868 11395.036 + 2940 300.17383 112.06718 -3402.2393 -2955.7535 -208.70805 11393.549 + 2950 302.44718 112.08068 -3402.6893 -2952.8221 -221.34301 11392.059 + 2960 302.33021 112.06618 -3402.206 -2952.5128 -194.16368 11390.565 + 2970 298.81078 112.03445 -3401.1483 -2956.6899 -123.05823 11389.067 + 2980 293.5466 
111.98544 -3399.5146 -2962.8863 -24.583667 11387.568 + 2990 289.40177 111.94719 -3398.2398 -2967.7766 63.39208 11386.068 + 3000 289.06398 111.95659 -3398.5529 -2968.5921 99.751425 11384.569 + 3010 292.4145 111.99192 -3399.7305 -2964.7861 89.927899 11383.073 + 3020 296.94675 112.04714 -3401.5713 -2959.8856 57.106368 11381.578 + 3030 300.22478 112.11319 -3403.7729 -2957.2112 28.55446 11380.084 + 3040 300.3298 112.09482 -3403.1605 -2956.4427 58.426621 11378.592 + 3050 296.89176 112.05288 -3401.7627 -2960.1587 125.48024 11377.101 + 3060 292.49321 112.00674 -3400.2248 -2965.1633 198.0849 11375.612 + 3070 291.20947 112.02836 -3400.9453 -2967.7933 212.72692 11374.128 + 3080 294.86673 112.02584 -3400.8614 -2962.2694 188.89767 11372.648 + 3090 301.58541 112.17329 -3405.7762 -2957.1907 76.490984 11371.171 + 3100 307.44213 112.26324 -3408.7747 -2951.4778 -8.3031637 11369.696 + 3110 309.56922 112.29514 -3409.8381 -2949.3774 -35.875096 11368.221 + 3120 307.9068 112.23234 -3407.7447 -2949.7567 12.779958 11366.747 + 3130 304.09288 112.12184 -3404.0614 -2951.7463 106.71404 11365.272 + 3140 299.68127 112.0803 -3402.6768 -2956.9236 183.14993 11363.8 + 3150 296.4291 112.0463 -3401.5433 -2960.6274 245.17217 11362.331 + 3160 295.94225 112.05058 -3401.686 -2961.4943 263.79109 11360.868 + 3170 298.50098 112.08609 -3402.8697 -2958.8721 239.91155 11359.409 + 3180 302.31288 112.16235 -3405.4116 -2955.7441 185.99363 11357.955 + 3190 304.58952 112.20928 -3406.976 -2953.9221 155.63081 11356.505 + 3200 302.98498 112.19157 -3406.3856 -2955.7184 182.98744 11355.058 + 3210 297.71373 112.15219 -3405.0731 -2962.2465 248.21637 11353.615 + 3220 292.21164 112.07734 -3402.5779 -2967.9352 326.71164 11352.177 + 3230 290.8998 112.07242 -3402.414 -2969.7226 343.79771 11350.746 + 3240 294.85128 112.13571 -3404.5236 -2965.9547 291.5268 11349.32 + 3250 301.25922 112.23631 -3407.877 -2959.7767 208.07519 11347.901 + 3260 305.99556 112.30939 -3410.313 -2955.1678 154.75535 11346.486 + 3270 306.88906 112.30266 
-3410.0888 -2953.6145 173.56362 11345.074 + 3280 303.85961 112.26994 -3408.9979 -2957.0297 245.30291 11343.666 + 3290 298.17132 112.1771 -3405.9035 -2962.3962 369.42069 11342.263 + 3300 291.9362 112.08692 -3402.8973 -2968.6643 497.72457 11340.867 + 3310 287.35134 111.98077 -3399.3592 -2971.9458 614.2263 11339.481 + 3320 285.68778 111.95489 -3398.4963 -2973.5574 668.92899 11338.106 + 3330 286.76308 112.00229 -3400.0764 -2973.5381 666.18451 11336.744 + 3340 289.10994 112.05133 -3401.7109 -2971.6818 646.88903 11335.395 + 3350 291.22003 112.14385 -3404.7951 -2971.6273 611.43676 11334.058 + 3360 291.80795 112.14805 -3404.9351 -2970.8929 622.88562 11332.733 + 3370 290.77704 112.16865 -3405.6216 -2973.1128 640.45864 11331.42 + 3380 290.07961 112.19094 -3406.3647 -2974.8932 653.16419 11330.12 + 3390 291.67866 112.24374 -3408.1246 -2974.2747 632.69361 11328.831 + 3400 295.54663 112.30308 -3410.1026 -2970.4993 587.20344 11327.555 + 3410 300.14192 112.35102 -3411.7008 -2965.2624 538.00451 11326.29 + 3420 303.76176 112.40269 -3413.423 -2961.6004 496.22069 11325.036 + 3430 305.65976 112.46521 -3415.5071 -2960.8614 466.62741 11323.791 + 3440 306.03581 112.46292 -3415.4305 -2960.2254 478.77754 11322.556 + 3450 304.74635 112.47631 -3415.8771 -2962.59 503.62978 11321.329 + 3460 301.72265 112.38963 -3412.9876 -2964.198 583.43137 11320.113 + 3470 298.85604 112.34496 -3411.4986 -2966.9729 645.82873 11318.907 + 3480 298.63434 112.41159 -3413.7195 -2969.5236 642.12287 11317.714 + 3490 301.30777 112.48336 -3416.112 -2967.9395 611.21212 11316.533 + 3500 304.27556 112.55238 -3418.4127 -2965.8259 574.20245 11315.364 + 3510 306.78103 102.61357 -3420.4522 -2964.1387 475.95942 11314.205 + 3520 309.09732 102.65604 -3421.8681 -2962.1092 437.80037 11313.055 + 3530 310.32165 102.65431 -3421.8104 -2960.2305 421.21617 11311.914 + 3540 309.63685 102.67485 -3422.4951 -2961.9338 413.30554 11310.781 + 3550 306.93228 102.65657 -3421.8857 -2965.3472 443.55042 11309.656 + 3560 302.75063 102.54472 
-3418.1572 -2967.8386 528.16818 11308.54 + 3570 298.47133 102.4628 -3415.4265 -2971.473 604.72059 11307.433 + 3580 296.83121 102.425 -3414.1667 -2972.6528 644.48871 11306.339 + 3590 299.40835 102.45142 -3415.0473 -2969.7001 628.07737 11305.256 + 3600 303.72416 102.54399 -3418.133 -2966.3663 573.08992 11304.186 + 3610 306.01634 102.6216 -3420.7201 -2965.5439 538.34849 11303.126 + 3620 305.55262 102.60977 -3420.3255 -2965.8392 556.11517 11302.077 + 3630 304.57163 102.63343 -3421.1144 -2968.0872 559.97157 11301.038 + 3640 305.30717 102.6951 -3423.17 -2969.0487 529.92646 11300.01 + 3650 308.72489 102.74655 -3424.8852 -2965.6803 476.14841 11298.992 + 3660 314.25616 102.81071 -3427.0236 -2959.5914 397.79693 11297.983 + 3670 318.77387 102.86748 -3428.9162 -2954.7642 329.90404 11296.982 + 3680 319.05704 102.93054 -3431.0179 -2956.4447 294.75869 11295.987 + 3690 315.13607 102.84231 -3428.077 -2959.3359 355.34816 11294.998 + 3700 309.84914 102.79615 -3426.5382 -2965.6611 418.30717 11294.016 + 3710 305.40551 102.7076 -3423.5867 -2969.3192 499.3939 11293.042 + 3720 301.34948 102.63663 -3421.2208 -2972.9863 575.88381 11292.077 + 3730 296.59888 102.59824 -3419.9415 -2978.7731 646.59307 11291.124 + 3740 292.28387 102.55445 -3418.4818 -2983.7317 716.58986 11290.182 + 3750 291.13037 102.5587 -3418.6234 -2985.589 742.7205 11289.254 + 3760 295.00271 102.61196 -3420.3988 -2981.6046 708.16035 11288.34 + 3770 302.82353 102.73753 -3424.5843 -2974.1572 610.82651 11287.44 + 3780 311.42994 102.83988 -3427.9959 -2964.7674 514.60375 11286.551 + 3790 317.21506 102.91893 -3430.6312 -2958.7978 450.92589 11285.672 + 3800 317.00037 102.91404 -3430.4681 -2958.9541 467.30843 11284.802 + 3810 310.53506 102.84492 -3428.164 -2966.2666 557.79774 11283.94 + 3820 301.55788 102.76479 -3425.4929 -2976.9484 671.50231 11283.09 + 3830 294.80905 102.67117 -3422.3725 -2983.8663 772.74358 11282.252 + 3840 291.64203 102.62422 -3420.8075 -2987.012 827.52378 11281.428 + 3850 290.50064 102.60013 -3420.0044 -2987.9067 
853.66459 11280.62 + 3860 290.64227 102.64274 -3421.4246 -2989.1162 842.07487 11279.828 + 3870 291.69903 102.61638 -3420.546 -2986.6658 849.9664 11279.052 + 3880 293.02163 102.65721 -3421.907 -2986.0595 830.37322 11278.292 + 3890 294.48063 102.68813 -3422.9378 -2984.9202 812.22912 11277.547 + 3900 297.49383 102.723 -3424.0999 -2981.6004 779.41312 11276.818 + 3910 302.78986 102.79675 -3426.5583 -2976.1813 712.09521 11276.103 + 3920 308.98297 102.88823 -3429.6078 -2970.0191 629.21904 11275.402 + 3930 313.55015 102.92337 -3430.7789 -2964.3968 581.9915 11274.713 + 3940 314.33643 102.9164 -3430.5467 -2962.995 585.02367 11274.035 + 3950 311.12468 102.87571 -3429.1902 -2966.4158 637.54746 11273.367 + 3960 306.66509 102.79213 -3426.4042 -2970.2631 721.24494 11272.713 + 3970 304.19975 102.75473 -3425.1575 -2972.6834 775.62193 11272.071 + 3980 303.32362 102.74554 -3424.8514 -2973.6805 807.76484 11271.444 + 3990 301.69065 102.6812 -3422.7067 -2973.9648 863.5589 11270.832 + 4000 299.31354 102.65663 -3421.8877 -2976.6815 903.93135 11270.237 + 4010 298.4143 102.67825 -3422.6085 -2978.7398 910.07224 11269.659 + 4020 300.32501 102.70326 -3423.4418 -2976.7312 891.46626 11269.097 + 4030 303.70295 102.75858 -3425.2862 -2973.551 850.60668 11268.553 + 4040 306.03121 102.82936 -3427.6452 -2972.447 813.15901 11268.024 + 4050 305.85905 102.81853 -3427.2844 -2972.3422 828.55246 11267.511 + 4060 303.10961 102.76813 -3425.6043 -2974.7517 881.30926 11267.013 + 4070 298.87472 102.71861 -3423.9537 -2979.4002 945.43988 11266.532 + 4080 295.0944 102.66409 -3422.1363 -2983.2057 1006.3608 11266.068 + 4090 293.45363 102.67374 -3422.4579 -2985.9678 1023.4866 11265.623 + 4100 294.42695 102.7154 -3423.8465 -2985.9087 1006.7998 11265.197 + 4110 296.68701 102.75294 -3425.098 -2983.7986 981.27604 11264.791 + 4120 297.9877 102.75995 -3425.3315 -2982.0974 972.37347 11264.402 + 4130 297.67801 102.77783 -3425.9277 -2983.1542 967.36535 11264.032 + 4140 297.14688 102.77251 -3425.7503 -2983.7668 972.79396 
11263.68 + 4150 296.45653 102.78842 -3426.2807 -2985.3241 974.10256 11263.346 + 4160 294.02518 102.72237 -3424.0791 -2986.7389 1023.2975 11263.03 + 4170 290.1258 102.66926 -3422.3085 -2990.7684 1077.3488 11262.734 + 4180 288.48138 102.64268 -3421.4226 -2992.3285 1101.1941 11262.458 + 4190 292.19589 102.72551 -3424.1836 -2989.5644 1037.7307 11262.202 + 4200 300.01777 102.85146 -3428.3819 -2982.1283 922.16275 11261.965 + 4210 306.9325 102.9835 -3432.7832 -2976.2444 808.36761 11261.746 + 4220 308.67703 102.99298 -3433.0993 -2973.9656 780.19559 11261.542 + 4230 305.52318 102.92733 -3430.9111 -2976.4686 818.56473 11261.353 + 4240 301.95285 102.86319 -3428.773 -2979.641 860.18942 11261.179 + 4250 302.09525 102.90323 -3430.1076 -2980.7638 833.60204 11261.02 + 4260 305.6598 102.98387 -3432.7955 -2978.1497 767.59215 11260.878 + 4270 308.48583 103.02162 -3434.0539 -2975.2046 728.84386 11260.75 + 4280 306.91152 102.99985 -3433.3282 -2976.8206 753.1284 11260.635 + 4290 300.67491 102.90661 -3430.2203 -2982.9891 844.4771 11260.535 + 4300 292.77221 102.78394 -3426.1312 -2990.6547 959.47941 11260.451 + 4310 287.067 102.6988 -3423.2934 -2996.303 1038.2947 11260.384 + 4320 285.77306 102.70469 -3423.4896 -2998.4238 1041.9517 11260.337 + 4330 287.93474 102.74366 -3424.7887 -2996.5076 1000.7499 11260.309 + 4340 290.8339 102.74648 -3424.8828 -2992.2894 964.87344 11260.3 + 4350 292.82345 102.8064 -3426.8801 -2991.3274 913.9036 11260.309 + 4360 294.9143 102.83229 -3427.743 -2989.0803 877.28107 11260.335 + 4370 299.08154 102.87024 -3429.0079 -2984.1468 826.15075 11260.377 + 4380 303.63153 102.97338 -3432.4461 -2980.8172 751.84766 11260.435 + 4390 304.57354 102.98572 -3432.8573 -2979.8273 741.21062 11260.507 + 4400 300.87895 102.93092 -3431.0308 -2983.4961 790.75992 11260.593 + 4410 295.65701 102.82513 -3427.5043 -2987.7369 867.72447 11260.694 + 4420 293.3237 102.77145 -3425.715 -2989.4182 901.03569 11260.811 + 4430 295.01563 102.80305 -3426.7682 -2987.9548 872.30894 11260.944 + 4440 
298.4772 102.86957 -3428.9856 -2985.0233 820.68128 11261.094 + 4450 301.13223 102.9356 -3431.1868 -2983.2755 780.27045 11261.259 + 4460 301.89227 102.93713 -3431.2376 -2982.1957 785.3517 11261.439 + 4470 300.28387 102.89839 -3429.9464 -2983.2968 826.43396 11261.634 + 4480 297.05808 102.8191 -3427.3034 -2985.4521 894.62545 11261.844 + 4490 293.469 102.75012 -3425.004 -2988.4911 955.65019 11262.071 + 4500 290.78158 102.67514 -3422.5048 -2989.9892 1001.9136 11262.315 + 4510 290.73617 102.65235 -3421.7448 -2989.2968 998.35908 11262.579 + 4520 294.32778 102.7206 -3424.0201 -2986.2299 926.84673 11262.861 + 4530 299.91273 102.8312 -3427.7068 -2981.6093 823.14509 11263.16 + 4540 304.60313 102.95974 -3431.9914 -2978.9174 721.10776 11263.475 + 4550 307.28106 103.02595 -3434.1984 -2977.1411 662.38081 11263.803 + 4560 308.27577 103.06639 -3435.5465 -2977.0096 629.19874 11264.144 + 4570 307.99922 103.0479 -3434.9301 -2976.8046 630.6078 11264.496 + 4580 306.06231 103.00796 -3433.5987 -2978.3542 654.14529 11264.861 + 4590 302.26958 102.96323 -3432.1076 -2982.5045 694.51753 11265.238 + 4600 297.70252 102.91845 -3430.615 -2987.8051 741.34784 11265.627 + 4610 294.21609 102.84295 -3428.0984 -2990.4743 791.96867 11266.031 + 4620 293.29941 102.83043 -3427.6811 -2991.4205 799.28579 11266.45 + 4630 294.36308 102.85615 -3428.5383 -2990.6956 778.84808 11266.883 + 4640 294.8602 102.8337 -3427.7901 -2989.2079 782.10413 11267.332 + 4650 293.9871 102.8099 -3426.9968 -2989.7133 794.45711 11267.795 + 4660 294.1184 102.79348 -3426.4492 -2988.9704 792.59539 11268.272 + 4670 298.06081 102.84732 -3428.2439 -2984.901 733.63369 11268.765 + 4680 304.46101 102.93099 -3431.0331 -2978.1704 647.1506 11269.271 + 4690 307.80166 102.97961 -3432.6537 -2974.8221 602.56504 11269.79 + 4700 303.5284 102.91166 -3430.3888 -2978.9133 665.85341 11270.32 + 4710 292.93985 102.75505 -3425.1682 -2989.4424 813.38795 11270.863 + 4720 282.80864 102.56624 -3418.8747 -2998.2183 967.0581 11271.421 + 4730 279.54492 102.50596 
-3416.8654 -3001.0635 1011.0276 11271.997 + 4740 283.59197 102.60055 -3420.0185 -2998.1969 928.41624 11272.592 + 4750 290.50915 102.72332 -3424.1105 -2992.0002 804.60394 11273.205 + 4760 295.55717 102.82938 -3427.6461 -2988.0273 697.47092 11273.832 + 4770 297.10237 102.83836 -3427.9452 -2986.028 658.0995 11274.473 + 4780 295.64549 102.80534 -3426.8448 -2987.0945 663.94863 11275.127 + 4790 293.06236 102.79851 -3426.6171 -2990.709 673.84809 11275.793 + 4800 291.67229 102.80203 -3426.7342 -2992.8938 674.67006 11276.471 + 4810 292.01249 102.83596 -3427.8653 -2993.5189 651.00747 11277.163 + 4820 292.63447 102.82289 -3427.4297 -2992.1581 640.8607 11277.866 + 4830 292.30669 102.78737 -3426.2456 -2991.4616 642.14476 11278.582 + 4840 291.10859 102.82136 -3427.3785 -2994.3766 619.77429 11279.31 + 4850 289.34267 102.79533 -3426.5111 -2996.1358 624.41149 11280.049 + 4860 287.57191 102.72187 -3424.0623 -2996.3208 650.59537 11280.801 + 4870 287.06384 102.66375 -3422.125 -2995.1393 667.75972 11281.564 + 4880 288.69681 102.68168 -3422.7227 -2993.3081 644.73165 11282.34 + 4890 292.42718 102.73555 -3424.5185 -2989.5552 594.08896 11283.129 + 4900 297.00112 102.8024 -3426.7467 -2984.98 532.96983 11283.928 + 4910 299.55415 102.84221 -3428.0736 -2982.5095 496.62665 11284.738 + 4920 297.87217 102.84879 -3428.2929 -2985.2306 503.37587 11285.557 + 4930 292.6425 102.81295 -3427.0984 -2991.8148 549.50182 11286.386 + 4940 287.7722 102.74708 -3424.9028 -2996.8635 599.60806 11287.225 + 4950 287.31659 102.75275 -3425.0917 -2997.73 586.06984 11288.076 + 4960 291.10982 102.79172 -3426.3908 -2993.387 522.5188 11288.937 + 4970 296.42623 102.90141 -3430.0471 -2989.1356 414.7098 11289.809 + 4980 300.98261 102.9245 -3430.8166 -2983.1278 344.98924 11290.688 + 4990 302.86292 102.95 -3431.6667 -2981.1811 296.67739 11291.574 + 5000 301.01123 102.91432 -3430.4774 -2982.746 304.12386 11292.466 + 5010 296.3546 102.83968 -3427.9892 -2987.1842 351.81281 11293.363 + 5020 291.32993 102.76736 -3425.5787 -2992.2475 
406.46754 11294.268 + 5030 287.69938 102.73615 -3424.5384 -2996.6074 437.69657 11295.18 + 5040 286.37909 102.74418 -3424.806 -2998.8388 436.62613 11296.1 + 5050 287.48476 102.72615 -3424.2048 -2996.593 426.36155 11297.029 + 5060 290.3183 102.75806 -3425.2688 -2993.4423 380.56619 11297.966 + 5070 294.11349 102.79285 -3426.4284 -2988.9568 322.74018 11298.91 + 5080 297.77497 102.84136 -3428.0454 -2985.1277 257.61977 11299.861 + 5090 300.40045 102.8819 -3429.3967 -2982.5738 201.03387 11300.816 + 5100 302.29794 102.91521 -3430.5071 -2980.8618 154.274 11301.776 + 5110 303.40011 102.9719 -3432.3968 -2981.1121 109.47884 11302.739 + 5120 302.19662 102.91048 -3430.3492 -2980.8547 133.71393 11303.704 + 5130 297.41517 102.83769 -3427.923 -2985.5405 192.61156 11304.672 + 5140 290.27364 102.72099 -3424.0328 -2992.2728 286.99693 11305.644 + 5150 283.45449 102.59355 -3419.7851 -2998.168 381.158 11306.621 + 5160 279.72588 102.52516 -3417.5055 -3001.4345 424.2805 11307.606 + 5170 280.8935 102.56302 -3418.7674 -3000.9596 383.74801 11308.598 + 5180 286.18395 102.64008 -3421.3359 -2995.659 292.93391 11309.598 + 5190 292.92078 102.75493 -3425.1643 -2989.4669 176.1603 11310.603 + 5200 298.81333 102.86878 -3428.9592 -2984.497 71.617996 11311.612 + 5210 302.19486 102.95214 -3431.7379 -2982.2459 5.4927756 11312.623 + 5220 302.04623 102.96964 -3432.3214 -2983.0505 -1.0094129 11313.634 + 5230 297.93396 102.90274 -3430.0914 -2986.9373 57.871921 11314.645 + 5240 290.98403 102.78856 -3426.2852 -2993.4685 153.62321 11315.657 + 5250 284.39054 102.69177 -3423.0591 -3000.0497 233.59997 11316.672 + 5260 281.67736 102.61888 -3420.6293 -3001.6556 267.31106 11317.692 + 5270 283.36341 102.65257 -3421.7522 -3000.2706 221.01361 11318.717 + 5280 286.49224 102.68058 -3422.6861 -2996.5506 163.86532 11319.746 + 5290 288.19121 102.67876 -3422.6252 -2993.9626 130.73552 11320.778 + 5300 287.74054 102.65984 -3421.9947 -2994.0024 126.94546 11321.814 + 5310 285.29287 102.59467 -3419.8224 -2995.4708 162.02022 
11322.851 + 5320 281.8684 102.50672 -3416.8907 -2997.6328 216.92939 11323.892 + 5330 278.91383 102.4811 -3416.0368 -3001.1736 240.8676 11324.937 + 5340 278.10707 102.49214 -3416.4046 -3002.7414 224.80012 11325.987 + 5350 280.36091 102.51033 -3417.0109 -2999.9953 171.30705 11327.041 + 5360 285.2526 102.60024 -3420.008 -2995.7164 60.515301 11328.099 + 5370 292.09667 102.73968 -3424.656 -2990.1844 -86.424317 11329.158 + 5380 299.12795 102.84264 -3428.0879 -2983.1578 -213.43302 11330.215 + 5390 302.39757 102.95118 -3431.7059 -2981.9124 -303.34721 11331.268 + 5400 299.8741 102.91567 -3430.5222 -2984.4822 -283.50833 11332.317 + 5410 293.89376 102.83519 -3427.8397 -2990.695 -212.62637 11333.36 + 5420 288.95625 102.7274 -3424.2466 -2994.4461 -135.25128 11334.399 + 5430 288.06458 102.70911 -3423.637 -2995.1628 -122.97817 11335.436 + 5440 289.82962 102.74321 -3424.7738 -2993.6742 -155.42134 11336.47 + 5450 290.72452 102.76396 -3425.4653 -2993.0346 -183.09002 11337.502 + 5460 289.85167 102.73395 -3424.4652 -2993.3328 -181.08338 11338.531 + 5470 288.28697 102.68943 -3422.9811 -2994.1761 -166.97236 11339.556 + 5480 286.89076 102.69878 -3423.2925 -2996.5643 -171.61527 11340.578 + 5490 285.84108 102.63047 -3421.0158 -2995.8489 -146.60972 11341.597 + 5500 285.30311 102.63307 -3421.1025 -2996.7357 -152.79946 11342.614 + 5510 285.5978 102.69969 -3423.3229 -2998.5178 -191.44492 11343.628 + 5520 285.83015 102.69034 -3423.0113 -2997.8606 -202.92716 11344.638 + 5530 285.16401 102.66322 -3422.1072 -2997.9474 -202.7585 11345.645 + 5540 284.35771 102.64574 -3421.5248 -2998.5642 -206.07273 11346.648 + 5550 284.63863 102.68039 -3422.6798 -2999.3014 -235.77834 11347.647 + 5560 285.89293 102.67851 -3422.617 -2997.3729 -258.1661 11348.643 + 5570 287.55492 102.66703 -3422.2343 -2994.5182 -281.48109 11349.633 + 5580 289.60524 102.68511 -3422.8369 -2992.0711 -320.52223 11350.618 + 5590 291.86661 102.68268 -3422.7559 -2988.6264 -353.51999 11351.598 + 5600 293.4497 102.69343 -3423.1142 -2986.63 
-384.69324 11352.571 + 5610 293.34834 102.70217 -3423.4058 -2987.0724 -398.78241 11353.537 + 5620 291.35867 102.6899 -3422.9966 -2989.6226 -384.95457 11354.495 + 5630 287.83065 102.66993 -3422.331 -2994.2048 -350.6656 11355.447 + 5640 283.71021 102.54628 -3418.2095 -2996.2121 -268.99849 11356.392 + 5650 281.01145 102.4799 -3415.9967 -2998.0135 -219.86423 11357.332 + 5660 281.24007 102.46201 -3415.4004 -2997.0771 -214.00936 11358.268 + 5670 284.02025 102.52634 -3417.5447 -2995.0861 -262.19499 11359.2 + 5680 287.7393 102.605 -3420.1665 -2992.1761 -324.34073 11360.128 + 5690 291.07227 102.654 -3421.7998 -2988.8519 -370.67418 11361.049 + 5700 292.90437 102.65636 -3421.8788 -2986.2057 -385.78559 11361.964 + 5710 292.44841 102.63562 -3421.1873 -2986.1925 -374.61721 11362.871 + 5720 290.38941 102.59428 -3419.8093 -2987.877 -346.88008 11363.772 + 5730 288.8309 102.57242 -3419.0806 -2989.4665 -336.55935 11364.666 + 5740 289.30518 102.56581 -3418.8602 -2988.5407 -353.54544 11365.554 + 5750 291.27561 102.61351 -3420.4502 -2987.1998 -407.7506 11366.435 + 5760 293.11126 102.63542 -3421.1806 -2985.1998 -452.90197 11367.308 + 5770 293.76552 102.59585 -3419.8618 -2982.9079 -464.45974 11368.174 + 5780 293.98517 102.62851 -3420.9504 -2983.6698 -496.30844 11369.03 + 5790 294.79738 102.65046 -3421.6821 -2983.1933 -522.44577 11369.877 + 5800 295.75086 102.65482 -3421.8274 -2981.9204 -538.6221 11370.715 + 5810 296.29947 102.68056 -3422.6852 -2981.9622 -558.65718 11371.542 + 5820 296.29036 102.69782 -3423.2607 -2982.5513 -571.30028 11372.359 + 5830 294.69545 102.66712 -3422.2372 -2983.9001 -555.10048 11373.165 + 5840 290.56056 102.59769 -3419.9231 -2987.7363 -509.12766 11373.961 + 5850 285.00457 102.52634 -3417.5447 -2993.622 -456.46619 11374.748 + 5860 280.23316 102.47882 -3415.9606 -2999.1351 -424.07226 11375.525 + 5870 277.90143 102.46456 -3415.4855 -3002.1281 -427.60892 11376.295 + 5880 278.54267 102.45614 -3415.2046 -3000.8935 -457.13825 11377.057 + 5890 281.52596 102.47543 
-3415.8477 -2997.0991 -512.44016 11377.81 + 5900 285.54471 102.53402 -3417.8008 -2993.0746 -584.6133 11378.554 + 5910 289.23852 102.61921 -3420.6403 -2990.4199 -655.43944 11379.286 + 5920 291.75741 102.65876 -3421.9587 -2987.9916 -690.86619 11380.006 + 5930 293.30238 102.68341 -3422.7802 -2986.5152 -705.51468 11380.714 + 5940 294.33881 102.70817 -3423.6057 -2985.799 -712.11586 11381.407 + 5950 294.5518 102.68878 -3422.9593 -2984.8358 -695.23232 11382.088 + 5960 293.07947 102.6515 -3421.7165 -2985.783 -661.85952 11382.755 + 5970 289.74344 102.62131 -3420.7103 -2989.7389 -622.87451 11383.41 + 5980 285.69313 102.59201 -3419.7338 -2994.7869 -582.85836 11384.054 + 5990 282.0695 102.49833 -3416.6109 -2997.0539 -524.75642 11384.686 + 6000 279.42138 102.45112 -3415.0373 -2999.4192 -496.44729 11385.309 + 6010 277.82348 102.4435 -3414.7833 -3001.5419 -498.53529 11385.922 + 6020 277.57172 102.4287 -3414.2899 -3001.423 -518.60672 11386.525 + 6030 279.84719 102.44453 -3414.8177 -2998.5663 -577.02096 11387.119 + 6040 284.83657 102.48979 -3416.3263 -2992.6534 -669.54284 11387.702 + 6050 290.70571 102.60752 -3420.2508 -2987.8481 -793.49699 11388.273 + 6060 295.34169 102.6393 -3421.31 -2982.0116 -866.47331 11388.828 + 6070 298.41874 102.67088 -3422.3626 -2978.4873 -912.09931 11389.367 + 6080 299.63728 102.68196 -3422.732 -2977.0443 -919.06473 11389.889 + 6090 297.70256 102.68037 -3422.6789 -2979.8689 -887.21583 11390.393 + 6100 292.98635 102.60875 -3420.2915 -2984.4965 -807.7782 11390.881 + 6110 288.55882 102.53375 -3417.7916 -2988.5822 -736.39378 11391.353 + 6120 287.37525 102.50433 -3416.8111 -2989.3622 -716.08887 11391.811 + 6130 289.39273 102.51186 -3417.0621 -2986.6123 -743.32192 11392.256 + 6140 292.09369 102.53766 -3417.9221 -2983.4548 -788.34731 11392.686 + 6150 292.90106 102.56998 -3418.9994 -2983.3313 -821.60495 11393.101 + 6160 290.70028 102.4775 -3415.9167 -2983.522 -781.25782 11393.501 + 6170 286.49137 102.40573 -3413.5245 -2987.3903 -729.4183 11393.886 + 6180 
283.14203 102.36692 -3412.2306 -2991.0783 -691.43515 11394.257 + 6190 282.85212 102.37865 -3412.6215 -2991.9004 -691.88835 11394.615 + 6200 285.18497 102.41841 -3413.9471 -2989.7561 -720.47221 11394.96 + 6210 288.27751 102.48148 -3416.0493 -2987.2583 -761.18663 11395.292 + 6220 290.33969 102.51303 -3417.1008 -2985.2426 -781.07751 11395.608 + 6230 290.45346 102.50693 -3416.8978 -2984.8703 -772.28146 11395.91 + 6240 289.08807 102.4704 -3415.6799 -2985.6833 -741.46843 11396.198 + 6250 287.83513 102.43956 -3414.6519 -2986.519 -716.42639 11396.471 + 6260 288.24221 102.46551 -3415.5169 -2986.7784 -728.5064 11396.731 + 6270 289.95482 102.48373 -3416.1244 -2984.8385 -753.19019 11396.977 + 6280 292.01035 102.51101 -3417.0337 -2982.6904 -790.45256 11397.208 + 6290 294.20337 102.54834 -3418.2778 -2980.6726 -837.11547 11397.425 + 6300 296.76693 102.5715 -3419.0502 -2977.6318 -883.05972 11397.626 + 6310 299.63726 102.62694 -3420.8979 -2975.2102 -943.1663 11397.81 + 6320 302.55718 102.68706 -3422.9019 -2972.871 -1001.3141 11397.976 + 6330 304.90575 102.72666 -3424.2219 -2970.6977 -1038.3687 11398.123 + 6340 304.75194 102.70051 -3423.3502 -2970.0547 -1020.4021 11398.25 + 6350 300.1298 102.62185 -3420.7283 -2974.308 -941.08767 11398.358 + 6360 292.02615 102.49819 -3416.6062 -2982.2394 -815.39152 11398.449 + 6370 284.01831 102.4038 -3413.46 -2991.0043 -702.29368 11398.523 + 6380 279.67562 102.32986 -3410.9954 -2994.9991 -628.80036 11398.585 + 6390 281.18172 102.39236 -3413.0787 -2994.8422 -658.05247 11398.634 + 6400 288.53322 102.47676 -3415.8922 -2986.7209 -743.81572 11398.671 + 6410 298.1921 102.59243 -3419.7478 -2976.2096 -858.33812 11398.693 + 6420 304.51134 102.669 -3422.3001 -2969.3626 -929.15205 11398.7 + 6430 304.12426 102.6392 -3421.3068 -2968.945 -904.31564 11398.689 + 6440 298.65073 102.53179 -3417.7263 -2973.506 -807.42063 11398.661 + 6450 293.25184 102.44151 -3414.7171 -2978.5272 -715.07185 11398.617 + 6460 291.90537 102.39338 -3413.1126 -2978.9255 -671.48835 11398.56 
+ 6470 294.88616 102.43491 -3414.4968 -2975.876 -698.70206 11398.49 + 6480 299.58928 102.47505 -3415.835 -2970.2186 -739.80674 11398.407 + 6490 302.61691 102.54081 -3418.027 -2967.9073 -776.87225 11398.309 + 6500 301.89212 102.5494 -3418.3134 -2969.2718 -760.06071 11398.197 + 6510 297.62657 102.48037 -3416.0124 -2973.3154 -683.47541 11398.071 + 6520 292.33201 102.39185 -3413.0617 -2978.24 -590.24581 11397.932 + 6530 289.09 102.31504 -3410.5012 -2980.5017 -517.99719 11397.781 + 6540 288.8981 102.3044 -3410.1468 -2980.4328 -496.84858 11397.621 + 6550 290.32727 102.29466 -3409.822 -2977.9821 -493.22484 11397.452 + 6560 292.3073 102.33273 -3411.0911 -2976.3062 -517.78449 11397.272 + 6570 294.59013 102.37547 -3412.5158 -2974.3353 -550.72464 11397.083 + 6580 296.13458 102.39118 -3413.0394 -2972.5617 -571.877 11396.884 + 6590 296.2391 102.3968 -3413.2265 -2972.5933 -580.99193 11396.674 + 6600 295.43874 102.43203 -3414.401 -2974.9583 -595.49387 11396.453 + 6610 294.77231 102.43775 -3414.5916 -2976.1401 -597.40056 11396.22 + 6620 294.17655 102.41304 -3413.7679 -2976.2026 -585.15194 11395.976 + 6630 293.41712 102.38197 -3412.7322 -2976.2965 -567.49553 11395.721 + 6640 294.30524 102.39562 -3413.1874 -2975.4307 -577.07658 11395.455 + 6650 298.612 102.39834 -3413.2779 -2969.1152 -605.12841 11395.179 + 6660 304.79804 102.477 -3415.8999 -2962.5359 -673.6084 11394.89 + 6670 309.09897 102.53482 -3417.8275 -2958.0662 -718.59771 11394.589 + 6680 309.71078 102.50092 -3416.6975 -2956.0262 -698.73639 11394.275 + 6690 308.42017 102.48931 -3416.3103 -2957.5587 -668.34212 11393.947 + 6700 307.77134 102.43233 -3414.4111 -2956.6245 -619.65168 11393.607 + 6710 307.44095 102.40401 -3413.4671 -2956.172 -582.51113 11393.255 + 6720 305.25599 102.39479 -3413.1596 -2959.1144 -541.38087 11392.892 + 6730 300.35201 102.34822 -3411.6073 -2964.8565 -471.52037 11392.518 + 6740 294.72664 102.26315 -3408.7716 -2970.388 -390.15216 11392.136 + 6750 292.47444 102.23741 -3407.9135 -2972.88 -365.34144 11391.746 
+ 6760 295.99248 102.30354 -3410.1179 -2969.8515 -424.01586 11391.35 + 6770 302.81856 102.41209 -3413.7362 -2963.3166 -526.15142 11390.945 + 6780 308.52268 102.47197 -3415.7322 -2956.8281 -601.84902 11390.53 + 6790 310.37251 102.48494 -3416.1647 -2954.5091 -627.45475 11390.104 + 6800 307.77881 102.45396 -3415.132 -2957.3343 -598.73379 11389.666 + 6810 302.58974 102.39541 -3413.1805 -2963.1012 -536.38668 11389.217 + 6820 298.11924 102.33213 -3411.0709 -2967.6411 -475.77401 11388.757 + 6830 297.19996 102.35455 -3411.8183 -2969.7559 -471.10691 11388.289 + 6840 298.80901 102.41162 -3413.7206 -2969.2648 -491.95652 11387.811 + 6850 299.20162 102.37517 -3412.5057 -2967.466 -462.1126 11387.324 + 6860 297.27569 102.36257 -3412.0857 -2969.9106 -420.11484 11386.828 + 6870 294.56435 102.29464 -3409.8213 -2971.6792 -351.93976 11386.325 + 6880 292.83898 102.26158 -3408.7194 -2973.1437 -314.41064 11385.814 + 6890 294.20038 102.28332 -3409.4438 -2971.8431 -334.56745 11385.298 + 6900 299.95571 102.34834 -3411.6113 -2965.4499 -414.64515 11384.775 + 6910 307.96006 102.51081 -3417.0271 -2958.9599 -549.87725 11384.244 + 6920 312.72734 102.5649 -3418.8298 -2953.6716 -612.35313 11383.703 + 6930 310.28648 102.5367 -3417.89 -2956.3624 -579.93706 11383.151 + 6940 301.76461 102.39218 -3413.0727 -2964.2207 -450.20944 11382.588 + 6950 292.92636 102.27069 -3409.023 -2973.3172 -324.60247 11382.016 + 6960 289.70602 102.21126 -3407.042 -2976.1263 -267.6365 11381.438 + 6970 293.35449 102.26417 -3408.8055 -2972.463 -310.52773 11380.854 + 6980 300.41626 102.40853 -3413.6176 -2966.7712 -419.25685 11380.265 + 6990 306.8396 102.51962 -3417.3206 -2960.9199 -511.65861 11379.668 + 7000 310.48581 102.56867 -3418.9557 -2957.1316 -554.8734 11379.061 + 7010 310.3007 102.53965 -3417.9884 -2956.4396 -533.23529 11378.444 + 7020 307.02651 102.5109 -3417.0301 -2960.3514 -483.5752 11377.817 + 7030 303.70897 102.45787 -3415.2624 -2963.5183 -421.58125 11377.181 + 7040 303.2335 102.43241 -3414.4137 -2963.3769 
-391.55477 11376.537 + 7050 305.80912 102.45745 -3415.2484 -2960.3805 -404.92655 11375.885 + 7060 309.04239 102.5084 -3416.9467 -2957.2695 -434.74848 11375.226 + 7070 310.47519 102.53194 -3417.7313 -2955.923 -439.07319 11374.558 + 7080 308.84866 102.48126 -3416.0422 -2956.6532 -387.43659 11373.882 + 7090 304.04224 102.41766 -3413.922 -2961.6822 -303.15701 11373.199 + 7100 297.78265 102.34778 -3411.5928 -2968.6637 -204.17877 11372.511 + 7110 293.69044 102.29125 -3409.7082 -2972.866 -129.03456 11371.818 + 7120 294.80763 102.34562 -3411.5206 -2973.0166 -139.73853 11371.123 + 7130 300.33107 102.40398 -3413.4661 -2966.7464 -189.91063 11370.425 + 7140 307.20638 102.53824 -3417.9413 -2960.9951 -281.46093 11369.724 + 7150 312.14166 102.5996 -3419.9866 -2955.6995 -329.89027 11369.017 + 7160 313.18599 102.58982 -3419.6606 -2953.8201 -319.00735 11368.304 + 7170 310.85187 102.51276 -3417.0919 -2954.7233 -255.86266 11367.586 + 7180 308.02603 102.47957 -3415.9858 -2957.8205 -206.84935 11366.862 + 7190 307.24387 102.4579 -3415.2634 -2958.2614 -178.46827 11366.134 + 7200 308.16321 102.44639 -3414.8796 -2956.5102 -168.32518 11365.403 + 7210 308.81882 102.43881 -3414.6269 -2955.2823 -159.24063 11364.669 + 7220 307.91831 102.40754 -3413.5846 -2955.5794 -130.03909 11363.932 + 7230 305.50804 102.38886 -3412.9619 -2958.5419 -94.136285 11363.193 + 7240 302.74889 102.37275 -3412.425 -2962.109 -56.349148 11362.451 + 7250 300.67433 102.33156 -3411.0521 -2963.8218 -17.874515 11361.709 + 7260 299.98012 102.29175 -3409.7248 -2963.5272 3.5852577 11360.966 + 7270 300.90835 102.31212 -3410.4039 -2962.8256 -16.335859 11360.224 + 7280 302.94732 102.30602 -3410.2007 -2959.5895 -35.978287 11359.481 + 7290 304.89802 102.31831 -3410.6104 -2957.0977 -58.125644 11358.737 + 7300 305.49867 102.32591 -3410.8638 -2956.4577 -63.088242 11357.993 + 7310 304.6715 102.33938 -3411.3127 -2958.1369 -52.990711 11357.247 + 7320 303.54554 102.35623 -3411.8742 -2960.3732 -36.573098 11356.5 + 7330 303.66451 102.38021 
-3412.6738 -2960.9959 -29.543309 11355.752 + 7340 305.00297 102.39914 -3413.3048 -2959.636 -29.319608 11355.004 + 7350 305.29656 102.39039 -3413.0131 -2958.9076 -12.410285 11354.255 + 7360 302.92792 102.38453 -3412.8175 -2962.2352 19.416182 11353.506 + 7370 299.34051 102.32458 -3410.8194 -2965.573 77.043732 11352.758 + 7380 297.61221 102.29295 -3409.765 -2967.0894 106.66016 11352.011 + 7390 299.22994 102.29259 -3409.753 -2964.6712 97.562063 11351.266 + 7400 302.44796 102.32443 -3410.8142 -2960.9458 62.696434 11350.522 + 7410 304.46075 102.33399 -3411.1331 -2958.2709 45.744739 11349.78 + 7420 304.15103 102.33978 -3411.3258 -2958.9242 50.463129 11349.039 + 7430 302.9416 102.32003 -3410.6677 -2960.065 75.01504 11348.299 + 7440 303.78875 102.35531 -3411.8437 -2959.981 68.163674 11347.56 + 7450 307.89927 102.44037 -3414.6788 -2956.702 22.918055 11346.822 + 7460 313.18213 102.57894 -3419.2981 -2953.4634 -49.251303 11346.085 + 7470 316.30318 102.66093 -3422.031 -2951.554 -83.261671 11345.347 + 7480 315.72383 102.61144 -3420.3814 -2950.7661 -42.052289 11344.607 + 7490 313.43708 102.57152 -3419.0506 -2952.8367 5.0236186 11343.867 + 7500 312.15684 102.57064 -3419.0214 -2954.7118 27.395999 11343.127 + 7510 312.26283 102.55548 -3418.516 -2954.0487 41.72175 11342.387 + 7520 313.02704 102.56623 -3418.8743 -2953.2703 41.094085 11341.648 + 7530 313.04204 102.58926 -3419.6421 -2954.0158 44.642314 11340.91 + 7540 310.52379 102.53025 -3417.6752 -2955.7945 103.46046 11340.173 + 7550 304.96488 102.4092 -3413.6401 -2960.028 213.43937 11339.438 + 7560 298.4161 102.34018 -3411.3394 -2967.4681 311.41905 11338.707 + 7570 294.74265 102.27553 -3409.1842 -2970.7769 383.93114 11337.981 + 7580 296.2301 102.30127 -3410.0424 -2969.4225 379.52609 11337.263 + 7590 301.35094 102.33889 -3411.2964 -2963.0598 337.11238 11336.552 + 7600 307.34561 102.43722 -3414.5741 -2957.4208 257.94209 11335.847 + 7610 313.21271 102.52976 -3417.6586 -2951.7784 179.39609 11335.147 + 7620 318.7195 102.60906 -3420.3019 
-2946.2308 112.05943 11334.45 + 7630 322.5781 102.65939 -3421.9796 -2942.1691 76.802074 11333.756 + 7640 322.74627 102.67316 -3422.4386 -2942.378 91.399486 11333.063 + 7650 318.81366 102.6086 -3420.2868 -2946.0756 171.91931 11332.372 + 7660 312.93389 102.52623 -3417.541 -2952.0755 274.985 11331.685 + 7670 308.21944 102.47365 -3415.7882 -2957.3351 356.59115 11331.002 + 7680 305.76779 102.43972 -3414.6574 -2959.8509 409.9989 11330.326 + 7690 304.74532 102.40791 -3413.597 -2960.3115 444.64681 11329.658 + 7700 304.79016 102.35688 -3411.896 -2958.5437 470.94979 11328.999 + 7710 305.94588 102.38584 -3412.8613 -2957.7899 453.04417 11328.348 + 7720 307.77312 102.40142 -3413.3808 -2955.5916 434.9041 11327.705 + 7730 309.60552 102.43689 -3414.5629 -2954.0482 411.05804 11327.071 + 7740 311.0115 102.45402 -3415.1338 -2952.5278 401.13576 11326.445 + 7750 311.42123 102.47184 -3415.7279 -2952.5124 400.9542 11325.826 + 7760 310.83835 102.49427 -3416.4755 -2954.127 405.15451 11325.214 + 7770 310.069 102.49408 -3416.4694 -2955.2653 415.46654 11324.611 + 7780 309.84344 102.49043 -3416.3476 -2955.479 418.64027 11324.015 + 7790 310.50426 102.5043 -3416.8101 -2954.9586 406.43594 11323.427 + 7800 311.79257 102.53176 -3417.7252 -2953.9574 385.82094 11322.846 + 7810 313.08619 102.58693 -3419.5643 -2953.8723 360.3397 11322.273 + 7820 313.38204 102.57164 -3419.0548 -2952.9228 375.40755 11321.707 + 7830 312.18231 102.51388 -3417.1293 -2952.7818 423.08013 11321.148 + 7840 309.61034 102.49837 -3416.6123 -2956.0904 466.15342 11320.597 + 7850 306.86177 102.42493 -3414.1643 -2957.7307 527.65457 11320.054 + 7860 305.91729 102.41059 -3413.6864 -2958.6577 542.46581 11319.522 + 7870 307.99567 102.41871 -3413.957 -2955.8368 515.65864 11318.999 + 7880 312.0185 102.47619 -3415.873 -2951.7691 446.68695 11318.487 + 7890 315.40555 102.55956 -3418.652 -2949.5101 369.51939 11317.982 + 7900 316.24138 102.59625 -3419.8749 -2949.4899 332.38484 11317.485 + 7910 315.18057 102.63158 -3421.0527 -2952.2455 319.83534 
11316.994 + 7920 313.29156 102.60314 -3420.1045 -2954.1071 350.26571 11316.51 + 7930 310.48776 102.51729 -3417.243 -2955.416 420.13333 11316.031 + 7940 306.76664 102.45311 -3415.1038 -2958.8117 490.99516 11315.561 + 7950 303.87007 102.43298 -3414.4325 -2962.4488 533.62495 11315.1 + 7960 303.80004 102.40472 -3413.4905 -2961.611 548.73851 11314.649 + 7970 306.342 102.42813 -3414.2709 -2958.6104 512.87814 11314.209 + 7980 309.49469 102.5358 -3417.8599 -2957.51 429.60361 11313.778 + 7990 312.40023 102.59242 -3419.7475 -2955.0758 362.30936 11313.355 + 8000 315.73781 102.63216 -3421.0721 -2951.436 300.24782 11312.939 + 8010 318.85447 102.66858 -3422.286 -2948.0142 248.9851 11312.528 + 8020 319.88824 102.67714 -3422.5712 -2946.7617 233.66577 11312.122 + 8030 318.80625 102.65744 -3421.9148 -2947.7147 255.76097 11311.721 + 8040 317.14853 102.64873 -3421.6243 -2949.8899 290.21355 11311.324 + 8050 315.49427 102.64801 -3421.6002 -2952.3264 331.66499 11310.933 + 8060 313.19631 102.61381 -3420.4602 -2954.6044 397.31105 11310.549 + 8070 309.51858 102.57039 -3419.013 -2958.6276 476.85304 11310.171 + 8080 305.64428 102.50821 -3416.9402 -2962.3175 560.45124 11309.803 + 8090 303.54652 102.4653 -3415.5101 -2964.0077 616.12582 11309.445 + 8100 303.38025 102.48343 -3416.1142 -2964.8591 623.97214 11309.099 + 8110 303.96645 102.50123 -3416.7075 -2964.5805 616.11466 11308.764 + 8120 304.38271 102.52819 -3417.6065 -2964.8603 597.55836 11308.441 + 8130 304.10334 102.49331 -3416.4435 -2964.1129 604.05719 11308.129 + 8140 302.64268 102.49961 -3416.6536 -2966.4955 604.21027 11307.829 + 8150 300.17325 102.42345 -3414.1151 -2967.6301 645.89862 11307.54 + 8160 298.3032 102.3998 -3413.3265 -2969.6231 664.7576 11307.263 + 8170 298.82647 102.39688 -3413.2294 -2968.7477 657.91079 11306.998 + 8180 302.17444 102.41336 -3413.7786 -2964.317 620.46197 11306.746 + 8190 307.5646 102.49253 -3416.4176 -2958.9385 540.41729 11306.506 + 8200 313.53945 102.55314 -3418.4381 -2952.072 462.62665 11306.275 + 8210 
318.13256 102.60072 -3420.0242 -2946.8261 405.66436 11306.054 + 8220 319.3135 102.63179 -3421.0598 -2946.1052 389.74684 11305.84 + 8230 316.4501 102.59325 -3419.7749 -2949.0793 438.23794 11305.633 + 8240 311.41531 102.54136 -3418.0455 -2954.8388 510.20467 11305.435 + 8250 307.49553 102.49861 -3416.6203 -2959.244 567.1487 11305.246 + 8260 306.99857 102.48835 -3416.2783 -2959.6412 578.53767 11305.068 + 8270 310.08291 102.52095 -3417.3649 -2956.14 540.93381 11304.901 + 8280 314.65311 102.57231 -3419.0769 -2951.0543 486.42168 11304.744 + 8290 316.60623 102.60359 -3420.1196 -2949.1919 466.39209 11304.596 + 8300 312.90183 102.53274 -3417.7579 -2952.3402 535.34718 11304.458 + 8310 305.50434 102.39385 -3413.1283 -2958.7137 661.5729 11304.329 + 8320 300.11671 102.30144 -3410.0479 -2963.647 753.27998 11304.213 + 8330 301.04861 102.28154 -3409.3848 -2961.5978 764.30692 11304.111 + 8340 307.56636 102.39808 -3413.2694 -2955.7877 673.32594 11304.023 + 8350 315.64299 102.53071 -3417.6902 -2948.1952 560.91912 11303.947 + 8360 320.82215 102.63505 -3421.1682 -2943.9695 482.82386 11303.883 + 8370 320.3142 102.60932 -3420.3106 -2943.8675 502.19091 11303.827 + 8380 314.68179 102.51725 -3417.2416 -2949.1763 590.29164 11303.781 + 8390 307.35495 102.40341 -3413.447 -2956.2798 699.30425 11303.747 + 8400 302.89319 102.36482 -3412.1607 -2961.63 754.08676 11303.725 + 8410 303.80412 102.3844 -3412.8135 -2960.9279 739.65753 11303.717 + 8420 308.39229 102.47984 -3415.9947 -2957.2846 662.48339 11303.724 + 8430 311.35051 102.53872 -3417.9575 -2954.8472 607.56917 11303.742 + 8440 309.00725 102.50882 -3416.9608 -2957.3359 625.21106 11303.773 + 8450 303.0839 102.42442 -3414.1472 -2963.3329 690.65223 11303.815 + 8460 298.68768 102.34742 -3411.5808 -2967.3055 741.02779 11303.87 + 8470 299.69771 102.33702 -3411.234 -2965.4564 722.14167 11303.939 + 8480 306.42395 102.43354 -3414.4515 -2958.6691 616.27733 11304.022 + 8490 315.54597 102.57861 -3419.2869 -2949.9362 475.09286 11304.116 + 8500 321.89652 
102.66975 -3422.325 -2943.5284 382.37676 11304.219 + 8510 321.57223 102.66699 -3422.2328 -2943.9185 385.37704 11304.33 + 8520 314.37995 102.55545 -3418.5151 -2950.8988 487.49536 11304.448 + 8530 304.4993 102.41487 -3413.829 -2960.9093 619.0431 11304.575 + 8540 298.74772 102.37673 -3412.5576 -2968.193 672.80222 11304.714 + 8550 301.6323 102.43471 -3414.4904 -2965.8352 617.93598 11304.865 + 8560 310.32824 102.59331 -3419.777 -2958.1872 478.78791 11305.028 + 8570 317.01382 102.71744 -3423.9148 -2952.3808 367.87015 11305.201 + 8580 317.74303 102.6954 -3423.1801 -2950.5615 362.18441 11305.38 + 8590 313.77817 102.63333 -3421.1109 -2954.3897 412.41062 11305.566 + 8600 308.02194 102.58339 -3419.4464 -2961.2871 471.90335 11305.76 + 8610 303.59903 102.53406 -3417.802 -2966.2214 516.13965 11305.963 + 8620 302.88535 102.49798 -3416.5992 -2966.0802 522.29894 11306.176 + 8630 305.55276 102.56481 -3418.827 -2964.3404 461.82938 11306.398 + 8640 308.64857 102.64618 -3421.5395 -2962.4481 394.00001 11306.63 + 8650 309.66225 102.68389 -3422.7963 -2962.1972 361.61444 11306.868 + 8660 308.97524 102.6601 -3422.0035 -2962.4262 370.01922 11307.114 + 8670 308.49234 102.6395 -3421.3167 -2962.4577 378.11175 11307.367 + 8680 309.48674 102.64157 -3421.3858 -2961.0478 368.75306 11307.628 + 8690 310.80383 102.59733 -3419.9111 -2957.6139 376.32587 11307.895 + 8700 310.12893 102.57787 -3419.2625 -2957.9692 389.78683 11308.17 + 8710 307.06701 102.48648 -3416.216 -2959.4771 448.52691 11308.452 + 8720 304.3609 102.45189 -3415.063 -2962.3493 481.69787 11308.743 + 8730 304.92854 102.45141 -3415.0472 -2961.4891 477.72206 11309.043 + 8740 307.78938 102.4899 -3416.33 -2958.5166 441.74481 11309.352 + 8750 309.79398 102.53908 -3417.9693 -2957.1743 408.87798 11309.669 + 8760 309.56515 102.52265 -3417.4217 -2956.967 417.35016 11309.995 + 8770 308.71966 102.53231 -3417.7435 -2958.5464 420.92664 11310.328 + 8780 308.42018 102.55428 -3418.476 -2959.7244 414.78707 11310.669 + 8790 308.18917 102.54973 -3418.3244 
-2959.9163 416.50939 11311.018 + 8800 306.85617 102.54025 -3418.0084 -2961.5831 425.37989 11311.375 + 8810 302.68015 112.45 -3415.0001 -2964.7863 550.58788 11311.741 + 8820 295.06565 112.32197 -3410.7324 -2971.8446 657.43846 11312.118 + 8830 287.43599 112.21335 -3407.1117 -2979.5725 755.53188 11312.507 + 8840 283.26323 112.19013 -3406.3378 -2985.0052 788.92367 11312.91 + 8850 283.69155 112.16477 -3405.4924 -2983.5227 780.74579 11313.328 + 8860 286.84529 112.23221 -3407.7404 -2981.0798 711.83515 11313.761 + 8870 289.65279 112.27021 -3409.0069 -2978.1703 655.90054 11314.208 + 8880 290.55954 112.2646 -3408.8201 -2976.6349 635.60483 11314.667 + 8890 289.53001 112.23842 -3407.9475 -2977.2935 644.74225 11315.138 + 8900 287.92832 112.22627 -3407.5424 -2979.2708 659.46236 11315.622 + 8910 287.68299 112.26053 -3408.6842 -2980.7776 650.61043 11316.118 + 8920 289.37074 112.27963 -3409.3211 -2978.904 633.34421 11316.627 + 8930 292.16422 112.34091 -3411.3636 -2976.7915 590.4675 11317.147 + 8940 294.83192 112.3803 -3412.6767 -2974.1366 556.06469 11317.68 + 8950 296.43376 112.39792 -3413.264 -2972.3412 536.95027 11318.223 + 8960 296.42915 112.36904 -3412.3012 -2971.3853 548.12171 11318.776 + 8970 293.90232 112.35321 -3411.7735 -2974.6161 573.68475 11319.34 + 8980 288.2636 112.28139 -3409.3796 -2980.6094 640.65982 11319.915 + 8990 281.75228 112.19099 -3406.3663 -2987.2811 714.63362 11320.502 + 9000 278.06342 112.11221 -3403.7402 -2990.142 757.54946 11321.103 + 9010 278.24044 112.13347 -3404.4491 -2990.5875 729.73225 11321.718 + 9020 280.952 112.1771 -3405.9035 -2988.0087 672.71807 11322.347 + 9030 285.18758 112.30539 -3410.1796 -2985.9846 571.95995 11322.989 + 9040 290.15188 112.36692 -3412.2308 -2980.6518 493.24984 11323.642 + 9050 293.91993 112.43618 -3414.5395 -2977.3558 421.59165 11324.305 + 9060 295.30954 112.45378 -3415.1261 -2975.8755 388.52328 11324.976 + 9070 295.28214 112.4634 -3415.4465 -2976.2367 369.82512 11325.655 + 9080 294.96454 112.44453 -3414.8176 -2976.0802 
366.59152 11326.341 + 9090 293.74912 112.43032 -3414.3441 -2977.4145 371.66433 11327.034 + 9100 290.97422 112.38218 -3412.7395 -2979.9374 406.79415 11327.735 + 9110 286.9424 112.30902 -3410.3007 -2983.4957 465.04465 11328.444 + 9120 282.66758 112.25547 -3408.5157 -2988.069 517.9831 11329.162 + 9130 279.35291 112.17445 -3405.8151 -2990.2988 568.90646 11329.889 + 9140 278.47221 112.13019 -3404.3398 -2990.1335 577.72471 11330.628 + 9150 280.90917 112.16882 -3405.6274 -2987.7963 521.12054 11331.378 + 9160 285.19775 112.23827 -3407.9424 -2983.7323 432.87276 11332.138 + 9170 288.55689 112.29617 -3409.8725 -2980.666 356.14862 11332.906 + 9180 289.10316 112.3386 -3411.2866 -2981.2675 310.78215 11333.682 + 9190 287.29679 112.28248 -3409.416 -2982.0838 325.33627 11334.464 + 9200 285.14294 112.26393 -3408.7977 -2984.6692 329.51839 11335.252 + 9210 284.58233 112.24429 -3408.143 -2984.8483 318.88752 11336.047 + 9220 287.12216 112.26454 -3408.8181 -2981.7457 264.88469 11336.848 + 9230 292.71659 112.38709 -3412.903 -2977.5092 144.48506 11337.654 + 9240 299.31723 112.47919 -3415.973 -2970.7613 28.37347 11338.464 + 9250 304.06172 112.52849 -3417.6162 -2965.3474 -52.428205 11339.274 + 9260 305.28094 112.56615 -3418.8716 -2964.7893 -94.2179 11340.084 + 9270 302.91079 112.51217 -3417.0724 -2966.5155 -63.846936 11340.893 + 9280 297.44503 112.42611 -3414.2037 -2971.7768 10.963101 11341.7 + 9290 289.61498 112.30066 -3410.022 -2979.2416 124.10785 11342.509 + 9300 280.67249 112.20401 -3406.8005 -2989.3214 234.83036 11343.32 + 9310 272.96168 112.12012 -3404.0039 -2997.994 326.4095 11344.136 + 9320 269.3507 112.09667 -3403.2222 -3002.5834 355.32865 11344.958 + 9330 270.67998 112.16748 -3405.5826 -3002.9667 303.79862 11345.787 + 9340 274.2089 112.18408 -3406.1362 -2998.2712 250.76017 11346.623 + 9350 277.36176 112.22981 -3407.6604 -2995.1058 186.80097 11347.463 + 9360 280.24475 112.26306 -3408.7688 -2991.9259 130.92224 11348.307 + 9370 284.24341 112.34255 -3411.4183 -2988.6278 55.455202 
11349.154 + 9380 288.70811 112.42198 -3414.0659 -2984.6345 -13.589289 11350.002 + 9390 290.91585 112.48333 -3416.111 -2983.3957 -49.55133 11350.851 + 9400 289.42707 112.48282 -3416.0941 -2985.5933 -28.179186 11351.699 + 9410 285.61854 112.41981 -3413.9937 -2989.1578 34.413115 11352.548 + 9420 282.26883 112.36048 -3412.016 -2992.1625 86.438158 11353.397 + 9430 281.11902 112.3301 -3411.0033 -2992.8601 103.34353 11354.248 + 9440 281.58792 112.23988 -3407.9961 -2989.1554 126.40651 11355.101 + 9450 281.58545 112.17727 -3405.9089 -2987.0719 142.0187 11355.958 + 9460 279.76031 112.10388 -3403.4628 -2987.3405 175.6611 11356.817 + 9470 277.11445 112.07592 -3402.5305 -2990.3438 195.66591 11357.681 + 9480 276.24296 112.04597 -3401.5324 -2990.642 198.1981 11358.548 + 9490 279.18678 112.10317 -3403.4391 -2988.1699 131.96405 11359.419 + 9500 285.62232 112.22262 -3407.4208 -2982.5793 10.204022 11360.293 + 9510 292.92138 112.3565 -3411.8834 -2976.185 -123.66568 11361.168 + 9520 298.23784 112.46912 -3415.6372 -2972.031 -228.96731 11362.04 + 9530 300.44065 112.50596 -3416.8654 -2969.9827 -274.29938 11362.909 + 9540 300.1625 112.49335 -3416.4449 -2969.976 -274.83813 11363.774 + 9550 298.28409 112.46184 -3415.3945 -2971.7195 -250.45164 11364.633 + 9560 294.90643 112.39352 -3413.1173 -2974.4663 -199.16543 11365.488 + 9570 290.40096 112.35764 -3411.9212 -2979.9718 -153.13997 11366.34 + 9580 286.29201 112.28111 -3409.3702 -2983.5326 -96.522516 11367.19 + 9590 284.43019 112.24597 -3408.199 -2985.1307 -75.265393 11368.037 + 9600 285.51121 112.27581 -3409.1935 -2984.5173 -102.42078 11368.884 + 9610 288.45354 112.34055 -3411.3518 -2982.299 -156.37938 11369.729 + 9620 291.32182 112.36459 -3412.1531 -2978.834 -190.69292 11370.572 + 9630 292.39753 112.34679 -3411.5598 -2976.6406 -191.05473 11371.411 + 9640 290.87259 112.30526 -3410.1755 -2977.5245 -160.91219 11372.247 + 9650 287.48488 112.2523 -3408.4099 -2980.7979 -115.67801 11373.081 + 9660 284.41335 112.17697 -3405.899 -2982.8557 -70.471247 
11373.913 + 9670 282.82188 112.12245 -3404.0816 -2983.4055 -50.822758 11374.744 + 9680 281.77842 112.0967 -3403.2233 -2984.0993 -52.622951 11375.574 + 9690 280.6217 112.08879 -3402.9598 -2985.5563 -63.071851 11376.404 + 9700 280.01648 112.12156 -3404.052 -2987.5487 -91.259523 11377.233 + 9710 280.63233 112.14306 -3404.7687 -2987.3494 -118.33614 11378.06 + 9720 281.49434 112.20016 -3406.672 -2987.9705 -153.2615 11378.886 + 9730 280.71403 112.17063 -3405.6877 -2988.1468 -136.13915 11379.709 + 9740 277.5257 112.10088 -3403.3627 -2990.5643 -84.219794 11380.53 + 9750 273.74903 112.0686 -3402.2867 -2995.1057 -47.262096 11381.349 + 9760 272.67467 112.06158 -3402.0527 -2996.4698 -49.270034 11382.169 + 9770 275.6271 112.12555 -3404.1849 -2994.2105 -119.5902 11382.987 + 9780 281.13705 112.2005 -3406.6835 -2988.5134 -221.11487 11383.804 + 9790 286.56255 112.26217 -3408.7391 -2982.499 -317.76211 11384.616 + 9800 289.42587 112.30862 -3410.2875 -2979.7884 -383.72706 11385.424 + 9810 288.5641 112.31641 -3410.5471 -2981.3299 -398.67032 11386.224 + 9820 285.09817 112.22935 -3407.6449 -2983.583 -349.83673 11387.017 + 9830 281.59835 112.18914 -3406.3045 -2987.4483 -315.26173 11387.804 + 9840 280.34136 112.17445 -3405.8149 -2988.8283 -309.04836 11388.586 + 9850 282.1127 112.15105 -3405.0351 -2985.4139 -326.8267 11389.361 + 9860 285.987 112.19812 -3406.604 -2981.22 -392.3815 11390.131 + 9870 290.07281 112.22891 -3407.6304 -2976.1691 -455.73 11390.894 + 9880 292.98661 112.26374 -3408.7915 -2972.9961 -509.79706 11391.648 + 9890 293.4771 112.31718 -3410.5726 -2974.0477 -548.94532 11392.394 + 9900 291.01583 112.25031 -3408.3435 -2975.4795 -514.96551 11393.129 + 9910 288.3146 102.21877 -3407.2925 -2978.4464 -552.6703 11393.854 + 9920 289.81888 102.2633 -3408.7767 -2977.6931 -595.04901 11394.569 + 9930 295.20081 102.38658 -3412.8859 -2973.797 -695.95493 11395.273 + 9940 301.57118 102.49325 -3416.4418 -2967.8775 -794.95454 11395.964 + 9950 305.76964 102.57072 -3419.024 -2964.2148 -862.85164 
11396.64 + 9960 306.04627 102.5489 -3418.2967 -2963.076 -860.0359 11397.301 + 9970 302.3909 102.50659 -3416.8863 -2967.1028 -818.623 11397.945 + 9980 296.77037 102.38306 -3412.7686 -2971.3451 -733.14902 11398.574 + 9990 292.19734 102.29924 -3409.9746 -2975.3532 -674.286 11399.189 + 10000 290.66211 102.28541 -3409.5137 -2977.1759 -667.48493 11399.792 +Loop time of 2.5027 on 1 procs for 10000 steps with 500 atoms -Performance: 265.040 ns/day, 0.091 hours/ns, 3067.591 timesteps/s -99.9% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 345.227 ns/day, 0.070 hours/ns, 3995.683 timesteps/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.5437 | 2.5437 | 2.5437 | 0.0 | 78.03 -Bond | 0.00067735 | 0.00067735 | 0.00067735 | 0.0 | 0.02 -Neigh | 0.017251 | 0.017251 | 0.017251 | 0.0 | 0.53 -Comm | 0.08847 | 0.08847 | 0.08847 | 0.0 | 2.71 -Output | 0.0010381 | 0.0010381 | 0.0010381 | 0.0 | 0.03 -Modify | 0.33628 | 0.33628 | 0.33628 | 0.0 | 10.32 -Other | | 0.2725 | | | 8.36 +Pair | 2.0312 | 2.0312 | 2.0312 | 0.0 | 81.16 +Bond | 0.00069571 | 0.00069571 | 0.00069571 | 0.0 | 0.03 +Neigh | 0.013799 | 0.013799 | 0.013799 | 0.0 | 0.55 +Comm | 0.073534 | 0.073534 | 0.073534 | 0.0 | 2.94 +Output | 0.0074136 | 0.0074136 | 0.0074136 | 0.0 | 0.30 +Modify | 0.23759 | 0.23759 | 0.23759 | 0.0 | 9.49 +Other | | 0.1385 | | | 5.53 Nlocal: 500 ave 500 max 500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -171,4 +1076,4 @@ Neighbor list builds = 18 Dangerous builds = 0 #write_data ${rep}/lj-out.data -Total wall time: 0:00:03 +Total wall time: 0:00:02 diff --git a/examples/USER/misc/grem/lj-temper/3/log.lammps.3 b/examples/USER/misc/grem/lj-temper/3/log.lammps.3 index 2bd265431d051434107df73aca1ec81a44581b98..4d4060558f54899e4680bcbffc089165920504bb 100644 --- a/examples/USER/misc/grem/lj-temper/3/log.lammps.3 +++ 
b/examples/USER/misc/grem/lj-temper/3/log.lammps.3 @@ -16,10 +16,6 @@ read_data 3/lj.data #dump dump all xyz 1000 ${rep}/dump.xyz -thermo 100 -thermo_style custom step temp pe etotal press vol -timestep 1.0 - fix fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso ${press} ${press} 10000.0 @@ -27,135 +23,1044 @@ fix fxnpt all npt temp 300 300 1000.0 iso 0 ${press} 10000.0 fix fxnpt all npt temp 300 300 1000.0 iso 0 0 10000.0 fix fxgREM all grem ${lambda} -.03 -30000 fxnpt fix fxgREM all grem 930 -.03 -30000 fxnpt + +thermo 10 +thermo_style custom step temp f_fxgREM pe etotal press vol thermo_modify press fxgREM_press +timestep 1.0 temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} temper/grem 10000 100 930 fxgREM fxnpt 10294 98392 Neighbor list info ... - 1 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 7 ghost atom cutoff = 7 - binsize = 3.5 -> bins = 7 7 7 -Memory usage per processor = 5.38075 Mbytes -Step Temp PotEng TotEng Press Volume - 0 290.839 -3356.3647 -2923.7637 13.376266 11488.157 - 100 285.69508 -3350.6878 -2925.738 167.78531 11487.986 - 200 303.33832 -3362.4442 -2911.2515 -285.77881 11487.825 - 300 296.08102 -3356.4942 -2916.0962 -14.958212 11487.267 - 400 293.47704 -3361.3097 -2924.7849 -10.620913 11486.528 - 500 299.4194 -3361.7955 -2916.4318 -50.507149 11485.642 - 600 305.29691 -3363.5524 -2909.4464 -108.7678 11484.625 - 700 303.76824 -3363.3253 -2911.493 -104.4867 11483.395 - 800 294.51417 -3359.4431 -2921.3756 69.012038 11481.959 - 900 317.06683 -3376.5605 -2904.9476 -394.53967 11480.603 - 1000 295.23442 -3359.4678 -2920.3289 140.84436 11479.118 - 1100 295.40528 -3359.5295 -2920.1365 238.50053 11477.82 - 1200 296.4057 -3358.2915 -2917.4104 187.0124 11476.739 - 1300 301.90051 -3364.4524 -2915.3982 37.401199 
11475.788 - 1400 292.85338 -3358.8438 -2923.2466 166.77089 11474.892 - 1500 306.57141 -3367.7234 -2911.7217 -37.617278 11474.193 - 1600 303.60667 -3363.5842 -2911.9923 59.354855 11473.503 - 1700 309.11038 -3368.2542 -2908.4759 -77.354272 11473.036 - 1800 300.88534 -3360.8762 -2913.3321 157.35644 11472.739 - 1900 300.76857 -3362.1419 -2914.7714 64.474646 11472.846 - 2000 304.83747 -3362.6904 -2909.2678 74.697763 11473.205 - 2100 303.13383 -3361.2408 -2910.3522 23.427206 11473.697 - 2200 310.28177 -3369.0928 -2907.5722 -54.59024 11474.389 - 2300 301.49445 -3364.3099 -2915.8597 57.698577 11475.107 - 2400 302.32605 -3363.1745 -2913.4874 -101.51258 11475.951 - 2500 312.1335 -3371.5673 -2907.2924 -298.61384 11476.757 - 2600 311.48564 -3369.8118 -2906.5005 -248.29721 11477.348 - 2700 302.70772 -3366.2003 -2915.9455 -112.41764 11477.885 - 2800 296.91863 -3362.2907 -2920.6467 -11.436148 11478.398 - 2900 295.20791 -3360.1207 -2921.0213 28.151935 11478.849 - 3000 298.7181 -3363.3108 -2918.9903 -24.285938 11479.232 - 3100 299.63105 -3361.6655 -2915.987 -85.460877 11479.532 - 3200 299.2326 -3362.7204 -2917.6346 -1.4187391 11479.74 - 3300 292.01842 -3359.6831 -2925.3278 111.54019 11479.801 - 3400 300.40417 -3364.1372 -2917.3087 -35.829416 11479.821 - 3500 303.01655 -3366.1156 -2915.4014 -213.6144 11479.867 - 3600 300.80325 -3367.3344 -2919.9124 -108.94713 11479.727 - 3700 301.93871 -3365.3419 -2916.231 -108.25614 11479.485 - 3800 298.59542 -3364.1482 -2920.0102 -73.755956 11479.116 - 3900 296.82227 -3366.2208 -2924.7202 -102.3413 11478.537 - 4000 298.85366 -3363.1199 -2918.5977 -141.73373 11477.785 - 4100 285.93508 -3361.0953 -2935.7885 33.105452 11476.764 - 4200 294.13198 -3366.6662 -2929.1672 -61.017319 11475.527 - 4300 289.62881 -3360.4118 -2929.6109 72.508715 11474.117 - 4400 292.16548 -3363.2769 -2928.7029 69.460552 11472.679 - 4500 296.10391 -3364.7776 -2924.3455 -44.256514 11471.222 - 4600 298.19488 -3367.9838 -2924.4415 -79.787912 11469.536 - 4700 285.80539 -3358.8854 
-2933.7716 197.88821 11467.664 - 4800 306.36811 -3371.2406 -2915.5413 -88.96311 11465.916 - 4900 292.49926 -3361.8643 -2926.7938 185.57775 11464.23 - 5000 290.88226 -3361.5861 -2928.9208 259.04414 11462.735 - 5100 307.58823 -3374.2434 -2916.7292 -29.448909 11461.464 - 5200 302.16928 -3370.5539 -2921.1 -33.25034 11460.298 - 5300 316.06674 -3376.741 -2906.6157 -242.91365 11458.915 - 5400 303.96005 -3370.8575 -2918.7399 -141.06744 11457.186 - 5500 311.74544 -3377.0044 -2913.3067 -274.23168 11455.241 - 5600 300.49781 -3366.7267 -2919.7589 112.99595 11452.946 - 5700 292.28467 -3363.6982 -2928.9469 345.58668 11450.884 - 5800 306.41131 -3375.5954 -2919.8318 44.031034 11449.306 - 5900 304.59946 -3373.6282 -2920.5596 11.550291 11447.886 - 6000 311.8961 -3377.6423 -2913.7205 -109.77328 11446.479 - 6100 291.20342 -3368.383 -2935.24 203.83576 11445.011 - 6200 297.89834 -3373.1747 -2930.0735 26.610471 11443.642 - 6300 300.56911 -3378.1564 -2931.0826 -35.208037 11442.15 - 6400 307.53502 -3382.2265 -2924.7914 -91.070456 11440.514 - 6500 303.68877 -3381.5242 -2929.8102 -47.863863 11438.647 - 6600 307.2743 -3383.4914 -2926.4441 -176.63601 11436.802 - 6700 294.30769 -3379.6274 -2941.8671 24.164729 11434.849 - 6800 303.13931 -3381.7922 -2930.8954 -111.95234 11432.875 - 6900 296.26164 -3381.8788 -2941.2121 -57.549381 11430.738 - 7000 289.01191 -3378.9354 -2949.0521 126.23987 11428.513 - 7100 302.81602 -3388.9706 -2938.5547 -237.78388 11426.322 - 7200 294.50663 -3383.9162 -2945.8599 -39.728782 11423.859 - 7300 290.35305 -3382.6487 -2950.7705 94.790713 11421.387 - 7400 282.82774 -3377.867 -2957.1822 250.1436 11419.087 - 7500 300.09907 -3388.5306 -2942.1559 -155.8278 11416.997 - 7600 297.08335 -3388.2429 -2946.354 -74.771931 11414.718 - 7700 296.10041 -3389.0183 -2948.5914 39.612836 11412.516 - 7800 302.26382 -3392.6735 -2943.079 -108.12678 11410.56 - 7900 299.45316 -3389.6586 -2944.2447 -67.016831 11408.644 - 8000 279.75796 -3383.3458 -2967.227 212.04894 11406.742 - 8100 296.1234 
-3391.5243 -2951.0632 14.909942 11405.025 - 8200 283.90018 -3385.3372 -2963.0572 242.009 11403.233 - 8300 294.40491 -3390.3819 -2952.4769 144.49126 11401.685 - 8400 284.83989 -3386.3812 -2962.7034 294.44578 11400.452 - 8500 310.91803 -3399.32 -2936.853 -187.72128 11399.607 - 8600 280.73817 -3381.818 -2964.2413 368.28406 11398.828 - 8700 293.03064 -3389.802 -2953.9412 60.228584 11398.442 - 8800 284.38203 -3385.0501 -2962.0534 283.35374 11398.194 - 8900 296.26156 -3391.0805 -2950.4139 105.69366 11398.307 - 9000 293.15104 -3388.0276 -2951.9876 176.06272 11398.732 - 9100 296.37884 -3387.4524 -2946.6113 188.95802 11399.501 - 9200 299.22093 -3389.1839 -2944.1154 146.14576 11400.688 - 9300 284.90575 -3379.3036 -2955.5278 411.30367 11402.307 - 9400 300.14961 -3385.9354 -2939.4856 205.76471 11404.592 - 9500 298.23305 -3382.4734 -2938.8744 283.06153 11407.279 - 9600 282.24345 -3371.4137 -2951.598 615.47273 11410.426 - 9700 318.39873 -3390.0399 -2916.4459 -28.07165 11414.311 - 9800 311.94009 -3386.2127 -2922.2255 70.507396 11418.547 - 9900 317.20428 -3383.9541 -2912.1367 -26.751232 11423.218 - 10000 318.36799 -3382.5532 -2909.0049 -46.362454 11428.313 -Loop time of 3.25989 on 1 procs for 10000 steps with 500 atoms + binsize = 3.5, bins = 7 7 7 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.726 | 5.726 | 5.726 Mbytes +Step Temp f_fxgREM PotEng TotEng Press Volume + 0 290.839 130.69094 -3356.3647 -2923.7637 13.376266 11488.157 + 10 298.49016 130.86737 -3362.2456 -2918.2641 -139.01604 11488.157 + 20 305.83037 130.96693 -3365.5642 -2910.6647 -254.45224 11488.154 + 30 310.23182 131.06065 -3368.6884 -2907.2421 -332.60751 11488.147 + 40 310.27284 131.0497 -3368.3232 -2906.8159 -322.86391 11488.133 + 50 306.53667 130.92563 -3364.1878 -2908.2377 -229.32421 11488.113 + 60 301.05575 130.86235 
-3362.0784 -2914.2808 -139.22921 11488.089 + 70 295.62185 130.74887 -3358.2958 -2918.5807 -30.488671 11488.062 + 80 291.00386 130.62365 -3354.1217 -2921.2755 71.019816 11488.035 + 90 287.64085 130.55052 -3351.684 -2923.84 136.24318 11488.009 + 100 285.69508 130.52063 -3350.6878 -2925.738 167.78531 11487.986 + 110 285.01446 130.47978 -3349.3259 -2925.3885 187.55076 11487.966 + 120 285.5602 130.50213 -3350.0709 -2925.3218 166.71919 11487.95 + 130 287.3227 130.55196 -3351.7321 -2924.3613 117.78775 11487.937 + 140 290.25957 130.63244 -3354.4147 -2922.6756 40.299682 11487.926 + 150 294.42021 130.70913 -3356.9709 -2919.0432 -49.667147 11487.916 + 160 299.06616 130.7992 -3359.9734 -2915.1351 -148.10202 11487.905 + 170 302.27193 130.85741 -3361.9138 -2912.3072 -219.90367 11487.892 + 180 303.13784 130.86602 -3362.2007 -2911.3062 -249.85871 11487.874 + 190 302.80628 130.87507 -3362.5024 -2912.101 -267.67656 11487.852 + 200 303.33832 130.87333 -3362.4442 -2911.2515 -285.77881 11487.825 + 210 305.5023 130.89943 -3363.3144 -2908.9029 -322.79381 11487.792 + 220 307.85065 130.92123 -3364.0411 -2906.1366 -351.67602 11487.753 + 230 308.22104 130.93218 -3364.406 -2905.9506 -348.91637 11487.707 + 240 305.74564 130.91929 -3363.9764 -2909.2029 -303.52406 11487.654 + 250 301.78413 130.84598 -3361.5326 -2912.6516 -216.19532 11487.596 + 260 298.46788 130.79715 -3359.9048 -2915.9565 -141.52764 11487.534 + 270 296.63349 130.7376 -3357.92 -2916.7002 -78.205751 11487.469 + 280 295.89932 130.72322 -3357.4406 -2917.3128 -46.177821 11487.402 + 290 295.70212 130.70344 -3356.7815 -2916.947 -22.759891 11487.335 + 300 296.08102 130.69483 -3356.4942 -2916.0962 -14.958212 11487.267 + 310 297.44455 130.7448 -3358.1599 -2915.7337 -42.692278 11487.199 + 320 299.60732 130.81428 -3360.4761 -2914.833 -85.527147 11487.13 + 330 301.41392 130.84528 -3361.5094 -2913.179 -109.23287 11487.059 + 340 301.95602 130.87141 -3362.3802 -2913.2435 -116.52949 11486.987 + 350 301.04313 130.87719 -3362.573 -2914.7942 
-99.370101 11486.912 + 360 298.75171 130.86396 -3362.1319 -2917.7614 -61.477019 11486.836 + 370 295.31919 130.80085 -3360.0283 -2920.7634 2.7109805 11486.758 + 380 292.01147 130.74417 -3358.1391 -2923.7942 56.679543 11486.68 + 390 291.02267 130.76412 -3358.8039 -2925.9297 52.137334 11486.604 + 400 293.47704 130.83929 -3361.3097 -2924.7849 -10.620913 11486.528 + 410 298.04072 130.90419 -3363.4731 -2920.1601 -92.539319 11486.453 + 420 302.65763 130.9386 -3364.6201 -2914.4398 -161.34189 11486.375 + 430 305.58577 130.94802 -3364.9342 -2910.3985 -199.69054 11486.295 + 440 306.15192 130.96265 -3365.4217 -2910.0439 -210.24137 11486.211 + 450 304.70394 130.95 -3365.0002 -2911.7761 -185.30124 11486.122 + 460 302.72281 130.89534 -3363.178 -2912.9008 -135.5125 11486.031 + 470 301.565 130.87256 -3362.4188 -2913.8637 -103.42872 11485.936 + 480 301.09466 130.88295 -3362.7649 -2914.9094 -90.679055 11485.84 + 490 300.36785 130.88439 -3362.8128 -2916.0384 -75.984213 11485.742 + 500 299.4194 130.85387 -3361.7955 -2916.4318 -50.507149 11485.642 + 510 299.01319 130.85365 -3361.7883 -2917.0289 -42.892731 11485.542 + 520 299.3401 130.84795 -3361.5982 -2916.3525 -40.095555 11485.441 + 530 299.80119 130.82122 -3360.7075 -2914.7759 -29.794671 11485.339 + 540 299.83338 130.81373 -3360.4575 -2914.4781 -22.047344 11485.237 + 550 299.4037 130.80908 -3360.3026 -2914.9623 -9.5687775 11485.134 + 560 298.26531 130.81036 -3360.3454 -2916.6984 6.360308 11485.031 + 570 296.7951 130.74597 -3358.1989 -2916.7387 46.627791 11484.928 + 580 296.86944 130.75106 -3358.3685 -2916.7977 42.347178 11484.827 + 590 299.99401 130.76813 -3358.9377 -2912.7193 1.8547224 11484.726 + 600 305.29691 130.90657 -3363.5524 -2909.4464 -108.7678 11484.625 + 610 310.53262 131.03199 -3367.7329 -2905.8391 -214.98857 11484.522 + 620 313.26727 131.11668 -3370.5559 -2904.5946 -282.25441 11484.415 + 630 312.85585 131.1203 -3370.6767 -2905.3274 -288.19236 11484.303 + 640 311.07096 131.06565 -3368.8551 -2906.1606 -255.80573 11484.185 + 
650 309.45998 131.02234 -3367.4112 -2907.1129 -224.31879 11484.062 + 660 307.97753 131.01314 -3367.1047 -2909.0115 -203.40277 11483.936 + 670 306.45975 130.95203 -3365.0678 -2909.2321 -160.61231 11483.805 + 680 305.20067 130.95516 -3365.1719 -2911.2091 -144.94169 11483.671 + 690 304.53788 130.9068 -3363.5599 -2910.5829 -116.21585 11483.534 + 700 303.76824 130.89976 -3363.3253 -2911.493 -104.4867 11483.395 + 710 302.08413 130.82356 -3360.7855 -2911.4582 -59.828868 11483.254 + 720 299.94321 130.81746 -3360.5821 -2914.4393 -37.62374 11483.112 + 730 298.41328 130.79559 -3359.8529 -2915.9857 -15.228868 11482.97 + 740 298.24134 130.78445 -3359.4815 -2915.8701 -8.1016813 11482.827 + 750 299.35763 130.83783 -3361.261 -2915.9892 -34.63294 11482.684 + 760 300.68466 130.88347 -3362.7823 -2915.5367 -56.018851 11482.54 + 770 300.46889 130.91881 -3363.9604 -2917.0357 -58.629961 11482.395 + 780 298.60969 130.84715 -3361.5716 -2917.4123 -6.2774696 11482.25 + 790 296.34564 130.80308 -3360.1027 -2919.3111 38.355568 11482.104 + 800 294.51417 130.78329 -3359.4431 -2921.3756 69.012038 11481.959 + 810 292.88964 130.75567 -3358.5224 -2922.8713 100.11987 11481.815 + 820 291.00259 130.68345 -3356.1148 -2923.2705 149.04294 11481.674 + 830 289.35715 130.65539 -3355.1797 -2924.7829 177.16152 11481.535 + 840 288.98728 130.64016 -3354.6719 -2924.8252 184.02622 11481.399 + 850 290.93491 130.73708 -3357.9026 -2925.159 120.8681 11481.268 + 860 295.66458 130.80129 -3360.0429 -2920.2643 38.832849 11481.138 + 870 302.5264 130.93467 -3364.4891 -2914.504 -92.377319 11481.009 + 880 309.52977 131.07094 -3369.0313 -2908.6292 -225.81971 11480.879 + 890 314.79857 131.22892 -3374.2973 -2906.0583 -345.569 11480.744 + 900 317.06683 131.29681 -3376.5605 -2904.9476 -394.53967 11480.603 + 910 315.46623 131.26165 -3375.3882 -2906.1561 -358.20311 11480.454 + 920 309.86806 131.12874 -3370.9581 -2910.0529 -239.56321 11480.299 + 930 301.86853 130.97293 -3365.7644 -2916.7579 -84.084273 11480.139 + 940 293.87732 
130.75829 -3358.6098 -2921.4896 99.699044 11479.978 + 950 287.08521 130.63024 -3354.3413 -2927.3238 241.77818 11479.819 + 960 281.87799 130.53697 -3351.2325 -2931.9603 352.23356 11479.664 + 970 278.97756 130.50825 -3350.2749 -2935.317 408.09623 11479.516 + 980 279.5637 130.51545 -3350.5149 -2934.6851 405.94257 11479.376 + 990 285.08084 130.59476 -3353.1586 -2929.1224 318.75105 11479.244 + 1000 295.23442 130.78403 -3359.4678 -2920.3289 140.84436 11479.118 + 1010 306.39662 131.00504 -3366.8348 -2911.093 -57.300039 11478.994 + 1020 313.99268 131.17269 -3372.4229 -2905.3825 -196.39271 11478.87 + 1030 316.09081 131.17397 -3372.4657 -2902.3045 -212.08799 11478.742 + 1040 313.79676 131.11716 -3370.5719 -2903.823 -154.55598 11478.611 + 1050 309.25763 131.03639 -3367.8795 -2907.8822 -60.074771 11478.476 + 1060 304.39064 130.9296 -3364.3202 -2911.5621 49.662879 11478.34 + 1070 300.46253 130.88855 -3362.9515 -2916.0363 125.17653 11478.205 + 1080 297.4991 130.81897 -3360.6323 -2918.125 196.95214 11478.073 + 1090 295.54632 130.78088 -3359.3627 -2919.76 239.22128 11477.944 + 1100 295.40528 130.78588 -3359.5295 -2920.1365 238.50053 11477.82 + 1110 297.95408 130.85115 -3361.7049 -2918.5208 183.78695 11477.701 + 1120 302.00232 130.9575 -3365.2501 -2916.0445 97.189305 11477.585 + 1130 304.60024 131.02776 -3367.5921 -2914.5223 37.346294 11477.471 + 1140 303.80623 131.0082 -3366.9401 -2915.0513 44.708909 11477.358 + 1150 300.46145 130.90347 -3363.449 -2916.5353 112.27252 11477.246 + 1160 296.94083 130.8387 -3361.29 -2919.613 172.17491 11477.136 + 1170 294.37574 130.7611 -3358.7032 -2920.8416 229.88782 11477.03 + 1180 292.71093 130.70489 -3356.8296 -2921.4443 265.86124 11476.928 + 1190 292.90482 130.70449 -3356.8165 -2921.1428 254.19865 11476.831 + 1200 296.4057 130.74874 -3358.2915 -2917.4104 187.0124 11476.739 + 1210 302.45194 130.92539 -3364.1797 -2914.3053 40.161125 11476.651 + 1220 308.3282 131.03577 -3367.8591 -2909.2443 -82.068523 11476.563 + 1230 311.66885 131.11256 -3370.4188 
-2906.835 -163.94032 11476.475 + 1240 311.46467 131.10841 -3370.2803 -2907.0002 -175.20411 11476.383 + 1250 308.80936 131.07626 -3369.2086 -2909.8781 -142.6043 11476.287 + 1260 305.85569 130.98704 -3366.2346 -2911.2974 -76.438773 11476.19 + 1270 304.35502 130.97354 -3365.7847 -2913.0797 -45.262845 11476.09 + 1280 303.96208 130.99745 -3366.5817 -2914.4612 -35.282643 11475.99 + 1290 303.32656 130.98902 -3366.3007 -2915.1254 -10.999505 11475.889 + 1300 301.90051 130.93357 -3364.4524 -2915.3982 37.401199 11475.788 + 1310 300.25344 130.85018 -3361.6726 -2915.0684 96.658435 11475.688 + 1320 299.48263 130.84699 -3361.5664 -2916.1087 115.8203 11475.59 + 1330 299.78757 130.84109 -3361.3697 -2915.4584 123.59173 11475.494 + 1340 300.29823 130.88265 -3362.7551 -2916.0843 107.88605 11475.4 + 1350 300.14288 130.89152 -3363.0508 -2916.611 106.87383 11475.309 + 1360 298.8976 130.83568 -3361.1895 -2916.6019 135.89122 11475.219 + 1370 296.68816 130.80841 -3360.2802 -2918.9791 158.16094 11475.133 + 1380 294.36704 130.78293 -3359.4309 -2921.5823 177.02215 11475.049 + 1390 292.94543 130.76316 -3358.7718 -2923.0377 182.42177 11474.969 + 1400 292.85338 130.76531 -3358.8438 -2923.2466 166.77089 11474.892 + 1410 293.5964 130.77579 -3359.1929 -2922.4905 140.56649 11474.819 + 1420 294.92318 130.78177 -3359.3925 -2920.7166 111.83731 11474.748 + 1430 297.41943 130.83024 -3361.008 -2918.6192 59.562034 11474.68 + 1440 301.23374 130.86102 -3362.0338 -2913.9715 6.214788 11474.613 + 1450 305.13101 130.90918 -3363.6393 -2909.7801 -48.828293 11474.546 + 1460 307.22712 130.96576 -3365.5255 -2908.5484 -85.101507 11474.478 + 1470 306.96483 131.00001 -3366.6671 -2910.0802 -86.139204 11474.409 + 1480 305.8449 130.98596 -3366.1988 -2911.2776 -55.589876 11474.338 + 1490 305.6905 131.00263 -3366.7544 -2912.063 -40.882616 11474.266 + 1500 306.57141 131.0317 -3367.7234 -2911.7217 -37.617278 11474.193 + 1510 306.95952 131.05113 -3368.3709 -2911.7918 -27.410295 11474.12 + 1520 305.73248 131.0509 -3368.3635 
-2913.6096 1.195061 11474.046 + 1530 303.03251 130.99439 -3366.4796 -2915.7417 59.099761 11473.972 + 1540 300.15414 130.94623 -3364.8742 -2918.4176 108.88078 11473.899 + 1550 298.85693 130.90522 -3363.5073 -2918.9802 135.20102 11473.829 + 1560 300.13411 130.91829 -3363.9432 -2917.5164 112.29006 11473.761 + 1570 303.22588 130.98259 -3366.0865 -2915.0609 50.329011 11473.696 + 1580 305.89685 130.98743 -3366.2478 -2911.2494 14.365342 11473.631 + 1590 306.23031 130.99796 -3366.5988 -2911.1044 1.2446683 11473.567 + 1600 303.60667 130.90752 -3363.5842 -2911.9923 59.354855 11473.503 + 1610 299.03827 130.7833 -3359.4433 -2914.6466 152.13579 11473.441 + 1620 294.81902 130.65398 -3355.1326 -2916.6117 247.34217 11473.381 + 1630 292.42753 130.63922 -3354.6405 -2919.6767 282.09527 11473.326 + 1640 292.09794 130.67238 -3355.7459 -2921.2723 272.26409 11473.276 + 1650 294.32074 130.71165 -3357.0549 -2919.2751 228.26827 11473.232 + 1660 299.26386 130.81579 -3360.5264 -2915.3941 128.57357 11473.192 + 1670 305.64564 130.96379 -3365.4596 -2910.8349 -1.8828442 11473.154 + 1680 310.71597 131.07069 -3369.0229 -2906.8564 -100.61886 11473.117 + 1690 311.98908 131.07991 -3369.3304 -2905.2703 -120.24223 11473.077 + 1700 309.11038 131.04763 -3368.2542 -2908.4759 -77.354272 11473.036 + 1710 303.97711 130.95966 -3365.3219 -2913.179 14.978929 11472.993 + 1720 299.01252 130.89874 -3363.2915 -2918.533 101.51893 11472.951 + 1730 295.30596 130.81496 -3360.4985 -2921.2533 185.79882 11472.91 + 1740 293.43947 130.75915 -3358.6385 -2922.1695 241.06449 11472.874 + 1750 293.66468 130.77555 -3359.185 -2922.381 245.47771 11472.841 + 1760 295.6381 130.82202 -3360.7341 -2920.9949 216.20049 11472.814 + 1770 298.29248 130.83443 -3361.1476 -2917.4601 188.52175 11472.79 + 1780 300.34955 130.91017 -3363.6722 -2916.925 138.40069 11472.77 + 1790 301.23848 130.87638 -3362.546 -2914.4766 138.99244 11472.753 + 1800 300.88534 130.82629 -3360.8762 -2913.3321 157.35644 11472.739 + 1810 299.3665 130.80872 -3360.2907 
-2915.0057 175.12057 11472.728 + 1820 297.0091 130.74328 -3358.1093 -2916.3307 218.64624 11472.72 + 1830 294.54143 130.72697 -3357.5657 -2919.4576 244.70058 11472.716 + 1840 292.15559 130.71232 -3357.0772 -2922.5179 269.23523 11472.718 + 1850 289.60023 130.65095 -3355.0317 -2924.2733 310.65443 11472.724 + 1860 287.41209 130.61247 -3353.749 -2926.2453 336.4858 11472.737 + 1870 287.17473 130.58419 -3352.8065 -2925.6558 338.1041 11472.755 + 1880 290.02881 130.62781 -3354.2603 -2922.8644 282.10412 11472.781 + 1890 295.29776 130.73811 -3357.9371 -2918.7041 177.50768 11472.812 + 1900 300.76857 130.86426 -3362.1419 -2914.7714 64.474646 11472.846 + 1910 304.49633 130.92492 -3364.1639 -2911.2487 -6.1656434 11472.882 + 1920 305.80688 130.93859 -3364.6198 -2909.7552 -32.003556 11472.918 + 1930 305.17371 130.94209 -3364.7364 -2910.8136 -28.799682 11472.953 + 1940 303.71908 130.93745 -3364.5815 -2912.8224 -8.2385492 11472.988 + 1950 302.77768 130.91517 -3363.8389 -2913.48 20.026775 11473.023 + 1960 303.11865 130.90997 -3363.6657 -2912.7996 33.465685 11473.058 + 1970 304.57575 130.92261 -3364.087 -2911.0537 30.128888 11473.094 + 1980 306.11061 130.95654 -3365.2181 -2909.9018 16.371808 11473.131 + 1990 306.46526 130.97145 -3365.715 -2909.8711 18.380298 11473.168 + 2000 304.83747 130.88071 -3362.6904 -2909.2678 74.697763 11473.205 + 2010 301.69967 130.87889 -3362.6298 -2913.8744 106.07778 11473.244 + 2020 299.17067 130.79034 -3359.6779 -2914.6842 159.02318 11473.286 + 2030 299.51037 130.82078 -3360.6926 -2915.1937 138.17309 11473.33 + 2040 301.5266 130.87994 -3362.6648 -2914.1668 89.963429 11473.377 + 2050 301.99504 130.905 -3363.5001 -2914.3053 66.472441 11473.425 + 2060 300.14589 130.81785 -3360.5951 -2914.1509 102.80165 11473.476 + 2070 298.71217 130.76247 -3358.7489 -2914.4372 121.482 11473.528 + 2080 299.91031 130.76625 -3358.8749 -2912.7811 95.062776 11473.583 + 2090 302.49571 130.82068 -3360.6893 -2910.7498 41.01224 11473.639 + 2100 303.13383 130.83722 -3361.2408 -2910.3522 
23.427206 11473.697 + 2110 300.40954 130.77251 -3359.0836 -2912.2472 71.576117 11473.755 + 2120 296.14255 130.6542 -3355.1401 -2914.6505 158.29426 11473.815 + 2130 293.09838 130.63075 -3354.3585 -2918.3968 200.9025 11473.877 + 2140 292.64931 130.65767 -3355.2557 -2919.962 201.04168 11473.944 + 2150 294.87742 130.70895 -3356.9652 -2918.3573 166.24166 11474.014 + 2160 298.87969 130.83426 -3361.142 -2916.5811 86.526015 11474.088 + 2170 303.45681 130.91954 -3363.9846 -2912.6156 19.146581 11474.163 + 2180 307.56553 130.99976 -3366.6585 -2909.1781 -36.644732 11474.239 + 2190 310.02966 131.05186 -3368.3953 -2907.2497 -62.166387 11474.314 + 2200 310.28177 131.07278 -3369.0928 -2907.5722 -54.59024 11474.389 + 2210 309.15718 131.06293 -3368.7644 -2908.9166 -24.708919 11474.462 + 2220 307.97358 131.05989 -3368.6629 -2910.5755 -3.3583599 11474.535 + 2230 307.65448 131.06582 -3368.8606 -2911.2479 -1.6211 11474.608 + 2240 308.22298 131.04964 -3368.3214 -2909.8631 -7.9527772 11474.681 + 2250 309.32404 131.09118 -3369.7059 -2909.6098 -45.66514 11474.754 + 2260 310.15986 131.05666 -3368.5555 -2907.2162 -52.914932 11474.826 + 2270 309.78841 131.04592 -3368.1974 -2907.4106 -55.647873 11474.897 + 2280 307.7735 131.03083 -3367.6942 -2909.9045 -37.650301 11474.967 + 2290 304.74577 131.01176 -3367.0585 -2913.7723 -4.582535 11475.037 + 2300 301.49445 130.9293 -3364.3099 -2915.8597 57.698577 11475.107 + 2310 297.881 130.83192 -3361.0641 -2917.9887 129.32173 11475.177 + 2320 293.52211 130.74771 -3358.2571 -2921.6652 198.8544 11475.251 + 2330 289.25398 130.6613 -3355.3767 -2925.1334 259.51123 11475.328 + 2340 287.50158 130.61982 -3353.9941 -2926.3572 270.71895 11475.411 + 2350 290.18977 130.67254 -3355.7512 -2924.1159 198.68119 11475.498 + 2360 296.3715 130.78931 -3359.6436 -2918.8135 67.296756 11475.589 + 2370 302.48158 130.90474 -3363.4915 -2913.5731 -63.023443 11475.682 + 2380 305.55626 130.94929 -3364.9764 -2910.4847 -133.58402 11475.774 + 2390 305.06472 130.93626 -3364.5418 -2910.7812 
-139.04918 11475.864 + 2400 302.32605 130.89524 -3363.1745 -2913.4874 -101.51258 11475.951 + 2410 299.20708 130.84293 -3361.4311 -2916.3832 -47.837723 11476.036 + 2420 297.203 130.8321 -3361.0701 -2919.0031 -15.463524 11476.12 + 2430 296.73469 130.81561 -3360.5204 -2919.15 4.0741132 11476.204 + 2440 297.75847 130.83601 -3361.2004 -2918.3072 -9.8604944 11476.288 + 2450 299.96908 130.95058 -3365.0194 -2918.8381 -75.48141 11476.372 + 2460 302.42452 130.94371 -3364.7902 -2914.9567 -101.23698 11476.455 + 2470 304.77245 130.9939 -3366.4635 -2913.1376 -150.61701 11476.536 + 2480 307.33171 131.03635 -3367.8784 -2910.7458 -200.31856 11476.614 + 2490 310.10497 131.1068 -3370.2266 -2908.9689 -261.0083 11476.688 + 2500 312.1335 131.14702 -3371.5673 -2907.2924 -298.61384 11476.757 + 2510 311.75944 131.10892 -3370.2973 -2906.5787 -278.09336 11476.821 + 2520 307.90347 131.00295 -3366.7652 -2908.7821 -193.38869 11476.879 + 2530 301.46283 130.88613 -3362.871 -2914.4679 -74.979163 11476.934 + 2540 294.83614 130.73429 -3357.8096 -2919.2632 61.98391 11476.987 + 2550 290.19621 130.63296 -3354.432 -2922.7871 159.34756 11477.042 + 2560 289.26187 130.63136 -3354.3786 -2924.1235 177.55175 11477.1 + 2570 292.74168 130.72332 -3357.4441 -2922.013 108.83249 11477.161 + 2580 299.59155 130.83356 -3361.1188 -2915.4991 -6.7110008 11477.224 + 2590 306.86704 131.01538 -3367.1794 -2910.7379 -158.98516 11477.288 + 2600 311.48564 131.09435 -3369.8118 -2906.5005 -248.29721 11477.348 + 2610 311.88958 131.14245 -3371.4149 -2907.5028 -279.43866 11477.404 + 2620 307.76879 131.09448 -3369.8161 -2912.0333 -220.33351 11477.454 + 2630 300.02705 130.89709 -3363.2363 -2916.9688 -61.29918 11477.5 + 2640 291.53337 130.74048 -3358.016 -2924.3823 94.448801 11477.546 + 2650 285.66978 130.62737 -3354.2456 -2929.3335 209.40953 11477.593 + 2660 283.71498 130.62171 -3354.0569 -2932.0524 241.44526 11477.644 + 2670 285.16496 130.64659 -3354.8865 -2930.7252 219.5018 11477.7 + 2680 289.69134 130.7589 -3358.6301 -2927.7362 
125.80168 11477.76 + 2690 296.39671 130.89793 -3363.2642 -2922.3966 -4.044639 11477.823 + 2700 302.70772 130.98601 -3366.2003 -2915.9455 -112.41764 11477.885 + 2710 306.21308 131.03133 -3367.7108 -2912.2421 -176.3084 11477.946 + 2720 306.58537 131.05111 -3368.3704 -2912.3479 -195.88015 11478.003 + 2730 304.81136 131.03699 -3367.8995 -2914.5157 -176.33187 11478.057 + 2740 302.11262 130.95355 -3365.1182 -2915.7485 -115.45847 11478.107 + 2750 299.17119 130.86547 -3362.1823 -2917.1878 -46.555747 11478.155 + 2760 296.28047 130.84152 -3361.3839 -2920.6891 -2.4143151 11478.203 + 2770 294.10063 130.76827 -3358.9425 -2921.4901 51.715754 11478.25 + 2780 293.35974 130.7585 -3358.6168 -2922.2664 65.084083 11478.298 + 2790 294.3672 130.83411 -3361.1371 -2923.2882 27.210865 11478.348 + 2800 296.91863 130.86872 -3362.2907 -2920.6467 -11.436148 11478.398 + 2810 300.43642 130.93793 -3364.5978 -2917.7214 -72.939042 11478.448 + 2820 303.75796 131.00341 -3366.7804 -2914.9635 -129.86307 11478.497 + 2830 305.43691 130.99351 -3366.4502 -2912.1359 -138.8722 11478.543 + 2840 304.32854 130.94508 -3364.836 -2912.1703 -103.5191 11478.587 + 2850 300.76892 130.88441 -3362.8136 -2915.4426 -38.819536 11478.63 + 2860 296.72672 130.84196 -3361.3985 -2920.04 23.508817 11478.671 + 2870 293.96011 130.80472 -3360.1573 -2922.9139 69.227868 11478.713 + 2880 292.78904 130.77089 -3359.0296 -2923.5281 92.541744 11478.757 + 2890 292.99538 130.73617 -3357.8722 -2922.0638 93.286645 11478.802 + 2900 295.20791 130.80362 -3360.1207 -2921.0213 28.151935 11478.849 + 2910 299.32216 130.90023 -3363.3411 -2918.1221 -68.773951 11478.897 + 2920 303.02174 130.93359 -3364.4531 -2913.7312 -136.10445 11478.943 + 2930 304.34696 130.96411 -3365.4704 -2912.7774 -175.20189 11478.987 + 2940 303.7276 130.90933 -3363.6443 -2911.8725 -154.66599 11479.028 + 2950 302.1532 130.9268 -3364.2268 -2914.7968 -142.37405 11479.066 + 2960 299.99771 130.83823 -3361.2743 -2915.0505 -76.965317 11479.101 + 2970 297.9876 130.81967 -3360.6556 
-2917.4216 -34.914508 11479.134 + 2980 297.09445 130.84644 -3361.5481 -2919.6427 -19.451046 11479.167 + 2990 297.66828 130.90179 -3363.3931 -2920.6341 -29.121314 11479.2 + 3000 298.7181 130.89932 -3363.3108 -2918.9903 -24.285938 11479.232 + 3010 299.0098 130.90971 -3363.657 -2918.9026 -22.182029 11479.264 + 3020 298.28074 130.88048 -3362.6827 -2919.0127 -1.0821783 11479.296 + 3030 297.0716 130.8854 -3362.8465 -2920.975 5.7985467 11479.327 + 3040 296.16681 130.8528 -3361.7601 -2921.2344 17.618492 11479.359 + 3050 296.61863 130.86238 -3362.0793 -2920.8815 -5.0533974 11479.391 + 3060 298.88076 130.91903 -3363.9678 -2919.4053 -65.659031 11479.423 + 3070 301.66578 130.95833 -3365.2777 -2916.5727 -123.02088 11479.454 + 3080 302.52079 131.01129 -3367.0428 -2917.0661 -164.61217 11479.483 + 3090 301.11032 130.92933 -3364.3109 -2916.4321 -128.84217 11479.509 + 3100 299.63105 130.84997 -3361.6655 -2915.987 -85.460877 11479.532 + 3110 299.42967 130.83902 -3361.3005 -2915.9216 -71.054569 11479.554 + 3120 299.67316 130.89193 -3363.0643 -2917.3232 -77.718472 11479.574 + 3130 299.65925 130.89636 -3363.2122 -2917.4917 -57.170356 11479.593 + 3140 299.34393 130.88709 -3362.903 -2917.6516 -25.782567 11479.611 + 3150 298.35178 130.86863 -3362.2877 -2918.5121 15.361343 11479.628 + 3160 296.26907 130.84468 -3361.4895 -2920.8117 64.970352 11479.646 + 3170 293.5713 130.74561 -3358.187 -2921.522 139.41891 11479.666 + 3180 291.86165 130.75248 -3358.4159 -2924.2938 151.47297 11479.688 + 3190 293.33675 130.77405 -3359.1351 -2922.819 116.04533 11479.713 + 3200 299.2326 130.88161 -3362.7204 -2917.6346 -1.4187391 11479.74 + 3210 307.71653 131.05955 -3368.6517 -2910.9467 -171.77245 11479.767 + 3220 314.77001 131.17029 -3372.3428 -2904.1463 -301.0368 11479.79 + 3230 317.82191 131.23858 -3374.6194 -2901.8834 -369.6049 11479.809 + 3240 316.74953 131.24222 -3374.7407 -2903.5998 -364.48531 11479.82 + 3250 313.09527 131.14033 -3371.3443 -2905.6389 -282.8935 11479.824 + 3260 308.46587 131.05905 
-3368.6349 -2909.8153 -191.24287 11479.824 + 3270 303.72023 130.97546 -3365.8488 -2914.088 -95.248106 11479.819 + 3280 299.15178 130.91918 -3363.9728 -2919.0072 -14.495937 11479.813 + 3290 295.04318 130.84822 -3361.6074 -2922.753 60.195411 11479.806 + 3300 292.01842 130.79049 -3359.6831 -2925.3278 111.54019 11479.801 + 3310 290.52318 130.71907 -3357.3022 -2925.171 146.88153 11479.798 + 3320 290.42542 130.73786 -3357.9286 -2925.9428 129.48514 11479.798 + 3330 291.568 130.80843 -3360.2809 -2926.5956 79.888951 11479.8 + 3340 293.60282 130.86087 -3362.029 -2925.317 31.931234 11479.803 + 3350 295.7318 130.88352 -3362.7841 -2922.9054 -1.2153566 11479.808 + 3360 297.58722 130.91269 -3363.7564 -2921.118 -30.068107 11479.812 + 3370 299.54946 130.94795 -3364.9316 -2919.3745 -56.975377 11479.816 + 3380 301.42185 130.97356 -3365.7855 -2917.4433 -75.659903 11479.819 + 3390 302.05306 130.97105 -3365.7018 -2916.4208 -72.77122 11479.82 + 3400 300.40417 130.92411 -3364.1372 -2917.3087 -35.829416 11479.821 + 3410 296.26066 130.85227 -3361.7423 -2921.077 26.223681 11479.82 + 3420 290.73171 130.70224 -3356.7414 -2924.3 122.24943 11479.82 + 3430 285.76751 130.61453 -3353.8177 -2928.7601 180.12679 11479.823 + 3440 283.7407 130.56811 -3352.2703 -2930.2275 186.40432 11479.829 + 3450 286.76354 130.65915 -3355.3049 -2928.7658 91.43927 11479.839 + 3460 293.42258 130.79738 -3359.9126 -2923.4687 -51.518599 11479.85 + 3470 299.95193 130.92524 -3364.1746 -2918.0188 -182.31788 11479.86 + 3480 304.11798 130.99535 -3366.5116 -2914.1591 -256.09455 11479.868 + 3490 305.03989 131.02597 -3367.5323 -2913.8086 -269.23029 11479.87 + 3500 303.01655 130.98347 -3366.1156 -2915.4014 -213.6144 11479.867 + 3510 299.61043 130.92192 -3364.064 -2918.4162 -130.15675 11479.861 + 3520 296.80003 130.87593 -3362.5308 -2921.0633 -58.90307 11479.852 + 3530 296.08565 130.8629 -3362.0966 -2921.6917 -25.652986 11479.841 + 3540 297.7315 130.92342 -3364.1141 -2921.2611 -51.471 11479.831 + 3550 300.76188 131.01095 -3367.0317 
-2919.6712 -108.18423 11479.819 + 3560 302.99404 131.0356 -3367.8533 -2917.1726 -138.56298 11479.806 + 3570 302.8702 131.03795 -3367.9317 -2917.4352 -140.30855 11479.789 + 3580 301.21849 131.00268 -3366.756 -2918.7163 -113.16149 11479.771 + 3590 300.31923 131.00967 -3366.989 -2920.2869 -106.2352 11479.75 + 3600 300.80325 131.02003 -3367.3344 -2919.9124 -108.94713 11479.727 + 3610 301.14244 130.99991 -3366.6637 -2918.7371 -95.105108 11479.702 + 3620 300.05559 130.92846 -3364.2819 -2917.9719 -45.399787 11479.675 + 3630 297.94069 130.88029 -3362.6765 -2919.5123 7.9145598 11479.648 + 3640 295.96781 130.87041 -3362.347 -2922.1173 45.581116 11479.621 + 3650 294.71305 130.81528 -3360.5093 -2922.146 89.403749 11479.595 + 3660 294.19875 130.82981 -3360.9938 -2923.3954 91.611011 11479.57 + 3670 294.44084 130.8196 -3360.6532 -2922.6948 86.169104 11479.547 + 3680 295.43683 130.84379 -3361.4596 -2922.0197 48.973182 11479.526 + 3690 297.90051 130.86104 -3362.0345 -2918.9301 -6.6180676 11479.506 + 3700 301.93871 130.96026 -3365.3419 -2916.231 -108.25614 11479.485 + 3710 305.34973 131.02375 -3367.4585 -2913.2739 -186.14295 11479.463 + 3720 305.99126 131.04896 -3368.2986 -2913.1598 -216.49993 11479.437 + 3730 304.13309 130.99394 -3366.4646 -2914.0896 -186.52294 11479.408 + 3740 301.75982 130.99593 -3366.5309 -2917.686 -167.55228 11479.374 + 3750 300.5013 130.95653 -3365.2178 -2918.2449 -139.84654 11479.338 + 3760 300.8053 130.9555 -3365.1832 -2917.7581 -139.85413 11479.299 + 3770 302.10779 130.96613 -3365.5377 -2916.1753 -151.07312 11479.258 + 3780 302.93292 131.01089 -3367.0295 -2916.4398 -167.58837 11479.213 + 3790 301.79914 130.95433 -3365.1442 -2916.2409 -125.39602 11479.166 + 3800 298.59542 130.92445 -3364.1482 -2920.0102 -73.755956 11479.116 + 3810 294.45779 130.8764 -3362.5467 -2924.5631 -9.5283189 11479.064 + 3820 291.18165 130.81367 -3360.4555 -2927.3449 46.749383 11479.013 + 3830 290.71614 130.82919 -3360.9731 -2928.5548 39.475256 11478.962 + 3840 294.29087 130.86414 
-3362.1379 -2924.4026 -18.925024 11478.912 + 3850 301.00088 130.99575 -3366.525 -2918.809 -145.30363 11478.862 + 3860 308.11767 131.12371 -3370.7903 -2912.4887 -273.02479 11478.809 + 3870 312.82048 131.22835 -3374.2783 -2908.9815 -362.99836 11478.751 + 3880 312.65898 131.22627 -3374.2091 -2909.1526 -358.25591 11478.686 + 3890 306.66293 131.14191 -3371.3972 -2915.2593 -262.44889 11478.614 + 3900 296.82227 130.98663 -3366.2208 -2924.7202 -102.3413 11478.537 + 3910 287.2544 130.80535 -3360.1783 -2932.9091 61.826448 11478.458 + 3920 282.43822 130.68901 -3356.3005 -2936.195 152.36422 11478.38 + 3930 284.34417 130.71125 -3357.0416 -2934.1012 125.57177 11478.305 + 3940 290.39852 130.83391 -3361.1303 -2929.1845 20.650974 11478.233 + 3950 295.95003 130.89164 -3363.0547 -2922.8514 -55.290362 11478.161 + 3960 297.73309 130.8957 -3363.1899 -2920.3345 -77.315317 11478.088 + 3970 295.98671 130.87449 -3362.4831 -2922.2253 -60.81939 11478.014 + 3980 294.0053 130.80528 -3360.176 -2922.8654 -28.522737 11477.938 + 3990 294.99199 130.82537 -3360.8455 -2922.0673 -61.305861 11477.862 + 4000 298.85366 130.8936 -3363.1199 -2918.5977 -141.73373 11477.785 + 4010 302.78774 131.02642 -3367.5473 -2917.1735 -248.53999 11477.705 + 4020 304.23424 131.07525 -3369.175 -2916.6496 -298.60815 11477.62 + 4030 302.61544 131.08804 -3369.6013 -2919.4838 -302.14922 11477.53 + 4040 299.71969 131.03251 -3367.7505 -2921.9401 -261.03287 11477.434 + 4050 298.18422 130.99171 -3366.3903 -2922.8639 -229.71196 11477.332 + 4060 298.8956 130.99476 -3366.492 -2921.9075 -227.22972 11477.227 + 4070 299.99908 131.09221 -3369.7404 -2923.5145 -255.3286 11477.117 + 4080 298.35947 131.02966 -3367.6553 -2923.8682 -192.17463 11477.002 + 4090 292.75349 130.95292 -3365.0973 -2929.6487 -89.28638 11476.884 + 4100 285.93508 130.83286 -3361.0953 -2935.7885 33.105452 11476.764 + 4110 282.7985 130.75682 -3358.5605 -2937.9192 94.135627 11476.644 + 4120 285.8656 130.75418 -3358.4725 -2933.2691 62.810862 11476.526 + 4130 292.20652 
130.8824 -3362.7467 -2928.1116 -54.607253 11476.41 + 4140 296.9924 131.00788 -3366.9293 -2925.1756 -158.73439 11476.292 + 4150 298.0388 131.00928 -3366.9761 -2923.6659 -177.36674 11476.171 + 4160 296.70893 130.97616 -3365.8721 -2924.54 -152.15357 11476.048 + 4170 295.37789 130.98653 -3366.2175 -2926.8653 -132.47649 11475.921 + 4180 294.87064 131.02475 -3367.4915 -2928.8938 -122.72641 11475.792 + 4190 294.67543 131.03748 -3367.9161 -2929.6087 -102.53126 11475.66 + 4200 294.13198 130.99999 -3366.6662 -2929.1672 -61.017319 11475.527 + 4210 293.11419 130.94927 -3364.9756 -2928.9905 -16.980228 11475.392 + 4220 292.6765 130.90978 -3363.6592 -2928.3251 7.5452974 11475.257 + 4230 294.26558 130.95314 -3365.1048 -2927.407 -26.516046 11475.123 + 4240 297.98659 130.9828 -3366.0933 -2922.8608 -80.580238 11474.987 + 4250 302.32563 131.07598 -3369.1995 -2919.513 -165.99612 11474.851 + 4260 305.09156 131.11874 -3370.6246 -2916.824 -215.25056 11474.711 + 4270 304.70351 131.11971 -3370.657 -2917.4337 -214.07107 11474.567 + 4280 301.07235 131.02203 -3367.4011 -2919.5788 -137.78603 11474.419 + 4290 295.34863 130.94669 -3364.8897 -2925.581 -45.016822 11474.268 + 4300 289.62881 130.81235 -3360.4118 -2929.6109 72.508715 11474.117 + 4310 286.15929 130.70998 -3356.9993 -2931.3591 155.63141 11473.966 + 4320 286.17972 130.67548 -3355.8493 -2930.1786 177.1863 11473.819 + 4330 288.9101 130.76856 -3358.9521 -2929.2202 121.15766 11473.675 + 4340 292.34642 130.86752 -3362.2508 -2927.4076 51.526417 11473.534 + 4350 295.54369 130.91838 -3363.9458 -2924.347 0.14273852 11473.393 + 4360 298.4863 130.99982 -3366.6606 -2922.6848 -59.174353 11473.253 + 4370 300.53895 131.05611 -3368.5369 -2921.508 -96.097134 11473.111 + 4380 300.21324 131.00293 -3366.7642 -2920.2197 -64.844663 11472.968 + 4390 296.95771 130.9715 -3365.7166 -2924.0145 -12.024469 11472.824 + 4400 292.16548 130.89831 -3363.2769 -2928.7029 69.460552 11472.679 + 4410 288.49395 130.86663 -3362.2209 -2933.1081 118.90158 11472.536 + 4420 
288.16332 130.86447 -3362.149 -2933.5279 117.89137 11472.395 + 4430 291.6436 130.90502 -3363.5005 -2929.7028 57.972156 11472.256 + 4440 297.66952 131.02665 -3367.5551 -2924.7943 -60.502859 11472.118 + 4450 304.09016 131.1206 -3370.6868 -2918.3758 -171.41098 11471.979 + 4460 308.5534 131.21197 -3373.7323 -2914.7825 -258.9135 11471.837 + 4470 309.48587 131.21082 -3373.694 -2913.3573 -273.57827 11471.69 + 4480 306.77989 131.18212 -3372.7372 -2916.4254 -238.64856 11471.538 + 4490 301.56146 131.06895 -3368.9651 -2920.4152 -144.88084 11471.381 + 4500 296.10391 130.94333 -3364.7776 -2924.3455 -44.256514 11471.222 + 4510 292.60484 130.9151 -3363.8367 -2928.6092 -0.72306844 11471.061 + 4520 291.75826 130.87928 -3362.6426 -2928.6743 16.82528 11470.9 + 4530 293.36463 130.93065 -3364.355 -2927.9973 -25.58661 11470.74 + 4540 296.89643 130.9919 -3366.3967 -2924.7858 -91.228856 11470.579 + 4550 301.44559 131.09673 -3369.8911 -2921.5136 -180.95354 11470.416 + 4560 305.45654 131.19907 -3373.3025 -2918.959 -259.91193 11470.25 + 4570 307.37517 131.22514 -3374.1713 -2916.9741 -283.00323 11470.079 + 4580 306.32829 131.20119 -3373.373 -2917.7328 -252.96221 11469.902 + 4590 302.84387 131.11781 -3370.5935 -2920.1362 -171.64318 11469.721 + 4600 298.19488 131.03951 -3367.9838 -2924.4415 -79.787912 11469.536 + 4610 294.12917 130.95675 -3365.2251 -2927.7303 4.3581537 11469.35 + 4620 292.3211 130.87068 -3362.3559 -2927.5504 61.439978 11469.164 + 4630 293.38764 130.93928 -3364.6427 -2928.2508 25.27214 11468.978 + 4640 296.47233 131.0037 -3366.7902 -2925.81 -33.985853 11468.794 + 4650 299.74757 131.04823 -3368.2744 -2922.4226 -88.468533 11468.608 + 4660 300.94964 131.08389 -3369.463 -2921.8232 -116.29775 11468.421 + 4670 298.75028 131.02165 -3367.3882 -2923.0198 -67.323696 11468.232 + 4680 293.86279 130.9228 -3364.0934 -2926.9948 28.453601 11468.041 + 4690 288.81593 130.83668 -3361.2227 -2931.6309 125.96905 11467.851 + 4700 285.80539 130.76656 -3358.8854 -2933.7716 197.88821 11467.664 + 4710 
285.60651 130.75504 -3358.5012 -2933.6832 215.64034 11467.48 + 4720 287.98726 130.78581 -3359.527 -2931.1678 187.07185 11467.3 + 4730 291.58245 130.86386 -3362.1287 -2928.4219 125.83544 11467.123 + 4740 294.29771 130.90897 -3363.6324 -2925.8868 83.804817 11466.949 + 4750 295.66712 130.94757 -3364.9189 -2925.1364 56.698622 11466.777 + 4760 297.36575 130.98872 -3366.2906 -2923.9816 27.083048 11466.606 + 4770 300.4781 131.04162 -3368.0541 -2921.1156 -18.299929 11466.435 + 4780 304.08974 131.11214 -3370.4046 -2918.0941 -74.333186 11466.263 + 4790 306.53442 131.16397 -3372.1323 -2916.1856 -110.15633 11466.091 + 4800 306.36811 131.13722 -3371.2406 -2915.5413 -88.96311 11465.916 + 4810 303.63247 131.09587 -3369.8624 -2918.2321 -35.479611 11465.739 + 4820 299.49779 131.09034 -3369.6779 -2924.1977 17.850254 11465.562 + 4830 295.2261 130.99537 -3366.5125 -2927.386 103.74338 11465.385 + 4840 291.85433 130.91013 -3363.6708 -2929.5596 172.07555 11465.21 + 4850 290.58794 130.88947 -3362.9824 -2930.7549 190.99507 11465.039 + 4860 291.87236 130.89483 -3363.1611 -2929.0231 174.70316 11464.871 + 4870 294.12292 130.89537 -3363.1789 -2925.6933 154.00303 11464.706 + 4880 294.92133 130.91097 -3363.6991 -2925.026 142.12291 11464.545 + 4890 293.91622 130.85984 -3361.9948 -2924.8167 171.99049 11464.386 + 4900 292.49926 130.85593 -3361.8643 -2926.7938 185.57775 11464.23 + 4910 292.21299 130.85029 -3361.6765 -2927.0318 183.5048 11464.078 + 4920 294.3465 130.89173 -3363.0576 -2925.2395 136.05449 11463.93 + 4930 298.91621 131.00827 -3366.9423 -2922.3271 36.222028 11463.783 + 4940 304.2075 131.11525 -3370.5083 -2918.0227 -64.58561 11463.638 + 4950 308.02135 131.18562 -3372.8539 -2914.6955 -131.3879 11463.491 + 4960 308.90373 131.20676 -3373.5587 -2914.0878 -141.96205 11463.342 + 4970 306.52679 131.14857 -3371.6191 -2915.6837 -80.691087 11463.191 + 4980 301.51197 131.06657 -3368.8856 -2920.4094 22.969986 11463.037 + 4990 295.43275 130.96386 -3365.4621 -2926.0283 146.8903 11462.885 + 5000 
290.88226 130.84758 -3361.5861 -2928.9208 259.04414 11462.735 + 5010 289.63823 130.8707 -3362.3566 -2931.5417 281.85705 11462.59 + 5020 290.86894 130.90255 -3363.4182 -2930.7727 270.69373 11462.45 + 5030 292.52764 130.95051 -3365.017 -2929.9043 241.84573 11462.316 + 5040 293.49658 130.95735 -3365.245 -2928.6911 228.45229 11462.186 + 5050 294.50147 131.00831 -3366.9436 -2928.895 192.46946 11462.061 + 5060 297.26288 131.04619 -3368.2062 -2926.0502 142.82543 11461.939 + 5070 302.05552 131.12366 -3370.7886 -2921.5039 59.794349 11461.82 + 5080 306.91444 131.21441 -3373.8136 -2917.3016 -25.907428 11461.702 + 5090 309.27791 131.2683 -3375.61 -2915.5825 -68.087454 11461.584 + 5100 307.58823 131.2273 -3374.2434 -2916.7292 -29.448909 11461.464 + 5110 302.95111 121.09595 -3369.865 -2919.2482 9.6514083 11461.343 + 5120 298.75108 120.99657 -3366.5523 -2922.1827 91.43846 11461.223 + 5130 296.72323 120.97582 -3365.8606 -2924.5073 120.71994 11461.104 + 5140 297.56659 120.98698 -3366.2327 -2923.6249 109.71929 11460.988 + 5150 300.59444 121.0848 -3369.4933 -2922.3818 44.711417 11460.873 + 5160 304.04375 121.17444 -3372.4812 -2920.2392 -22.708377 11460.76 + 5170 305.95904 121.22461 -3374.1536 -2919.0627 -63.352791 11460.646 + 5180 305.18561 121.24609 -3374.8697 -2920.9292 -73.945296 11460.531 + 5190 302.88285 121.19269 -3373.0896 -2922.5744 -48.880571 11460.415 + 5200 302.16928 121.11662 -3370.5539 -2921.1 -33.25034 11460.298 + 5210 305.06142 121.15295 -3371.7651 -2918.0094 -92.508454 11460.18 + 5220 310.35533 121.24048 -3374.6826 -2913.0526 -189.64565 11460.06 + 5230 315.46436 121.29318 -3376.4393 -2907.21 -266.73056 11459.937 + 5240 319.05378 121.31464 -3377.1545 -2902.5862 -310.55336 11459.808 + 5250 320.52058 121.33733 -3377.911 -2901.161 -328.05391 11459.674 + 5260 319.43519 121.35737 -3378.579 -2903.4433 -319.1583 11459.533 + 5270 317.11353 121.31969 -3377.323 -2905.6407 -278.13423 11459.386 + 5280 316.00777 121.26065 -3375.355 -2905.3174 -240.16245 11459.234 + 5290 316.44789 
121.29673 -3376.5576 -2905.8653 -250.42775 11459.077 + 5300 316.06674 121.30223 -3376.741 -2906.6157 -242.91365 11458.915 + 5310 313.5689 121.29322 -3376.4406 -2910.0306 -212.74759 11458.749 + 5320 309.70787 121.19262 -3373.0875 -2912.4205 -137.23259 11458.579 + 5330 306.0735 121.10017 -3370.0058 -2914.7447 -68.129282 11458.406 + 5340 303.18825 121.11198 -3370.3993 -2919.4298 -46.178889 11458.231 + 5350 299.78241 121.12628 -3370.8759 -2924.9723 -23.295201 11458.056 + 5360 295.15266 121.03172 -3367.724 -2928.7068 44.959761 11457.88 + 5370 291.70542 120.94668 -3364.8893 -2930.9996 92.593977 11457.705 + 5380 292.53528 120.97403 -3365.8011 -2930.677 56.151318 11457.532 + 5390 297.49289 121.02837 -3367.6124 -2925.1143 -28.267604 11457.359 + 5400 303.96005 121.12572 -3370.8575 -2918.7399 -141.06744 11457.186 + 5410 309.37113 121.20031 -3373.3438 -2913.1777 -231.5606 11457.011 + 5420 312.58872 121.25116 -3375.0387 -2910.0867 -286.06388 11456.831 + 5430 313.5163 121.28837 -3376.2789 -2909.9472 -304.47872 11456.645 + 5440 311.86622 121.27133 -3375.7109 -2911.8335 -269.39813 11456.454 + 5450 307.60137 121.20318 -3373.4392 -2915.9055 -187.70841 11456.258 + 5460 302.39663 121.11417 -3370.4722 -2920.6801 -92.893029 11456.058 + 5470 299.26197 121.06252 -3368.7508 -2923.6213 -38.239136 11455.855 + 5480 300.21333 121.0529 -3368.4301 -2921.8855 -46.314443 11455.653 + 5490 305.11139 121.17761 -3372.5871 -2918.7571 -149.37388 11455.448 + 5500 311.74544 121.31013 -3377.0044 -2913.3067 -274.23168 11455.241 + 5510 317.173 121.42895 -3380.9651 -2909.1943 -379.47236 11455.029 + 5520 319.40399 121.42705 -3380.9018 -2905.8125 -398.2285 11454.81 + 5530 317.44315 121.37311 -3379.1038 -2906.9312 -343.90889 11454.583 + 5540 311.69023 121.23901 -3374.6336 -2911.018 -211.67522 11454.349 + 5550 304.10649 121.09182 -3369.7273 -2917.3919 -50.301438 11454.111 + 5560 297.19518 120.93722 -3364.574 -2922.5187 107.02212 11453.872 + 5570 293.62951 120.87045 -3362.3484 -2925.5967 194.64351 11453.635 + 5580 
294.48004 120.89094 -3363.0312 -2925.0145 200.22187 11453.402 + 5590 297.7944 120.91632 -3363.8775 -2920.9309 170.75997 11453.173 + 5600 300.49781 121.0018 -3366.7267 -2919.7589 112.99595 11452.946 + 5610 300.26089 131.04183 -3368.0611 -2921.4458 153.68553 11452.722 + 5620 297.53468 131.02558 -3367.5195 -2924.9592 180.89854 11452.502 + 5630 295.58172 130.9574 -3365.2466 -2925.5912 225.16305 11452.284 + 5640 296.24255 130.97567 -3365.8555 -2925.2172 219.04513 11452.071 + 5650 298.09499 131.02898 -3367.6325 -2924.2388 194.7517 11451.862 + 5660 298.5637 131.04353 -3368.1177 -2924.0269 201.28599 11451.656 + 5670 296.71226 130.98525 -3366.1751 -2924.8381 257.11663 11451.455 + 5680 293.68491 130.94733 -3364.9109 -2928.0768 311.81304 11451.258 + 5690 291.75542 130.9269 -3364.2299 -2930.2659 343.96372 11451.068 + 5700 292.28467 130.91095 -3363.6982 -2928.9469 345.58668 11450.884 + 5710 294.90983 130.96347 -3365.4489 -2926.7928 297.72534 11450.706 + 5720 297.71999 131.0354 -3367.8466 -2925.0106 238.56796 11450.534 + 5730 299.08742 131.0793 -3369.3099 -2924.44 203.3616 11450.366 + 5740 298.59283 131.03163 -3367.7209 -2923.5867 220.93483 11450.203 + 5750 297.08532 131.02731 -3367.577 -2925.6851 232.15745 11450.044 + 5760 296.2497 131.02818 -3367.6061 -2926.9571 235.79951 11449.889 + 5770 297.37636 131.07528 -3369.176 -2926.8512 203.29734 11449.738 + 5780 300.40437 131.15157 -3371.719 -2924.8903 142.80676 11449.592 + 5790 303.91428 131.21367 -3373.7889 -2921.7395 84.724621 11449.448 + 5800 306.41131 131.26786 -3375.5954 -2919.8318 44.031034 11449.306 + 5810 307.99958 121.306 -3376.8666 -2918.7405 -37.286254 11449.164 + 5820 309.32233 121.28008 -3376.0027 -2915.9092 -27.605463 11449.021 + 5830 309.80388 121.31805 -3377.2683 -2916.4585 -32.028086 11448.878 + 5840 308.86701 121.31067 -3377.0222 -2917.6059 -7.4947014 11448.734 + 5850 306.95204 121.26991 -3375.6638 -2919.0959 34.445852 11448.59 + 5860 305.29171 121.23775 -3374.5916 -2920.4933 63.564857 11448.447 + 5870 304.64939 
121.2201 -3374.0034 -2920.8606 69.314887 11448.305 + 5880 304.58889 121.18528 -3372.8428 -2919.7899 67.203245 11448.164 + 5890 304.55129 121.17067 -3372.3556 -2919.3586 50.431353 11448.025 + 5900 304.59946 121.20885 -3373.6282 -2920.5596 11.550291 11447.886 + 5910 304.92 121.20857 -3373.619 -2920.0736 -11.291606 11447.747 + 5920 305.40412 121.23101 -3374.3671 -2920.1017 -37.74416 11447.608 + 5930 305.42742 121.27287 -3375.7624 -2921.4623 -60.476392 11447.469 + 5940 304.56688 121.26329 -3375.4429 -2922.4228 -49.265064 11447.329 + 5950 303.2093 121.24479 -3374.8263 -2923.8254 -25.645761 11447.187 + 5960 302.07547 121.19089 -3373.0297 -2923.7154 12.561304 11447.045 + 5970 301.83992 121.17685 -3372.5616 -2923.5976 27.242561 11446.903 + 5980 303.54603 121.17419 -3372.4731 -2920.9714 18.473904 11446.762 + 5990 307.45628 121.27844 -3375.948 -2918.6301 -52.10681 11446.621 + 6000 311.8961 121.32927 -3377.6423 -2913.7205 -109.77328 11446.479 + 6010 314.83582 121.38461 -3379.4869 -2911.1925 -159.28121 11446.335 + 6020 315.58209 121.42817 -3380.939 -2911.5346 -184.82384 11446.188 + 6030 313.81726 121.34754 -3378.2515 -2911.4721 -138.25592 11446.037 + 6040 309.02473 121.29909 -3376.6365 -2916.9856 -72.361288 11445.883 + 6050 301.06128 121.18569 -3372.8565 -2925.0506 49.842156 11445.728 + 6060 291.53831 121.04419 -3368.1396 -2934.4984 198.28214 11445.574 + 6070 283.82991 120.93153 -3364.3842 -2942.2087 318.78689 11445.424 + 6080 281.29552 120.88428 -3362.8095 -2944.4037 364.89586 11445.28 + 6090 284.5337 120.93699 -3364.5662 -2941.3439 315.41058 11445.142 + 6100 291.20342 121.05149 -3368.383 -2935.24 203.83576 11445.011 + 6110 298.21078 121.13274 -3371.0914 -2927.5255 92.90772 11444.883 + 6120 304.25842 121.25998 -3375.3326 -2922.7712 -32.696944 11444.757 + 6130 309.58145 121.34665 -3378.2216 -2917.7427 -137.71781 11444.631 + 6140 314.29275 121.41862 -3380.6208 -2913.1341 -226.21487 11444.501 + 6150 316.94078 121.44563 -3381.5209 -2910.0955 -270.49864 11444.368 + 6160 315.79682 
121.42782 -3380.9272 -2911.2034 -255.09515 11444.229 + 6170 310.80014 121.37988 -3379.3294 -2917.0377 -186.40129 11444.086 + 6180 304.0717 121.28897 -3376.2991 -2924.0155 -81.122875 11443.939 + 6190 298.86928 121.17037 -3372.3456 -2927.8002 20.603056 11443.79 + 6200 297.89834 121.19524 -3373.1747 -2930.0735 26.610471 11443.642 + 6210 300.80627 121.25459 -3375.1531 -2927.7266 -19.252516 11443.494 + 6220 304.37547 121.34965 -3378.3218 -2925.5864 -87.649549 11443.346 + 6230 305.31663 121.34208 -3378.0693 -2923.934 -94.536877 11443.196 + 6240 302.63117 121.28093 -3376.0309 -2925.89 -48.075602 11443.044 + 6250 297.59821 121.209 -3373.6334 -2930.9786 25.407798 11442.892 + 6260 292.52948 121.16797 -3372.2658 -2937.1504 88.509049 11442.74 + 6270 289.66654 121.11933 -3370.6443 -2939.7873 135.05547 11442.589 + 6280 290.39869 121.1552 -3371.8399 -2939.8939 117.57692 11442.441 + 6290 294.71759 121.25578 -3375.1928 -2936.8227 45.417061 11442.295 + 6300 300.56911 121.34469 -3378.1564 -2931.0826 -35.208037 11442.15 + 6310 305.99952 121.43795 -3381.265 -2926.1139 -113.62659 11442.004 + 6320 310.54751 121.5273 -3384.2434 -2922.3275 -182.57938 11441.856 + 6330 314.3461 121.60725 -3386.9084 -2919.3424 -239.70042 11441.704 + 6340 317.09976 121.61716 -3387.2385 -2915.5767 -258.05513 11441.548 + 6350 318.35227 121.68686 -3389.5621 -2916.0372 -284.10667 11441.387 + 6360 317.97511 121.68375 -3389.4583 -2916.4944 -268.915 11441.221 + 6370 316.10609 121.63793 -3387.9311 -2917.7472 -227.23547 11441.049 + 6380 312.95347 121.53341 -3384.4469 -2918.9524 -156.9496 11440.874 + 6390 309.51177 121.47109 -3382.3696 -2921.9943 -105.45254 11440.695 + 6400 307.53502 121.46679 -3382.2265 -2924.7914 -91.070456 11440.514 + 6410 307.90695 121.47344 -3382.4481 -2924.4599 -100.10431 11440.331 + 6420 309.32994 121.51892 -3383.9639 -2923.8591 -128.77822 11440.146 + 6430 309.03436 121.47428 -3382.476 -2922.8109 -104.60225 11439.958 + 6440 305.55234 121.42335 -3380.7782 -2926.2922 -47.578059 11439.769 + 6450 
299.92297 121.33762 -3377.9206 -2931.8079 41.863859 11439.578 + 6460 295.1951 121.27205 -3375.7349 -2936.6546 112.91612 11439.389 + 6470 294.17694 121.29715 -3376.5716 -2939.0057 112.58632 11439.201 + 6480 297.10778 121.30281 -3376.7605 -2934.8352 81.276592 11439.015 + 6490 301.33476 121.38324 -3379.4412 -2931.2286 6.2663379 11438.831 + 6500 303.68877 121.44573 -3381.5242 -2929.8102 -47.863863 11438.647 + 6510 302.89678 121.43955 -3381.3182 -2930.7822 -49.578009 11438.462 + 6520 299.93088 121.39801 -3379.9335 -2933.8091 -18.668086 11438.276 + 6530 296.42314 121.33096 -3377.6986 -2936.7916 29.155622 11438.09 + 6540 293.36857 121.26419 -3375.4729 -2939.1094 74.522104 11437.904 + 6550 291.79984 121.27939 -3375.9796 -2941.9495 75.96533 11437.719 + 6560 292.6847 121.28759 -3376.253 -2940.9066 57.694103 11437.536 + 6570 296.03132 121.32839 -3377.6131 -2937.2889 5.3331973 11437.354 + 6580 300.52329 121.41761 -3380.5871 -2933.5815 -74.944059 11437.172 + 6590 304.55451 121.47098 -3382.3661 -2929.3643 -136.73527 11436.989 + 6600 307.2743 121.50474 -3383.4914 -2926.4441 -176.63601 11436.802 + 6610 308.38455 121.5411 -3384.7035 -2926.0048 -198.23882 11436.613 + 6620 307.19155 121.53189 -3384.3965 -2927.4723 -177.0664 11436.42 + 6630 302.92923 121.4751 -3382.5033 -2931.919 -107.6746 11436.223 + 6640 296.67175 121.35465 -3378.4883 -2937.2116 1.9001733 11436.024 + 6650 291.93091 121.26543 -3375.5144 -2941.2893 82.710156 11435.825 + 6660 291.21529 121.25278 -3375.0928 -2941.9321 95.765608 11435.628 + 6670 293.45813 121.30145 -3376.7151 -2940.2184 58.227332 11435.432 + 6680 295.67156 121.37732 -3379.244 -2939.455 10.022993 11435.237 + 6690 296.00316 121.37179 -3379.0596 -2938.7773 11.670165 11435.043 + 6700 294.30769 121.38882 -3379.6274 -2941.8671 24.164729 11434.849 + 6710 292.20141 121.34314 -3378.1046 -2943.4771 63.68431 11434.655 + 6720 292.25163 121.35609 -3378.5362 -2943.8341 59.285121 11434.462 + 6730 295.89269 121.43397 -3381.1325 -2941.0145 -4.4397088 11434.27 + 6740 
301.3787 121.47825 -3382.6085 -2934.3305 -75.829688 11434.078 + 6750 305.48487 121.53316 -3384.4386 -2930.053 -142.81772 11433.885 + 6760 306.39519 121.53825 -3384.6083 -2928.8687 -163.62906 11433.688 + 6770 304.80661 121.50213 -3383.4043 -2930.0276 -145.12063 11433.489 + 6780 302.98524 121.46575 -3382.1916 -2931.5241 -120.47126 11433.287 + 6790 302.50573 121.44177 -3381.3924 -2931.438 -106.81655 11433.082 + 6800 303.13931 121.45376 -3381.7922 -2930.8954 -111.95234 11432.875 + 6810 303.82879 121.49441 -3383.147 -2931.2247 -123.31911 11432.666 + 6820 303.37447 121.49862 -3383.2872 -2932.0407 -107.30036 11432.455 + 6830 301.59672 121.47887 -3382.6292 -2934.0269 -69.497907 11432.241 + 6840 299.31891 121.44611 -3381.537 -2936.3228 -23.627886 11432.026 + 6850 297.17748 121.41973 -3380.6578 -2938.6288 13.097409 11431.811 + 6860 295.43791 121.4124 -3380.4133 -2940.9718 28.729536 11431.595 + 6870 294.45232 121.34377 -3378.1255 -2940.15 51.510046 11431.38 + 6880 294.1054 121.36947 -3378.9824 -2941.5229 26.250084 11431.166 + 6890 294.55514 121.4331 -3381.1033 -2942.9749 -20.620084 11430.952 + 6900 296.26164 121.45637 -3381.8788 -2941.2121 -57.549381 11430.738 + 6910 298.57641 121.47094 -3382.3645 -2938.2547 -86.966966 11430.522 + 6920 300.22613 121.5682 -3385.6067 -2939.0431 -132.27618 11430.305 + 6930 301.18876 121.58993 -3386.3309 -2938.3354 -134.00474 11430.085 + 6940 301.66797 121.59115 -3386.3717 -2937.6635 -116.59159 11429.863 + 6950 300.85733 121.55323 -3385.1077 -2937.6052 -71.93642 11429.638 + 6960 298.41992 121.50952 -3383.6506 -2939.7736 -14.110922 11429.411 + 6970 294.96406 121.47503 -3382.5009 -2943.7642 41.710586 11429.185 + 6980 291.88038 121.3801 -3379.3368 -2945.1868 108.9979 11428.959 + 6990 289.98593 121.34608 -3378.2027 -2946.8706 135.00575 11428.734 + 7000 289.01191 121.36806 -3378.9354 -2949.0521 126.23987 11428.513 + 7010 288.71783 121.3628 -3378.7599 -2949.314 121.24175 11428.293 + 7020 289.25853 121.41595 -3380.5316 -2950.2815 88.693883 11428.076 + 
7030 290.66594 121.42949 -3380.9831 -2948.6395 64.924354 11427.86 + 7040 292.91592 121.45823 -3381.9409 -2946.2507 26.846784 11427.645 + 7050 296.29505 121.53507 -3384.5025 -2943.786 -41.800514 11427.431 + 7060 300.48752 121.58324 -3386.1082 -2939.1558 -108.99671 11427.215 + 7070 303.74584 121.6541 -3388.4701 -2936.6712 -179.87714 11426.998 + 7080 304.56786 121.65329 -3388.4429 -2935.4213 -205.54748 11426.777 + 7090 303.52464 121.66603 -3388.8676 -2937.3977 -222.70063 11426.552 + 7100 302.81602 121.66912 -3388.9706 -2938.5547 -237.78388 11426.322 + 7110 303.24794 121.64942 -3388.314 -2937.2557 -250.07838 11426.089 + 7120 303.30002 121.66235 -3388.7451 -2937.6094 -263.64553 11425.85 + 7130 301.32657 121.63456 -3387.8188 -2939.6184 -233.47729 11425.606 + 7140 297.52117 121.54997 -3384.9991 -2942.4589 -154.04973 11425.358 + 7150 292.59989 121.45312 -3381.7705 -2946.5504 -52.124211 11425.106 + 7160 287.00639 121.34796 -3378.2654 -2951.3651 58.6958 11424.854 + 7170 283.06501 121.26859 -3375.6197 -2954.582 139.26322 11424.602 + 7180 284.06217 121.33351 -3377.7837 -2955.2628 116.18041 11424.353 + 7190 289.08005 121.41983 -3380.661 -2950.6763 44.400243 11424.106 + 7200 294.50663 121.51748 -3383.9162 -2945.8599 -39.728782 11423.859 + 7210 297.67329 121.55071 -3385.0238 -2942.2573 -80.261956 11423.612 + 7220 297.64527 121.54585 -3384.8617 -2942.1369 -75.07525 11423.363 + 7230 295.02577 121.52965 -3384.3217 -2945.4933 -39.053373 11423.113 + 7240 291.38328 121.47121 -3382.3736 -2948.963 26.280082 11422.862 + 7250 287.93465 121.3964 -3379.8801 -2951.5992 97.748966 11422.611 + 7260 285.87651 121.39794 -3379.9312 -2954.7116 128.23397 11422.362 + 7270 285.96876 121.41065 -3380.3549 -2954.9981 134.2647 11422.115 + 7280 287.75241 121.43418 -3381.1393 -2953.1294 120.10633 11421.871 + 7290 289.62691 121.47117 -3382.3724 -2951.5743 98.356048 11421.628 + 7300 290.35305 121.47946 -3382.6487 -2950.7705 94.790713 11421.387 + 7310 289.97261 121.46373 -3382.1244 -2950.8121 106.80703 
11421.149 + 7320 289.30744 121.42708 -3380.9028 -2950.5799 123.98134 11420.912 + 7330 289.66677 121.40882 -3380.294 -2949.4366 121.41687 11420.677 + 7340 291.5067 121.4213 -3380.71 -2947.1159 94.485262 11420.444 + 7350 293.55914 121.45334 -3381.7779 -2945.131 63.701441 11420.213 + 7360 293.6809 121.47809 -3382.6031 -2945.7751 59.685975 11419.984 + 7370 290.97879 121.43596 -3381.1987 -2948.3898 111.65499 11419.755 + 7380 286.63033 121.36012 -3378.6707 -2952.3298 192.64797 11419.528 + 7390 283.18256 121.34386 -3378.1287 -2956.9161 239.60658 11419.305 + 7400 282.82774 121.33601 -3377.867 -2957.1822 250.1436 11419.087 + 7410 285.40796 121.38898 -3379.6326 -2955.1099 203.62621 11418.873 + 7420 289.35595 121.4548 -3381.8266 -2951.4315 133.17099 11418.662 + 7430 293.35276 121.53336 -3384.4453 -2948.1053 55.780444 11418.454 + 7440 296.16942 121.60248 -3386.7494 -2946.2198 -6.6397032 11418.247 + 7450 296.98265 121.59587 -3386.5289 -2944.7897 -21.038062 11418.04 + 7460 295.84641 121.55012 -3385.0041 -2944.9549 -3.5386439 11417.833 + 7470 294.06356 121.57541 -3385.8469 -2948.4496 -11.295845 11417.625 + 7480 293.63529 121.58964 -3386.3212 -2949.561 -32.0014 11417.417 + 7490 295.76551 121.59491 -3386.4971 -2946.5684 -74.796971 11417.208 + 7500 300.09907 121.65592 -3388.5306 -2942.1559 -155.8278 11416.997 + 7510 304.72319 121.74595 -3391.5317 -2938.279 -241.96792 11416.784 + 7520 307.0639 121.81835 -3393.9448 -2937.2106 -288.03329 11416.565 + 7530 305.57223 121.78183 -3392.7278 -2938.2123 -247.40827 11416.342 + 7540 301.12418 121.71544 -3390.5145 -2942.6151 -161.9015 11416.113 + 7550 295.70904 121.6631 -3388.7701 -2948.9253 -71.675086 11415.881 + 7560 291.31571 121.55023 -3385.0076 -2951.6976 26.176276 11415.648 + 7570 289.94149 121.5157 -3383.8566 -2952.5906 56.421451 11415.415 + 7580 292.00244 121.5235 -3384.1166 -2949.7851 30.692101 11415.183 + 7590 295.36552 121.61805 -3387.2683 -2947.9344 -43.532367 11414.951 + 7600 297.08335 121.64729 -3388.2429 -2946.354 -74.771931 
11414.718 + 7610 295.44109 121.60784 -3386.928 -2947.4817 -40.67608 11414.484 + 7620 290.90997 121.52396 -3384.1321 -2951.4256 47.314762 11414.249 + 7630 285.14289 121.42956 -3380.9853 -2956.8569 158.19803 11414.015 + 7640 279.72666 121.30201 -3376.7337 -2960.6615 281.07617 11413.784 + 7650 276.16427 121.26709 -3375.5697 -2964.7963 348.05018 11413.558 + 7660 275.93824 121.28837 -3376.2791 -2965.8419 357.49442 11413.339 + 7670 279.27107 121.35008 -3378.3359 -2962.9414 314.50092 11413.126 + 7680 284.71696 121.44599 -3381.533 -2958.0381 232.91616 11412.919 + 7690 290.68756 121.59089 -3386.363 -2953.9873 122.22241 11412.716 + 7700 296.10041 121.67055 -3389.0183 -2948.5914 39.612836 11412.516 + 7710 300.03593 121.7591 -3391.9702 -2945.6894 -30.258862 11412.316 + 7720 301.45274 121.78629 -3392.8763 -2944.4882 -47.750063 11412.116 + 7730 299.65146 121.74337 -3391.4457 -2945.7368 -3.8693104 11411.914 + 7740 295.05515 121.66949 -3388.9831 -2950.111 77.889818 11411.713 + 7750 289.44153 121.59302 -3386.434 -2955.9116 164.60653 11411.513 + 7760 285.29252 121.53123 -3384.3742 -2960.0232 222.53442 11411.316 + 7770 284.85561 121.54429 -3384.8096 -2961.1085 207.17973 11411.123 + 7780 289.04505 121.57392 -3385.7974 -2955.8648 135.63591 11410.934 + 7790 296.1664 121.69225 -3389.7415 -2949.2164 1.5360308 11410.747 + 7800 302.26382 121.78021 -3392.6735 -2943.079 -108.12678 11410.56 + 7810 303.92684 121.81559 -3393.8529 -2941.7848 -151.33486 11410.371 + 7820 301.04451 121.7607 -3392.0233 -2944.2424 -109.55118 11410.179 + 7830 296.13626 121.66911 -3388.9702 -2948.4899 -25.777949 11409.985 + 7840 291.90332 121.58194 -3386.0647 -2951.8806 55.326282 11409.79 + 7850 289.71974 121.53181 -3384.3936 -2953.4575 101.90278 11409.596 + 7860 289.88815 121.54781 -3384.9271 -2953.7404 96.042181 11409.404 + 7870 292.18894 121.58655 -3386.2184 -2951.6095 57.07195 11409.214 + 7880 295.54672 121.64836 -3388.2788 -2948.6754 -3.3499373 11409.024 + 7890 298.32641 121.66431 -3388.8102 -2945.0723 -41.845709 
11408.835 + 7900 299.45316 121.68976 -3389.6586 -2944.2447 -67.016831 11408.644 + 7910 298.70351 121.70488 -3390.1626 -2945.8638 -68.419794 11408.453 + 7920 296.64481 121.70811 -3390.2704 -2949.0337 -50.530105 11408.259 + 7930 294.28124 121.6506 -3388.3534 -2950.6323 -3.663579 11408.065 + 7940 292.23868 121.64621 -3388.207 -2953.5241 21.918533 11407.871 + 7950 290.26549 121.60359 -3386.7863 -2955.0384 62.708742 11407.677 + 7960 287.55244 121.57122 -3385.7073 -2957.9949 106.10688 11407.484 + 7970 283.85929 121.52896 -3384.2987 -2962.0795 160.54675 11407.294 + 7980 280.28801 121.47709 -3382.5697 -2965.6625 216.12416 11407.106 + 7990 278.53347 121.48552 -3382.8508 -2968.5534 229.84025 11406.922 + 8000 279.75796 121.50037 -3383.3458 -2967.227 212.04894 11406.742 + 8010 283.74104 121.58874 -3386.2913 -2964.248 141.49549 11406.566 + 8020 288.45384 121.67064 -3389.0213 -2959.9681 67.849066 11406.393 + 8030 291.63232 121.70103 -3390.0342 -2956.2532 28.33064 11406.221 + 8040 292.92102 121.7051 -3390.1699 -2954.4721 16.029088 11406.049 + 8050 293.64951 121.69111 -3389.7036 -2952.9222 17.805687 11405.878 + 8060 294.45326 121.73768 -3391.2561 -2953.2792 -0.74205142 11405.707 + 8070 294.71121 121.71912 -3390.6375 -2952.2769 13.245101 11405.536 + 8080 294.34348 121.69706 -3389.9021 -2952.0885 34.430767 11405.365 + 8090 294.45508 121.69157 -3389.719 -2951.7394 44.1154 11405.195 + 8100 296.1234 121.74573 -3391.5243 -2951.0632 14.909942 11405.025 + 8110 299.70525 121.80303 -3393.4342 -2947.6454 -37.833815 11404.856 + 8120 304.85386 121.8967 -3396.5565 -2943.1095 -121.98657 11404.686 + 8130 309.66679 122.02391 -3400.7971 -2940.1912 -216.54558 11404.513 + 8140 311.76665 122.03881 -3401.2937 -2937.5644 -240.97428 11404.337 + 8150 309.70469 122.03337 -3401.1123 -2940.4501 -214.62609 11404.155 + 8160 303.41268 121.89999 -3396.6664 -2945.363 -97.380968 11403.97 + 8170 294.94373 121.75097 -3391.6991 -2952.9927 47.27671 11403.783 + 8180 287.50022 121.63641 -3387.8803 -2960.2455 167.66664 
11403.596 + 8190 283.70748 121.56422 -3385.4739 -2963.4805 236.79462 11403.412 + 8200 283.90018 121.56012 -3385.3372 -2963.0572 242.009 11403.233 + 8210 286.36582 121.5892 -3386.3068 -2960.3593 215.13031 11403.058 + 8220 288.37568 121.63764 -3387.9214 -2958.9844 188.64663 11402.887 + 8230 288.13232 121.66502 -3388.8341 -2960.2591 194.39534 11402.72 + 8240 285.92758 121.63311 -3387.7702 -2962.4746 244.76763 11402.556 + 8250 283.23425 121.56477 -3385.4924 -2964.2029 312.21471 11402.397 + 8260 282.66236 121.52683 -3384.2276 -2963.7888 343.91086 11402.244 + 8270 285.85627 121.5688 -3385.6267 -2960.4372 303.60474 11402.097 + 8280 291.18962 121.67319 -3389.1062 -2955.9837 212.56952 11401.956 + 8290 294.72899 121.74904 -3391.6346 -2953.2476 141.3782 11401.819 + 8300 294.40491 121.71146 -3390.3819 -2952.4769 144.49126 11401.685 + 8310 291.9809 121.69504 -3389.8348 -2955.5353 160.42483 11401.553 + 8320 290.84863 121.61522 -3387.174 -2954.5587 192.98202 11401.424 + 8330 293.05664 121.65498 -3388.4993 -2952.5998 153.82616 11401.299 + 8340 297.7363 121.73555 -3391.1852 -2948.325 79.304352 11401.177 + 8350 302.45783 121.85243 -3395.0809 -2945.1978 -6.3667234 11401.056 + 8360 304.62272 121.90331 -3396.777 -2943.6738 -40.770371 11400.935 + 8370 302.84356 121.9063 -3396.8766 -2946.4198 -19.498551 11400.813 + 8380 297.89437 121.82033 -3394.011 -2950.9157 67.484687 11400.691 + 8390 291.35453 121.72142 -3390.7139 -2957.3461 174.97391 11400.57 + 8400 284.83989 121.59143 -3386.3812 -2962.7034 294.44578 11400.452 + 8410 279.66328 121.49365 -3383.1215 -2967.1436 387.33014 11400.34 + 8420 276.96923 121.47226 -3382.4086 -2970.4379 424.35814 11400.235 + 8430 277.08081 121.44233 -3381.4111 -2969.2744 433.77903 11400.139 + 8440 279.4958 121.47367 -3382.4557 -2966.7269 392.30373 11400.05 + 8450 283.94071 121.5495 -3384.9834 -2962.6431 309.03611 11399.969 + 8460 290.07431 121.64008 -3388.0027 -2956.5392 200.2232 11399.894 + 8470 296.67142 121.77463 -3392.4876 -2951.2114 68.007365 11399.822 + 
8480 302.55236 121.88049 -3396.0164 -2945.9927 -46.706348 11399.752 + 8490 307.37942 121.92398 -3397.4661 -2940.2625 -124.44112 11399.68 + 8500 310.91803 121.9796 -3399.32 -2936.853 -187.72128 11399.607 + 8510 312.0744 122.01967 -3400.6557 -2936.4687 -216.05844 11399.529 + 8520 310.03337 121.97016 -3399.0054 -2937.8543 -174.14377 11399.448 + 8530 305.4702 121.90647 -3396.8823 -2942.5185 -97.404065 11399.363 + 8540 300.14456 121.8409 -3394.6968 -2948.2545 -6.3546077 11399.277 + 8550 295.15692 121.71853 -3390.6176 -2951.594 110.54611 11399.19 + 8560 289.88137 121.60675 -3386.8917 -2955.7151 227.14754 11399.106 + 8570 284.12722 121.49029 -3383.0097 -2960.392 343.04281 11399.025 + 8580 279.48295 121.39387 -3379.7957 -2964.086 428.8436 11398.952 + 8590 278.01857 121.41011 -3380.3368 -2966.8053 428.24162 11398.886 + 8600 280.73817 121.45454 -3381.818 -2964.2413 368.28406 11398.828 + 8610 286.38305 121.52415 -3384.1382 -2958.1652 266.10722 11398.777 + 8620 292.37653 121.69082 -3389.694 -2954.8061 124.26457 11398.732 + 8630 296.50249 121.76462 -3392.154 -2951.129 43.314116 11398.689 + 8640 297.27655 121.79348 -3393.1159 -2950.9396 21.585399 11398.646 + 8650 294.05583 121.7657 -3392.1898 -2954.8041 64.9006 11398.605 + 8660 288.11582 121.63341 -3387.7802 -2959.2298 171.84686 11398.564 + 8670 282.94381 121.54279 -3384.7596 -2963.9021 250.73657 11398.527 + 8680 282.02821 121.52902 -3384.3005 -2964.805 255.33136 11398.494 + 8690 286.19312 121.55186 -3385.0619 -2959.3713 194.44745 11398.466 + 8700 293.03064 121.69406 -3389.802 -2953.9412 60.228584 11398.442 + 8710 299.53934 121.81456 -3393.8188 -2948.2767 -60.820475 11398.419 + 8720 304.061 121.87932 -3395.9774 -2943.7097 -134.22411 11398.396 + 8730 305.53422 121.89281 -3396.4269 -2941.9679 -148.26935 11398.369 + 8740 303.24325 121.85775 -3395.2583 -2944.207 -98.441056 11398.34 + 8750 298.01675 121.73411 -3391.1368 -2947.8595 18.078199 11398.31 + 8760 292.4167 121.64325 -3388.1083 -2953.1606 127.1258 11398.279 + 8770 288.7018 
121.56681 -3385.5605 -2956.1384 211.38436 11398.251 + 8780 286.87925 121.5404 -3384.6799 -2957.9687 253.32335 11398.227 + 8790 285.60047 121.53166 -3384.3888 -2959.5797 276.29221 11398.208 + 8800 284.38203 121.5515 -3385.0501 -2962.0534 283.35374 11398.194 + 8810 283.70737 121.49429 -3383.1429 -2961.1497 312.80633 11398.186 + 8820 284.23031 121.47844 -3382.6147 -2959.8437 314.03166 11398.184 + 8830 286.43411 121.55211 -3385.0702 -2959.0212 266.82873 11398.187 + 8840 289.86091 121.62816 -3387.6052 -2956.459 208.75908 11398.196 + 8850 293.24822 121.68368 -3389.4561 -2953.2716 158.0099 11398.209 + 8860 295.57022 121.73176 -3391.0587 -2951.4204 117.39573 11398.225 + 8870 296.8218 121.78013 -3392.6711 -2951.1712 84.661279 11398.243 + 8880 297.43259 121.82078 -3394.0261 -2951.6177 59.880643 11398.263 + 8890 297.58122 121.78595 -3392.8649 -2950.2353 70.769558 11398.284 + 8900 296.26156 121.73242 -3391.0805 -2950.4139 105.69366 11398.307 + 8910 292.10637 121.67087 -3389.029 -2954.5429 168.61304 11398.332 + 8920 286.12348 121.56834 -3385.6114 -2960.0244 259.13272 11398.361 + 8930 282.01762 121.48409 -3382.8031 -2963.3233 320.45391 11398.394 + 8940 283.23713 121.51585 -3383.8616 -2962.5679 287.11228 11398.433 + 8950 289.73668 121.61142 -3387.0474 -2956.086 184.38724 11398.478 + 8960 297.49232 121.73754 -3391.2513 -2948.754 63.139523 11398.527 + 8970 301.90764 121.77947 -3392.649 -2943.5843 10.003678 11398.577 + 8980 301.25088 121.7847 -3392.8234 -2944.7355 22.305957 11398.627 + 8990 297.22125 121.70917 -3390.3056 -2948.2115 99.477754 11398.678 + 9000 293.15104 121.64083 -3388.0276 -2951.9876 176.06272 11398.732 + 9010 291.39011 121.59061 -3386.3535 -2952.9328 224.34907 11398.788 + 9020 291.98975 121.59098 -3386.366 -2952.0534 230.33662 11398.849 + 9030 293.49771 121.61557 -3387.1858 -2950.6301 217.2585 11398.915 + 9040 294.31425 121.61969 -3387.3229 -2949.5528 215.48784 11398.985 + 9050 293.7006 121.61756 -3387.2519 -2950.3945 226.42499 11399.059 + 9060 292.28726 121.58916 
-3386.3053 -2951.5501 252.28989 11399.138 + 9070 291.7129 121.568 -3385.5999 -2951.6991 265.29971 11399.221 + 9080 293.04446 121.59302 -3386.434 -2950.5526 242.03179 11399.31 + 9090 295.24417 121.65345 -3388.4485 -2949.2951 194.81576 11399.403 + 9100 296.37884 121.62357 -3387.4524 -2946.6113 188.95802 11399.501 + 9110 295.23505 121.61548 -3387.1827 -2948.0429 191.19521 11399.602 + 9120 292.90824 121.56618 -3385.5393 -2949.8605 218.67123 11399.706 + 9130 291.58905 121.50746 -3383.5819 -2949.8652 242.83745 11399.816 + 9140 292.25301 121.56348 -3385.4492 -2950.745 210.12144 11399.93 + 9150 294.12366 121.59386 -3386.4621 -2948.9755 181.87082 11400.048 + 9160 295.81477 121.6525 -3388.4167 -2948.4147 148.20394 11400.17 + 9170 296.64574 121.62445 -3387.4818 -2946.2438 157.497 11400.295 + 9180 297.28156 121.65595 -3388.5316 -2946.3478 148.91035 11400.423 + 9190 298.31972 121.6907 -3389.6899 -2945.9619 137.15003 11400.554 + 9200 299.22093 121.67552 -3389.1839 -2944.1154 146.14576 11400.688 + 9210 298.78222 121.63221 -3387.7402 -2943.3243 178.25077 11400.825 + 9220 296.43598 121.57788 -3385.9295 -2945.0034 230.31154 11400.966 + 9230 293.26285 121.50868 -3383.6227 -2947.4164 291.88416 11401.111 + 9240 291.29012 121.49662 -3383.2207 -2949.9488 316.6004 11401.263 + 9250 291.2153 121.46745 -3382.2483 -2949.0876 327.441 11401.42 + 9260 291.54379 121.49683 -3383.2276 -2949.5784 309.86491 11401.584 + 9270 290.57005 121.43944 -3381.3148 -2949.1139 335.99199 11401.754 + 9280 288.23138 121.3968 -3379.8934 -2951.1711 370.00494 11401.931 + 9290 285.91423 121.38208 -3379.4025 -2954.1268 397.04401 11402.115 + 9300 284.90575 121.37911 -3379.3036 -2955.5278 411.30367 11402.307 + 9310 285.42822 121.37995 -3379.3318 -2954.7789 412.44678 11402.507 + 9320 286.82007 121.40428 -3380.1425 -2953.5194 395.69373 11402.715 + 9330 288.91408 121.46087 -3382.0289 -2952.2911 357.81461 11402.932 + 9340 292.09688 121.48175 -3382.7249 -2948.2529 321.45139 11403.155 + 9350 296.56634 121.57704 -3385.9015 
-2944.7815 244.68683 11403.385 + 9360 301.5146 121.64133 -3388.0442 -2939.5641 176.68016 11403.62 + 9370 305.17646 121.69805 -3389.9351 -2936.0082 123.87169 11403.859 + 9380 305.87527 121.73266 -3391.0886 -2936.1223 104.95438 11404.101 + 9390 303.44747 121.69083 -3389.6944 -2938.3393 139.38301 11404.344 + 9400 300.14961 121.57806 -3385.9354 -2939.4856 205.76471 11404.592 + 9410 298.87347 121.55313 -3385.1045 -2940.5529 219.68891 11404.843 + 9420 300.64536 121.59112 -3386.3708 -2939.1836 184.36019 11405.098 + 9430 303.88367 121.61609 -3387.2029 -2935.1989 144.23619 11405.358 + 9440 306.12219 121.60308 -3386.7693 -2931.4357 131.48715 11405.621 + 9450 305.90898 121.6167 -3387.2234 -2932.2069 134.21971 11405.887 + 9460 303.4797 121.60203 -3386.7342 -2935.3311 170.09512 11406.155 + 9470 300.23214 121.54105 -3384.7016 -2938.129 232.29244 11406.428 + 9480 297.52756 121.44255 -3381.4183 -2938.8686 301.13182 11406.706 + 9490 296.66368 121.45619 -3381.8728 -2940.6081 306.20888 11406.989 + 9500 298.23305 121.4742 -3382.4734 -2938.8744 283.06153 11407.279 + 9510 301.69507 121.51052 -3383.6839 -2934.9354 232.47928 11407.574 + 9520 305.65307 121.54783 -3384.9276 -2930.2918 175.42728 11407.875 + 9530 308.96601 121.63652 -3387.8839 -2928.3204 105.97393 11408.179 + 9540 310.90613 121.65292 -3388.4306 -2925.9813 83.616787 11408.486 + 9550 310.34517 121.6161 -3387.2034 -2925.5885 114.7155 11408.795 + 9560 306.11703 121.50118 -3383.3726 -2928.0467 217.12485 11409.107 + 9570 298.57301 121.39359 -3379.7864 -2935.6817 347.79888 11409.424 + 9580 290.5066 121.24927 -3374.9755 -2942.869 492.52626 11409.748 + 9590 284.8869 121.21634 -3373.8779 -2950.1302 564.58935 11410.081 + 9600 282.24345 121.14241 -3371.4137 -2951.598 615.47273 11410.426 + 9610 281.54477 121.17453 -3372.4844 -2953.7079 597.63074 11410.783 + 9620 282.37592 121.16698 -3372.2327 -2952.22 572.73408 11411.152 + 9630 285.87631 121.23507 -3374.5024 -2949.283 489.72728 11411.532 + 9640 293.23863 121.32922 -3377.6407 -2941.4704 
359.22268 11411.922 + 9650 303.78977 121.52811 -3384.2703 -2932.406 160.0119 11412.32 + 9660 314.90908 121.70572 -3390.1907 -2921.7873 -31.00397 11412.721 + 9670 323.19384 121.81222 -3393.7407 -2913.0144 -159.81088 11413.122 + 9680 326.47698 121.83596 -3394.5319 -2908.9221 -197.94728 11413.521 + 9690 324.49204 121.77608 -3392.5359 -2909.8786 -140.96579 11413.917 + 9700 318.39873 121.7012 -3390.0399 -2916.4459 -28.07165 11414.311 + 9710 309.9298 121.56798 -3385.5994 -2924.6024 134.24289 11414.705 + 9720 301.40115 121.38285 -3379.4283 -2931.117 314.19572 11415.102 + 9730 295.50304 121.29689 -3376.5631 -2937.0247 424.36683 11415.506 + 9740 293.69194 121.24535 -3374.845 -2938.0005 472.17246 11415.918 + 9750 295.82738 121.28075 -3376.0249 -2936.0041 436.99987 11416.34 + 9760 300.84709 121.36028 -3378.6761 -2931.1888 346.71992 11416.77 + 9770 306.75344 121.43996 -3381.3319 -2925.0594 241.24451 11417.208 + 9780 311.02557 121.5807 -3386.0235 -2923.3965 124.30422 11417.651 + 9790 312.54333 121.58098 -3386.0327 -2921.1481 86.777327 11418.098 + 9800 311.94009 121.58638 -3386.2127 -2922.2255 70.507396 11418.547 + 9810 310.63038 121.53963 -3384.6545 -2922.6153 85.926164 11418.997 + 9820 310.24158 121.54915 -3384.9716 -2923.5107 80.534768 11419.451 + 9830 310.26239 121.57399 -3385.7997 -2924.3079 74.796432 11419.906 + 9840 308.09463 121.48045 -3382.6815 -2924.4141 137.35072 11420.364 + 9850 303.25155 121.36246 -3378.7487 -2927.685 230.41562 11420.825 + 9860 298.15768 121.24027 -3374.6756 -2931.1886 319.73318 11421.291 + 9870 296.24967 121.20328 -3373.4426 -2932.7936 338.78078 11421.764 + 9880 299.74146 121.24008 -3374.6695 -2928.8268 273.4407 11422.244 + 9890 308.03575 121.3596 -3378.6534 -2920.4736 131.9849 11422.729 + 9900 317.20428 121.51862 -3383.9541 -2912.1367 -26.751232 11423.218 + 9910 322.01402 121.62253 -3387.4177 -2908.4463 -114.41883 11423.707 + 9920 319.17778 121.54485 -3384.8285 -2910.0757 -51.898249 11424.195 + 9930 309.068 121.37161 -3379.0535 -2919.3383 
121.82333 11424.683 + 9940 295.19905 121.13123 -3371.0411 -2931.9549 355.38766 11425.174 + 9950 283.24166 120.94189 -3364.7297 -2943.4292 543.86415 11425.673 + 9960 278.89942 120.87426 -3362.4754 -2947.6337 605.50591 11426.182 + 9970 284.59829 120.96752 -3365.5839 -2942.2655 505.31899 11426.703 + 9980 297.42507 121.21278 -3373.7593 -2931.3621 277.96374 11427.235 + 9990 310.57569 121.40093 -3380.0309 -2918.0731 68.495001 11427.773 + 10000 318.36799 121.47659 -3382.5532 -2909.0049 -46.362454 11428.313 +Loop time of 2.5028 on 1 procs for 10000 steps with 500 atoms -Performance: 265.040 ns/day, 0.091 hours/ns, 3067.588 timesteps/s -100.0% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 345.213 ns/day, 0.070 hours/ns, 3995.527 timesteps/s +99.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.5437 | 2.5437 | 2.5437 | 0.0 | 78.03 -Bond | 0.00088167 | 0.00088167 | 0.00088167 | 0.0 | 0.03 -Neigh | 0.027397 | 0.027397 | 0.027397 | 0.0 | 0.84 -Comm | 0.088112 | 0.088112 | 0.088112 | 0.0 | 2.70 -Output | 0.0010602 | 0.0010602 | 0.0010602 | 0.0 | 0.03 -Modify | 0.33726 | 0.33726 | 0.33726 | 0.0 | 10.35 -Other | | 0.2614 | | | 8.02 +Pair | 2.0314 | 2.0314 | 2.0314 | 0.0 | 81.16 +Bond | 0.00091743 | 0.00091743 | 0.00091743 | 0.0 | 0.04 +Neigh | 0.022412 | 0.022412 | 0.022412 | 0.0 | 0.90 +Comm | 0.074236 | 0.074236 | 0.074236 | 0.0 | 2.97 +Output | 0.0073647 | 0.0073647 | 0.0073647 | 0.0 | 0.29 +Modify | 0.23464 | 0.23464 | 0.23464 | 0.0 | 9.38 +Other | | 0.1319 | | | 5.27 Nlocal: 500 ave 500 max 500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -171,4 +1076,4 @@ Neighbor list builds = 29 Dangerous builds = 0 #write_data ${rep}/lj-out.data -Total wall time: 0:00:03 +Total wall time: 0:00:02 diff --git a/examples/USER/misc/grem/lj-temper/in.gREM-temper b/examples/USER/misc/grem/lj-temper/in.gREM-temper index 
9e94edec7968deef9929b4e6bee41621ea06e8c6..8aa495d9d23624ff49b5091a477e02ea06a7e9a0 100644 --- a/examples/USER/misc/grem/lj-temper/in.gREM-temper +++ b/examples/USER/misc/grem/lj-temper/in.gREM-temper @@ -18,14 +18,14 @@ read_data ${rep}/lj.data #dump dump all xyz 1000 ${rep}/dump.xyz -thermo 10 -thermo_style custom step temp pe etotal press vol -timestep 1.0 - fix fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 10000.0 fix fxgREM all grem ${lambda} -.03 -30000 fxnpt + +thermo 10 +thermo_style custom step temp f_fxgREM pe etotal press vol thermo_modify press fxgREM_press +timestep 1.0 -temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} +temper/grem 10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker} #write_data ${rep}/lj-out.data diff --git a/examples/USER/tally/log.12Jun17.force.1 b/examples/USER/tally/log.12Jun17.force.1 new file mode 100644 index 0000000000000000000000000000000000000000..98b3bad2457c44d328556eecf183f537be2b9b93 --- /dev/null +++ b/examples/USER/tally/log.12Jun17.force.1 @@ -0,0 +1,177 @@ +LAMMPS (19 May 2017) + +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... 
+ 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style lj/cut/coul/long 12.0 12.0 +kspace_style pppm 1.0e-4 + +pair_coeff 1 1 0.15535 3.166 +pair_coeff * 2 0.0000 0.0000 + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 8000 3375 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 3e-06 on 1 procs for 0 steps with 4500 atoms + + +group one molecule 1 2 +6 atoms in group one + +# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords + +# compute pairwise force between two molecules and everybody +compute fpa one group/group all pair yes kspace no boundary no +# tally pairwise force between two molecules and the all molecules +compute c1 one force/tally all +# tally the force of all with all (should be zero) +compute c2 all force/tally all +# collect per atom data. only reduce over the first group. +compute one one reduce sum c_c1[1] c_c1[2] c_c1[3] +compute red all reduce sum c_c2[1] c_c2[2] c_c2[3] +# determine magnitude of force +variable fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3]) +variable for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3]) +# round to 10**-10 absolute precision. +variable ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10 +variable all equal round(1e10*c_c2)*1e-10 + +velocity all create 300 432567 dist uniform + +timestep 2.0 + +# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero +thermo_style custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all +thermo 10 + +run 50 +PPPM initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 8000 3375 +WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77) +WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77) +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) compute group/group, occasional, copy from (1) + attributes: half, newton on + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 28.47 | 28.47 | 28.47 Mbytes +Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all + 0 22.7331 22.7331 22.7331 -17.068295 -17.068295 -8.8348335 -8.8348334 -12.141369 -12.141369 0 0 + 10 11.736901 11.736901 11.736901 -3.3897029 -3.3897029 9.1193856 9.1193856 -6.5651786 -6.5651786 0 0 + 20 5.6120339 5.6120339 5.6120339 -0.60046861 -0.60046861 -4.4481306 -4.4481306 3.3687528 3.3687528 0 0 + 30 17.29261 17.29261 17.29261 6.179302 6.179302 -10.593979 -10.593979 12.190906 12.190906 0 0 + 40 18.664433 18.664433 18.664433 5.4727782 5.4727782 -6.9329319 -6.9329319 16.442148 16.442148 0 0 + 50 12.130407 12.130407 12.130407 -1.0321196 -1.0321196 8.0035558 8.0035558 -9.0567428 -9.0567428 0 0 +Loop time of 13.9507 on 1 procs for 50 steps with 4500 atoms + +Performance: 0.619 ns/day, 38.752 hours/ns, 3.584 timesteps/s +32.0% CPU use with 1 MPI tasks x no OpenMP threads + +MPI 
task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 12.594 | 12.594 | 12.594 | 0.0 | 90.27 +Bond | 7.3e-05 | 7.3e-05 | 7.3e-05 | 0.0 | 0.00 +Kspace | 0.56296 | 0.56296 | 0.56296 | 0.0 | 4.04 +Neigh | 0.65858 | 0.65858 | 0.65858 | 0.0 | 4.72 +Comm | 0.019093 | 0.019093 | 0.019093 | 0.0 | 0.14 +Output | 0.055025 | 0.055025 | 0.055025 | 0.0 | 0.39 +Modify | 0.057276 | 0.057276 | 0.057276 | 0.0 | 0.41 +Other | | 0.004003 | | | 0.03 + +Nlocal: 4500 ave 4500 max 4500 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 21131 ave 21131 max 21131 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 2601983 +Ave neighs/atom = 578.218 +Ave special neighs/atom = 2 +Neighbor list builds = 4 +Dangerous builds = 1 + +Total wall time: 0:00:15 diff --git a/examples/USER/tally/log.12Jun17.force.4 b/examples/USER/tally/log.12Jun17.force.4 new file mode 100644 index 0000000000000000000000000000000000000000..4238173fb60e3a5eb806da82101b0282c0185cac --- /dev/null +++ b/examples/USER/tally/log.12Jun17.force.4 @@ -0,0 +1,177 @@ +LAMMPS (19 May 2017) + +units real +atom_style full + +read_data data.spce + orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 4500 atoms + scanning bonds ... + 2 = max bonds/atom + scanning angles ... + 1 = max angles/atom + reading bonds ... + 3000 bonds + reading angles ... 
+ 1500 angles + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 1 = max # of 1-4 neighbors + 2 = max # of special neighbors + +pair_style lj/cut/coul/long 12.0 12.0 +kspace_style pppm 1.0e-4 + +pair_coeff 1 1 0.15535 3.166 +pair_coeff * 2 0.0000 0.0000 + +bond_style harmonic +angle_style harmonic +dihedral_style none +improper_style none + +bond_coeff 1 1000.00 1.000 +angle_coeff 1 100.0 109.47 + +special_bonds lj/coul 0.0 0.0 1.0 + 2 = max # of 1-2 neighbors + 1 = max # of 1-3 neighbors + 2 = max # of special neighbors + +neighbor 2.0 bin + +fix 1 all shake 0.0001 20 0 b 1 a 1 + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 1500 = # of frozen angles +fix 2 all nvt temp 300.0 300.0 100.0 + +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 3380 960 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 4.5e-06 on 4 procs for 0 steps with 4500 atoms + + +group one molecule 1 2 +6 atoms in group one + +# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords + +# compute pairwise force between two molecules and everybody +compute fpa one group/group all pair yes kspace no boundary no +# tally pairwise force between two molecules and the all molecules +compute c1 one force/tally all +# tally the force of all with all (should be zero) +compute c2 all force/tally all +# collect per atom data. only reduce over the first group. +compute one one reduce sum c_c1[1] c_c1[2] c_c1[3] +compute red all reduce sum c_c2[1] c_c2[2] c_c2[3] +# determine magnitude of force +variable fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3]) +variable for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3]) +# round to 10**-10 absolute precision. +variable ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10 +variable all equal round(1e10*c_c2)*1e-10 + +velocity all create 300 432567 dist uniform + +timestep 2.0 + +# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero +thermo_style custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all +thermo 10 + +run 50 +PPPM initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 3380 960 +WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77) +WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77) +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) compute group/group, occasional, copy from (1) + attributes: half, newton on + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 11.58 | 11.59 | 11.6 Mbytes +Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all + 0 22.7331 22.7331 22.7331 -17.068295 -17.068295 -8.8348335 -8.8348334 -12.141369 -12.141369 0 0 + 10 11.736901 11.736901 11.736901 -3.3897029 -3.3897029 9.1193856 9.1193856 -6.5651786 -6.5651786 0 0 + 20 5.6120339 5.6120339 5.6120339 -0.60046861 -0.60046861 -4.4481306 -4.4481306 3.3687528 3.3687528 0 0 + 30 17.29261 17.29261 17.29261 6.179302 6.179302 -10.593979 -10.593979 12.190906 12.190906 0 0 + 40 18.664433 18.664433 18.664433 5.4727782 5.4727782 -6.9329319 -6.9329319 16.442148 16.442148 0 0 + 50 12.130407 12.130407 12.130407 -1.0321196 -1.0321196 8.0035558 8.0035558 -9.0567428 -9.0567428 0 0 +Loop time of 4.31614 on 4 procs for 50 steps with 4500 atoms + +Performance: 2.002 ns/day, 11.989 hours/ns, 11.584 timesteps/s +31.6% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task 
timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.5075 | 3.6114 | 3.7489 | 4.7 | 83.67 +Bond | 8.6e-05 | 0.00010525 | 0.000141 | 0.0 | 0.00 +Kspace | 0.2581 | 0.39489 | 0.49723 | 14.2 | 9.15 +Neigh | 0.19826 | 0.19888 | 0.19918 | 0.1 | 4.61 +Comm | 0.034639 | 0.037137 | 0.038938 | 0.9 | 0.86 +Output | 0.025465 | 0.025997 | 0.027558 | 0.6 | 0.60 +Modify | 0.044022 | 0.044175 | 0.044407 | 0.1 | 1.02 +Other | | 0.003593 | | | 0.08 + +Nlocal: 1125 ave 1148 max 1097 min +Histogram: 1 0 0 1 0 0 0 0 1 1 +Nghost: 12212.5 ave 12269 max 12162 min +Histogram: 1 0 0 1 0 1 0 0 0 1 +Neighs: 650496 ave 675112 max 631353 min +Histogram: 1 0 0 1 1 0 0 0 0 1 + +Total # of neighbors = 2601983 +Ave neighs/atom = 578.218 +Ave special neighs/atom = 2 +Neighbor list builds = 4 +Dangerous builds = 1 + +Total wall time: 0:00:04 diff --git a/examples/USER/tally/log.21Aug15.pe.1 b/examples/USER/tally/log.12Jun17.pe.1 similarity index 50% rename from examples/USER/tally/log.21Aug15.pe.1 rename to examples/USER/tally/log.12Jun17.pe.1 index aea553003871b4fec713295409636c14f3afb446..8b0f7534144b7b00ad05ec59926250503934a0a9 100644 --- a/examples/USER/tally/log.21Aug15.pe.1 +++ b/examples/USER/tally/log.12Jun17.pe.1 @@ -1,5 +1,4 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task +LAMMPS (19 May 2017) units real atom_style full @@ -50,6 +49,35 @@ fix 1 all shake 0.0001 20 0 b 1 a 1 1500 = # of frozen angles fix 2 all nvt temp 300.0 300.0 100.0 +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 8000 3375 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 1e-06 on 1 procs for 0 steps with 4500 atoms + + group oxy type 1 1500 atoms in group oxy group hyd type 2 @@ -88,6 +116,7 @@ thermo 10 run 50 PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) G vector (1/distance) = 0.218482 grid = 15 15 15 stencil order = 5 @@ -95,38 +124,49 @@ PPPM initialization ... estimated relative force accuracy = 9.61968e-05 using double precision FFTs 3d grid and FFT values/proc = 8000 3375 -WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75) -WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75) +WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77) +WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77) Neighbor list info ... 
- 2 neighbor list requests update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 14 ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 17.381 Mbytes -Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair - 0 -516632.19 -516632.19 3169.9382 3169.9382 46213.889 46213.889 49383.827 49383.827 49383.827 49383.827 - 10 -517027.36 -517027.36 3099.1322 3099.1322 45891.84 45891.84 48990.972 48990.972 48990.972 48990.972 - 20 -516828.06 -516828.06 3101.4321 3101.4321 45884.14 45884.14 48985.572 48985.572 48985.572 48985.572 - 30 -517032.1 -517032.1 3198.5939 3198.5939 45793.571 45793.571 48992.165 48992.165 48992.165 48992.165 - 40 -517095.56 -517095.56 3244.0797 3244.0797 45715.265 45715.265 48959.345 48959.345 48959.345 48959.345 - 50 -517273.54 -517273.54 3274.9142 3274.9142 45665.997 45665.997 48940.911 48940.911 48940.911 48940.911 - -Loop time of 4.31105 on 1 procs for 50 steps with 4500 atoms -100.1% CPU use with 1 MPI tasks x 1 OpenMP threads -Performance: 2.004 ns/day 11.975 hours/ns 11.598 timesteps/s - -MPI task timings breakdown: + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) compute group/group, occasional, copy from (1) + attributes: half, newton on + pair build: copy + stencil: none + bin: none +Per MPI rank memory allocation (min/avg/max) = 29.08 | 29.08 | 29.08 Mbytes +Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair + 0 -516634.27 -516634.27 3169.9427 3169.9427 46212.482 46212.482 49382.425 49382.425 49382.425 49382.425 + 10 -517027.35 -517027.35 3099.1374 3099.1374 45891.866 45891.866 48991.003 48991.003 48991.003 48991.003 + 20 -516828.05 -516828.05 3101.4373 3101.4373 45884.156 45884.156 48985.594 48985.594 48985.594 48985.594 + 30 -517032.07 
-517032.07 3198.5951 3198.5951 45793.595 45793.595 48992.191 48992.191 48992.191 48992.191 + 40 -517095.54 -517095.54 3244.0771 3244.0771 45715.292 45715.292 48959.369 48959.369 48959.369 48959.369 + 50 -517273.5 -517273.5 3274.9097 3274.9097 45666.025 45666.025 48940.935 48940.935 48940.935 48940.935 +Loop time of 15.3339 on 1 procs for 50 steps with 4500 atoms + +Performance: 0.563 ns/day, 42.594 hours/ns, 3.261 timesteps/s +32.0% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 3.5071 | 3.5071 | 3.5071 | 0.0 | 81.35 -Bond | 0.00025034 | 0.00025034 | 0.00025034 | 0.0 | 0.01 -Kspace | 0.19991 | 0.19991 | 0.19991 | 0.0 | 4.64 -Neigh | 0.31459 | 0.31459 | 0.31459 | 0.0 | 7.30 -Comm | 0.010338 | 0.010338 | 0.010338 | 0.0 | 0.24 -Output | 0.24722 | 0.24722 | 0.24722 | 0.0 | 5.73 -Modify | 0.029466 | 0.029466 | 0.029466 | 0.0 | 0.68 -Other | | 0.002182 | | | 0.05 +Pair | 13.432 | 13.432 | 13.432 | 0.0 | 87.60 +Bond | 0.000365 | 0.000365 | 0.000365 | 0.0 | 0.00 +Kspace | 0.581 | 0.581 | 0.581 | 0.0 | 3.79 +Neigh | 0.66081 | 0.66081 | 0.66081 | 0.0 | 4.31 +Comm | 0.019908 | 0.019908 | 0.019908 | 0.0 | 0.13 +Output | 0.57731 | 0.57731 | 0.57731 | 0.0 | 3.76 +Modify | 0.058515 | 0.058515 | 0.058515 | 0.0 | 0.38 +Other | | 0.003889 | | | 0.03 Nlocal: 4500 ave 4500 max 4500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -135,10 +175,10 @@ Histogram: 1 0 0 0 0 0 0 0 0 0 Neighs: 2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Total # of neighbors = 2601984 -Ave neighs/atom = 578.219 +Total # of neighbors = 2601983 +Ave neighs/atom = 578.218 Ave special neighs/atom = 2 Neighbor list builds = 4 Dangerous builds = 1 -Total wall time: 0:00:04 +Total wall time: 0:00:16 diff --git a/examples/USER/tally/log.21Aug15.pe.4 b/examples/USER/tally/log.12Jun17.pe.4 similarity index 50% rename from 
examples/USER/tally/log.21Aug15.pe.4 rename to examples/USER/tally/log.12Jun17.pe.4 index 303af555ff2655073827812bfdeab7e737731b39..f684fabe01032051684737adf1241a6b0471a78a 100644 --- a/examples/USER/tally/log.21Aug15.pe.4 +++ b/examples/USER/tally/log.12Jun17.pe.4 @@ -1,5 +1,4 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task +LAMMPS (19 May 2017) units real atom_style full @@ -50,6 +49,35 @@ fix 1 all shake 0.0001 20 0 b 1 a 1 1500 = # of frozen angles fix 2 all nvt temp 300.0 300.0 100.0 +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 3380 960 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 1.75e-06 on 4 procs for 0 steps with 4500 atoms + + group oxy type 1 1500 atoms in group oxy group hyd type 2 @@ -88,6 +116,7 @@ thermo 10 run 50 PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) G vector (1/distance) = 0.218482 grid = 15 15 15 stencil order = 5 @@ -95,38 +124,49 @@ PPPM initialization ... 
estimated relative force accuracy = 9.61968e-05 using double precision FFTs 3d grid and FFT values/proc = 3380 960 -WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75) -WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75) +WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77) +WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77) Neighbor list info ... - 2 neighbor list requests update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 14 ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 8.44413 Mbytes -Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair - 0 -516632.19 -516632.19 3169.9382 3169.9382 46213.889 46213.889 49383.827 49383.827 49383.827 49383.827 - 10 -517027.36 -517027.36 3099.1322 3099.1322 45891.84 45891.84 48990.972 48990.972 48990.972 48990.972 - 20 -516828.06 -516828.06 3101.4321 3101.4321 45884.14 45884.14 48985.572 48985.572 48985.572 48985.572 - 30 -517032.1 -517032.1 3198.5939 3198.5939 45793.571 45793.571 48992.165 48992.165 48992.165 48992.165 - 40 -517095.56 -517095.56 3244.0797 3244.0797 45715.265 45715.265 48959.345 48959.345 48959.345 48959.345 - 50 -517273.54 -517273.54 3274.9142 3274.9142 45665.997 45665.997 48940.911 48940.911 48940.911 48940.911 - -Loop time of 1.20533 on 4 procs for 50 steps with 4500 atoms -100.0% CPU use with 4 MPI tasks x 1 OpenMP threads -Performance: 7.168 ns/day 3.348 hours/ns 41.482 timesteps/s - -MPI task timings breakdown: + binsize = 7, bins = 6 6 6 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard + (2) compute group/group, occasional, copy from (1) + attributes: half, newton on + pair build: copy + stencil: none + bin: none +Per MPI rank 
memory allocation (min/avg/max) = 11.86 | 11.87 | 11.88 Mbytes +Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair + 0 -516634.27 -516634.27 3169.9427 3169.9427 46212.482 46212.482 49382.425 49382.425 49382.425 49382.425 + 10 -517027.35 -517027.35 3099.1374 3099.1374 45891.866 45891.866 48991.003 48991.003 48991.003 48991.003 + 20 -516828.05 -516828.05 3101.4373 3101.4373 45884.156 45884.156 48985.594 48985.594 48985.594 48985.594 + 30 -517032.07 -517032.07 3198.5951 3198.5951 45793.595 45793.595 48992.191 48992.191 48992.191 48992.191 + 40 -517095.54 -517095.54 3244.0771 3244.0771 45715.292 45715.292 48959.369 48959.369 48959.369 48959.369 + 50 -517273.5 -517273.5 3274.9097 3274.9097 45666.025 45666.025 48940.935 48940.935 48940.935 48940.935 +Loop time of 2.32344 on 4 procs for 50 steps with 4500 atoms + +Performance: 3.719 ns/day, 6.454 hours/ns, 21.520 timesteps/s +64.0% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.87053 | 0.90325 | 0.94364 | 2.8 | 74.94 -Bond | 0.00015402 | 0.00018191 | 0.00020909 | 0.2 | 0.02 -Kspace | 0.061657 | 0.10164 | 0.13394 | 8.4 | 8.43 -Neigh | 0.088292 | 0.088332 | 0.088373 | 0.0 | 7.33 -Comm | 0.017319 | 0.017806 | 0.018291 | 0.4 | 1.48 -Output | 0.07067 | 0.070706 | 0.070813 | 0.0 | 5.87 -Modify | 0.021655 | 0.021694 | 0.02173 | 0.0 | 1.80 -Other | | 0.001719 | | | 0.14 +Pair | 1.5561 | 1.8883 | 2.0327 | 14.1 | 81.27 +Bond | 8.8e-05 | 0.000116 | 0.000135 | 0.0 | 0.00 +Kspace | 0.094718 | 0.1933 | 0.26055 | 14.1 | 8.32 +Neigh | 0.085117 | 0.1073 | 0.1147 | 3.9 | 4.62 +Comm | 0.014156 | 0.017907 | 0.020005 | 1.8 | 0.77 +Output | 0.071634 | 0.090599 | 0.097665 | 3.6 | 3.90 +Modify | 0.019447 | 0.024101 | 0.026277 | 1.8 | 1.04 +Other | | 0.001804 | | | 0.08 Nlocal: 1125 ave 1148 max 1097 min Histogram: 1 0 0 1 0 0 0 0 1 1 @@ -135,10 +175,10 @@ Histogram: 1 0 
0 1 0 1 0 0 0 1 Neighs: 650496 ave 675112 max 631353 min Histogram: 1 0 0 1 1 0 0 0 0 1 -Total # of neighbors = 2601984 -Ave neighs/atom = 578.219 +Total # of neighbors = 2601983 +Ave neighs/atom = 578.218 Ave special neighs/atom = 2 Neighbor list builds = 4 Dangerous builds = 1 -Total wall time: 0:00:01 +Total wall time: 0:00:02 diff --git a/examples/USER/tally/log.21Aug15.stress.1 b/examples/USER/tally/log.12Jun17.stress.1 similarity index 56% rename from examples/USER/tally/log.21Aug15.stress.1 rename to examples/USER/tally/log.12Jun17.stress.1 index c20b51559655d55f10d69024485f95f791214975..a76012487cca3c6d73f4331fc27f62165d326c8d 100644 --- a/examples/USER/tally/log.21Aug15.stress.1 +++ b/examples/USER/tally/log.12Jun17.stress.1 @@ -1,5 +1,4 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task +LAMMPS (19 May 2017) units real atom_style full @@ -50,6 +49,35 @@ fix 1 all shake 0.0001 20 0 b 1 a 1 1500 = # of frozen angles fix 2 all nvt temp 300.0 300.0 100.0 +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 8000 3375 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 2e-06 on 1 procs for 0 steps with 4500 atoms + + group one molecule 1 2 6 atoms in group one @@ -79,6 +107,7 @@ thermo 10 run 50 PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) G vector (1/distance) = 0.218482 grid = 15 15 15 stencil order = 5 @@ -86,38 +115,32 @@ PPPM initialization ... estimated relative force accuracy = 9.61968e-05 using double precision FFTs 3d grid and FFT values/proc = 8000 3375 -WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75) -WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75) -Neighbor list info ... 
- 1 neighbor list requests - update every 1 steps, delay 10 steps, check yes - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 24.631 Mbytes -Step press spa press one ref - 0 26497.547 26497.547 26497.547 -2357033.6 -2357033.6 - 10 23665.073 23665.073 23665.073 -2096057.3 -2096057.3 - 20 23338.149 23338.149 23338.149 -2034283 -2034283 - 30 25946.4 25946.4 25946.4 -2002817 -2002817 - 40 27238.349 27238.349 27238.349 -2155411.5 -2155411.5 - 50 27783.092 27783.092 27783.092 -1862190.3 -1862190.3 - -Loop time of 4.15609 on 1 procs for 50 steps with 4500 atoms -100.1% CPU use with 1 MPI tasks x 1 OpenMP threads -Performance: 2.079 ns/day 11.545 hours/ns 12.031 timesteps/s - -MPI task timings breakdown: +WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79) +WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79) +Per MPI rank memory allocation (min/avg/max) = 35.9 | 35.9 | 35.9 Mbytes +Step c_press v_spa v_press v_one v_ref + 0 26496.811 26496.811 26496.811 -2356992.7 -2356992.7 + 10 23665.129 23665.129 23665.129 -2096059 -2096059 + 20 23338.197 23338.197 23338.197 -2034284.1 -2034284.1 + 30 25946.434 25946.434 25946.434 -2002815.3 -2002815.3 + 40 27238.374 27238.374 27238.374 -2155408.7 -2155408.7 + 50 27783.107 27783.107 27783.107 -1862191.5 -1862191.5 +Loop time of 14.2089 on 1 procs for 50 steps with 4500 atoms + +Performance: 0.608 ns/day, 39.469 hours/ns, 3.519 timesteps/s +32.0% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 3.6444 | 3.6444 | 3.6444 | 0.0 | 87.69 -Bond | 0.0016105 | 0.0016105 | 0.0016105 | 0.0 | 0.04 -Kspace | 0.22345 | 0.22345 | 0.22345 | 0.0 | 5.38 -Neigh | 0.23588 | 0.23588 | 0.23588 | 0.0 | 5.68 -Comm | 0.010035 | 0.010035 | 0.010035 | 0.0 | 0.24 
-Output | 0.0084085 | 0.0084085 | 0.0084085 | 0.0 | 0.20 -Modify | 0.029978 | 0.029978 | 0.029978 | 0.0 | 0.72 -Other | | 0.002368 | | | 0.06 +Pair | 12.983 | 12.983 | 12.983 | 0.0 | 91.37 +Bond | 0.002788 | 0.002788 | 0.002788 | 0.0 | 0.02 +Kspace | 0.62745 | 0.62745 | 0.62745 | 0.0 | 4.42 +Neigh | 0.49839 | 0.49839 | 0.49839 | 0.0 | 3.51 +Comm | 0.018597 | 0.018597 | 0.018597 | 0.0 | 0.13 +Output | 0.015852 | 0.015852 | 0.015852 | 0.0 | 0.11 +Modify | 0.058415 | 0.058415 | 0.058415 | 0.0 | 0.41 +Other | | 0.004126 | | | 0.03 Nlocal: 4500 ave 4500 max 4500 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -132,4 +155,4 @@ Ave special neighs/atom = 2 Neighbor list builds = 3 Dangerous builds = 0 -Total wall time: 0:00:04 +Total wall time: 0:00:15 diff --git a/examples/USER/tally/log.21Aug15.stress.4 b/examples/USER/tally/log.12Jun17.stress.4 similarity index 55% rename from examples/USER/tally/log.21Aug15.stress.4 rename to examples/USER/tally/log.12Jun17.stress.4 index c681960c99d4759c302907fbb07791281d6c48d9..37bb60f013ee2b589a569a11a65139b6cc0a3f10 100644 --- a/examples/USER/tally/log.21Aug15.stress.4 +++ b/examples/USER/tally/log.12Jun17.stress.4 @@ -1,5 +1,4 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task +LAMMPS (19 May 2017) units real atom_style full @@ -50,6 +49,35 @@ fix 1 all shake 0.0001 20 0 b 1 a 1 1500 = # of frozen angles fix 2 all nvt temp 300.0 300.0 100.0 +# make certain that shake constraints are satisfied +run 0 post no +PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.218482 + grid = 15 15 15 + stencil order = 5 + estimated absolute RMS force accuracy = 0.0319435 + estimated relative force accuracy = 9.61968e-05 + using double precision FFTs + 3d grid and FFT values/proc = 3380 960 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 14 + ghost atom cutoff = 14 + binsize = 7, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -16692.358 0 -16692.358 -1289.8319 +Loop time of 4e-06 on 4 procs for 0 steps with 4500 atoms + + group one molecule 1 2 6 atoms in group one @@ -79,6 +107,7 @@ thermo 10 run 50 PPPM initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) G vector (1/distance) = 0.218482 grid = 15 15 15 stencil order = 5 @@ -86,44 +115,38 @@ PPPM initialization ... estimated relative force accuracy = 9.61968e-05 using double precision FFTs 3d grid and FFT values/proc = 3380 960 -WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75) -WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75) -Neighbor list info ... 
- 1 neighbor list requests - update every 1 steps, delay 10 steps, check yes - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 12.0691 Mbytes -Step press spa press one ref - 0 26497.547 26497.547 26497.547 -2357033.6 -2357033.6 - 10 23665.073 23665.073 23665.073 -2096057.3 -2096057.3 - 20 23338.149 23338.149 23338.149 -2034283 -2034283 - 30 25946.4 25946.4 25946.4 -2002817 -2002817 - 40 27238.349 27238.349 27238.349 -2155411.5 -2155411.5 - 50 27783.092 27783.092 27783.092 -1862190.3 -1862190.3 - -Loop time of 1.17266 on 4 procs for 50 steps with 4500 atoms -100.0% CPU use with 4 MPI tasks x 1 OpenMP threads -Performance: 7.368 ns/day 3.257 hours/ns 42.638 timesteps/s - -MPI task timings breakdown: +WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79) +WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79) +Per MPI rank memory allocation (min/avg/max) = 15.25 | 15.26 | 15.27 Mbytes +Step c_press v_spa v_press v_one v_ref + 0 26496.811 26496.811 26496.811 -2356992.7 -2356992.7 + 10 23665.129 23665.129 23665.129 -2096059 -2096059 + 20 23338.197 23338.197 23338.197 -2034284.1 -2034284.1 + 30 25946.434 25946.434 25946.434 -2002815.3 -2002815.3 + 40 27238.374 27238.374 27238.374 -2155408.7 -2155408.7 + 50 27783.107 27783.107 27783.107 -1862191.5 -1862191.5 +Loop time of 4.32017 on 4 procs for 50 steps with 4500 atoms + +Performance: 2.000 ns/day, 12.000 hours/ns, 11.574 timesteps/s +31.8% CPU use with 4 MPI tasks x no OpenMP threads + +MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.89832 | 0.93222 | 0.98611 | 3.4 | 79.50 -Bond | 0.00081754 | 0.00096095 | 0.0011327 | 0.4 | 0.08 -Kspace | 0.068058 | 0.12154 | 0.15522 | 9.4 | 10.36 -Neigh | 0.065756 | 0.065785 | 0.065824 | 0.0 | 5.61 -Comm | 0.017489 | 0.017982 | 0.018623 
| 0.4 | 1.53 -Output | 0.010985 | 0.011017 | 0.011111 | 0.1 | 0.94 -Modify | 0.021429 | 0.021491 | 0.021551 | 0.0 | 1.83 -Other | | 0.001671 | | | 0.14 +Pair | 3.5816 | 3.6917 | 3.839 | 4.9 | 85.45 +Bond | 0.001579 | 0.0016563 | 0.001709 | 0.1 | 0.04 +Kspace | 0.22505 | 0.3716 | 0.48023 | 15.3 | 8.60 +Neigh | 0.14558 | 0.14568 | 0.14575 | 0.0 | 3.37 +Comm | 0.032009 | 0.03441 | 0.036274 | 0.8 | 0.80 +Output | 0.02253 | 0.023115 | 0.024844 | 0.7 | 0.54 +Modify | 0.046954 | 0.047086 | 0.047132 | 0.0 | 1.09 +Other | | 0.004935 | | | 0.11 Nlocal: 1125 ave 1154 max 1092 min Histogram: 1 0 0 0 1 0 1 0 0 1 Nghost: 12263.5 ave 12300 max 12219 min Histogram: 1 0 1 0 0 0 0 0 0 2 -Neighs: 650438 ave 678786 max 626279 min +Neighs: 650438 ave 678787 max 626279 min Histogram: 1 0 0 1 1 0 0 0 0 1 Total # of neighbors = 2601750 @@ -132,4 +155,4 @@ Ave special neighs/atom = 2 Neighbor list builds = 3 Dangerous builds = 0 -Total wall time: 0:00:01 +Total wall time: 0:00:04 diff --git a/examples/USER/tally/log.21Aug15.force.1 b/examples/USER/tally/log.21Aug15.force.1 deleted file mode 100644 index 8e7bdb9520deef2758c2a556edb69f5483e1604d..0000000000000000000000000000000000000000 --- a/examples/USER/tally/log.21Aug15.force.1 +++ /dev/null @@ -1,136 +0,0 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task - -units real -atom_style full - -read_data data.spce - orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) - 1 by 1 by 1 MPI processor grid - reading atoms ... - 4500 atoms - scanning bonds ... - 2 = max bonds/atom - scanning angles ... - 1 = max angles/atom - reading bonds ... - 3000 bonds - reading angles ... 
- 1500 angles - 2 = max # of 1-2 neighbors - 1 = max # of 1-3 neighbors - 1 = max # of 1-4 neighbors - 2 = max # of special neighbors - -pair_style lj/cut/coul/long 12.0 12.0 -kspace_style pppm 1.0e-4 - -pair_coeff 1 1 0.15535 3.166 -pair_coeff * 2 0.0000 0.0000 - -bond_style harmonic -angle_style harmonic -dihedral_style none -improper_style none - -bond_coeff 1 1000.00 1.000 -angle_coeff 1 100.0 109.47 - -special_bonds lj/coul 0.0 0.0 1.0 - 2 = max # of 1-2 neighbors - 1 = max # of 1-3 neighbors - 2 = max # of special neighbors - -neighbor 2.0 bin - -fix 1 all shake 0.0001 20 0 b 1 a 1 - 0 = # of size 2 clusters - 0 = # of size 3 clusters - 0 = # of size 4 clusters - 1500 = # of frozen angles -fix 2 all nvt temp 300.0 300.0 100.0 - -group one molecule 1 2 -6 atoms in group one - -# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords - -# compute pairwise force between two molecules and everybody -compute fpa one group/group all pair yes kspace no boundary no -# tally pairwise force between two molecules and the all molecules -compute c1 one force/tally all -# tally the force of all with all (should be zero) -compute c2 all force/tally all -# collect per atom data. only reduce over the first group. -compute one one reduce sum c_c1[1] c_c1[2] c_c1[3] -compute red all reduce sum c_c2[1] c_c2[2] c_c2[3] -# determine magnitude of force -variable fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3]) -variable for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3]) -# round to 10**-10 absolute precision. -variable ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10 - -velocity all create 300 432567 dist uniform - -timestep 2.0 - -# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. 
v_ref and c_c2 should be zero -thermo_style custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref c_c2 -thermo 10 - -run 50 -PPPM initialization ... - G vector (1/distance) = 0.218482 - grid = 15 15 15 - stencil order = 5 - estimated absolute RMS force accuracy = 0.0319435 - estimated relative force accuracy = 9.61968e-05 - using double precision FFTs - 3d grid and FFT values/proc = 8000 3375 -WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75) -WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75) -Neighbor list info ... - 2 neighbor list requests - update every 1 steps, delay 10 steps, check yes - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 16.7648 Mbytes -Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 - 0 22.732789 22.732789 22.732789 -17.068392 -17.068392 -8.8345214 -8.8345214 -12.140878 -12.140878 0 0 - 10 11.736915 11.736915 11.736915 -3.3898298 -3.3898298 9.119272 9.119272 -6.5652948 -6.5652948 0 0 - 20 5.6119761 5.6119761 5.6119761 -0.60028931 -0.60028931 -4.4479886 -4.4479886 3.368876 3.368876 0 0 - 30 17.292617 17.292617 17.292617 6.1793856 6.1793856 -10.593927 -10.593927 12.190919 12.190919 0 0 - 40 18.664226 18.664226 18.664226 5.4725079 5.4725079 -6.933046 -6.933046 16.441955 16.441955 0 0 - 50 12.130282 12.130282 12.130282 -1.0321244 -1.0321244 8.0032646 8.0032646 -9.0568326 -9.0568326 0 0 - -Loop time of 4.11825 on 1 procs for 50 steps with 4500 atoms -100.0% CPU use with 1 MPI tasks x 1 OpenMP threads -Performance: 2.098 ns/day 11.440 hours/ns 12.141 timesteps/s - -MPI task timings breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 3.5286 | 3.5286 | 3.5286 | 0.0 | 85.68 -Bond | 6.1274e-05 | 6.1274e-05 | 6.1274e-05 | 0.0 | 0.00 -Kspace | 0.1937 | 0.1937 | 0.1937 | 
0.0 | 4.70 -Neigh | 0.31454 | 0.31454 | 0.31454 | 0.0 | 7.64 -Comm | 0.01037 | 0.01037 | 0.01037 | 0.0 | 0.25 -Output | 0.039355 | 0.039355 | 0.039355 | 0.0 | 0.96 -Modify | 0.029273 | 0.029273 | 0.029273 | 0.0 | 0.71 -Other | | 0.002351 | | | 0.06 - -Nlocal: 4500 ave 4500 max 4500 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 21131 ave 21131 max 21131 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2601984 -Ave neighs/atom = 578.219 -Ave special neighs/atom = 2 -Neighbor list builds = 4 -Dangerous builds = 1 - -Total wall time: 0:00:04 diff --git a/examples/USER/tally/log.21Aug15.force.4 b/examples/USER/tally/log.21Aug15.force.4 deleted file mode 100644 index 17cbc50f02fba59ef993419f88e91428ac824653..0000000000000000000000000000000000000000 --- a/examples/USER/tally/log.21Aug15.force.4 +++ /dev/null @@ -1,136 +0,0 @@ -LAMMPS (21 Aug 2015-ICMS) - using 1 OpenMP thread(s) per MPI task - -units real -atom_style full - -read_data data.spce - orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736) - 2 by 2 by 1 MPI processor grid - reading atoms ... - 4500 atoms - scanning bonds ... - 2 = max bonds/atom - scanning angles ... - 1 = max angles/atom - reading bonds ... - 3000 bonds - reading angles ... 
- 1500 angles - 2 = max # of 1-2 neighbors - 1 = max # of 1-3 neighbors - 1 = max # of 1-4 neighbors - 2 = max # of special neighbors - -pair_style lj/cut/coul/long 12.0 12.0 -kspace_style pppm 1.0e-4 - -pair_coeff 1 1 0.15535 3.166 -pair_coeff * 2 0.0000 0.0000 - -bond_style harmonic -angle_style harmonic -dihedral_style none -improper_style none - -bond_coeff 1 1000.00 1.000 -angle_coeff 1 100.0 109.47 - -special_bonds lj/coul 0.0 0.0 1.0 - 2 = max # of 1-2 neighbors - 1 = max # of 1-3 neighbors - 2 = max # of special neighbors - -neighbor 2.0 bin - -fix 1 all shake 0.0001 20 0 b 1 a 1 - 0 = # of size 2 clusters - 0 = # of size 3 clusters - 0 = # of size 4 clusters - 1500 = # of frozen angles -fix 2 all nvt temp 300.0 300.0 100.0 - -group one molecule 1 2 -6 atoms in group one - -# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords - -# compute pairwise force between two molecules and everybody -compute fpa one group/group all pair yes kspace no boundary no -# tally pairwise force between two molecules and the all molecules -compute c1 one force/tally all -# tally the force of all with all (should be zero) -compute c2 all force/tally all -# collect per atom data. only reduce over the first group. -compute one one reduce sum c_c1[1] c_c1[2] c_c1[3] -compute red all reduce sum c_c2[1] c_c2[2] c_c2[3] -# determine magnitude of force -variable fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3]) -variable for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3]) -# round to 10**-10 absolute precision. -variable ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10 - -velocity all create 300 432567 dist uniform - -timestep 2.0 - -# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. 
v_ref and c_c2 should be zero -thermo_style custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref c_c2 -thermo 10 - -run 50 -PPPM initialization ... - G vector (1/distance) = 0.218482 - grid = 15 15 15 - stencil order = 5 - estimated absolute RMS force accuracy = 0.0319435 - estimated relative force accuracy = 9.61968e-05 - using double precision FFTs - 3d grid and FFT values/proc = 3380 960 -WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75) -WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75) -Neighbor list info ... - 2 neighbor list requests - update every 1 steps, delay 10 steps, check yes - master list distance cutoff = 14 - ghost atom cutoff = 14 - binsize = 7 -> bins = 6 6 6 -Memory usage per processor = 8.16441 Mbytes -Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 - 0 22.732789 22.732789 22.732789 -17.068392 -17.068392 -8.8345214 -8.8345214 -12.140878 -12.140878 0 0 - 10 11.736915 11.736915 11.736915 -3.3898298 -3.3898298 9.119272 9.119272 -6.5652948 -6.5652948 0 0 - 20 5.6119761 5.6119761 5.6119761 -0.60028931 -0.60028931 -4.4479886 -4.4479886 3.368876 3.368876 0 0 - 30 17.292617 17.292617 17.292617 6.1793856 6.1793856 -10.593927 -10.593927 12.190919 12.190919 0 0 - 40 18.664226 18.664226 18.664226 5.4725079 5.4725079 -6.933046 -6.933046 16.441955 16.441955 0 0 - 50 12.130282 12.130282 12.130282 -1.0321244 -1.0321244 8.0032646 8.0032646 -9.0568326 -9.0568326 0 0 - -Loop time of 1.13658 on 4 procs for 50 steps with 4500 atoms -100.0% CPU use with 4 MPI tasks x 1 OpenMP threads -Performance: 7.602 ns/day 3.157 hours/ns 43.991 timesteps/s - -MPI task timings breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.85795 | 0.89088 | 0.93636 | 3.0 | 78.38 -Bond | 3.4571e-05 | 4.4644e-05 | 5.4598e-05 | 0.1 | 0.00 -Kspace | 0.059847 | 0.1051 | 0.1384 
| 8.9 | 9.25 -Neigh | 0.085891 | 0.085954 | 0.086 | 0.0 | 7.56 -Comm | 0.01758 | 0.018091 | 0.019178 | 0.5 | 1.59 -Output | 0.013697 | 0.013725 | 0.013805 | 0.0 | 1.21 -Modify | 0.021068 | 0.021137 | 0.021205 | 0.0 | 1.86 -Other | | 0.001656 | | | 0.15 - -Nlocal: 1125 ave 1148 max 1097 min -Histogram: 1 0 0 1 0 0 0 0 1 1 -Nghost: 12212.5 ave 12269 max 12162 min -Histogram: 1 0 0 1 0 1 0 0 0 1 -Neighs: 650496 ave 675112 max 631353 min -Histogram: 1 0 0 1 1 0 0 0 0 1 - -Total # of neighbors = 2601984 -Ave neighs/atom = 578.219 -Ave special neighs/atom = 2 -Neighbor list builds = 4 -Dangerous builds = 1 - -Total wall time: 0:00:01 diff --git a/examples/neb/README b/examples/neb/README index 130d0fd2e34cf27105913c1bcef37f0bdec1eef8..5ef32f2ba612e680bb9f64cb917e69b858c2d8e1 100644 --- a/examples/neb/README +++ b/examples/neb/README @@ -2,12 +2,16 @@ Run these examples as: mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1 mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop2 +mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1.end mpirun -np 3 lmp_g++ -partition 3x1 -in in.neb.sivac mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1 mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop2 -mpirun -np 6 lmp_g++ -partition 3x2 -in in.neb.sivac -mpirun -np 9 lmp_g++ -partition 3x3 -in in.neb.sivac +mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1.end +mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.sivac + +Note that more than 4 replicas should be used for a precise estimate +of the activation energy corresponding to a transition. 
If you uncomment the dump command lines in the input scripts, you can create dump files to do visualization from via Python tools: (see diff --git a/examples/neb/in.neb.hop1 b/examples/neb/in.neb.hop1 index 9a3d7579f3b5709bf24f45867eb9d98d88fb7a0c..f26b52a28a5875c8591c63e9c186ccb5860bb8e0 100644 --- a/examples/neb/in.neb.hop1 +++ b/examples/neb/in.neb.hop1 @@ -51,7 +51,7 @@ set group nebatoms type 3 group nonneb subtract all nebatoms fix 1 lower setforce 0.0 0.0 0.0 -fix 2 nebatoms neb 1.0 +fix 2 nebatoms neb 1.0 parallel ideal fix 3 all enforce2d thermo 100 diff --git a/examples/neb/in.neb.hop1.end b/examples/neb/in.neb.hop1.end new file mode 100644 index 0000000000000000000000000000000000000000..81e5315306a44a4869f1ce3c5b4d8e890149c3ef --- /dev/null +++ b/examples/neb/in.neb.hop1.end @@ -0,0 +1,56 @@ +# 2d NEB surface simulation, hop from surface to become adatom + +dimension 2 +boundary p s p + +atom_style atomic +neighbor 0.3 bin +neigh_modify delay 5 +atom_modify map array sort 0 0.0 + +variable u uloop 20 + +# create geometry with flat surface + +lattice hex 0.9 +region box block 0 20 0 10 -0.25 0.25 + +read_data initial.hop1.end + +# LJ potentials + +pair_style lj/cut 2.5 +pair_coeff * * 1.0 1.0 2.5 +pair_modify shift yes + +# define groups + +region 1 block INF INF INF 1.25 INF INF +group lower region 1 +group mobile subtract all lower +set group lower type 2 + +timestep 0.05 + +# group of NEB atoms - either block or single atom ID 412 + +region surround block 10 18 17 20 0 0 units box +group nebatoms region surround +#group nebatoms id 412 +set group nebatoms type 3 +group nonneb subtract all nebatoms + +fix 1 lower setforce 0.0 0.0 0.0 +fix 2 nebatoms neb 1.0 parallel ideal end first 1.0 +fix 3 all enforce2d + +thermo 100 + +#dump 1 nebatoms atom 10 dump.neb.$u +#dump 2 nonneb atom 10 dump.nonneb.$u + +# run NEB for 2000 steps or to force tolerance + +min_style quickmin + +neb 0.0 0.1 1000 1000 100 final final.hop1 diff --git a/examples/neb/in.neb.hop2 
b/examples/neb/in.neb.hop2 index 3eb16248e19676bd5f0db3786908a5feaa2d5450..e69fb338cdd44abae00d60132f4a65ba0d39692e 100644 --- a/examples/neb/in.neb.hop2 +++ b/examples/neb/in.neb.hop2 @@ -53,7 +53,7 @@ set group nebatoms type 3 group nonneb subtract all nebatoms fix 1 lower setforce 0.0 0.0 0.0 -fix 2 nebatoms neb 1.0 +fix 2 nebatoms neb 1.0 fix 3 all enforce2d thermo 100 @@ -65,4 +65,4 @@ thermo 100 min_style fire -neb 0.0 0.01 1000 1000 100 final final.hop2 +neb 0.0 0.05 1000 1000 100 final final.hop2 diff --git a/examples/neb/in.neb.sivac b/examples/neb/in.neb.sivac index 566a38062700ad58ee693e18a46069f984a09a6f..941d063b90e60a6b58b9ea1d4247de533fa80615 100644 --- a/examples/neb/in.neb.sivac +++ b/examples/neb/in.neb.sivac @@ -66,7 +66,7 @@ minimize 1.0e-6 1.0e-4 1000 10000 reset_timestep 0 -fix 1 all neb 1.0 +fix 1 all neb 1.0 thermo 100 diff --git a/examples/neb/initial.hop1.end b/examples/neb/initial.hop1.end new file mode 100644 index 0000000000000000000000000000000000000000..0357431fda071fa4a50dd58b71ec2ff1fca8d83c --- /dev/null +++ b/examples/neb/initial.hop1.end @@ -0,0 +1,860 @@ +LAMMPS data file via write_data, version 4 May 2017, timestep = 155 + +420 atoms +3 atom types + +0.0000000000000000e+00 2.2653923264628304e+01 xlo xhi +2.1918578738841410e-01 1.9932852254455714e+01 ylo yhi +-2.8317404080785380e-01 2.8317404080785380e-01 zlo zhi + +Masses + +1 1 +2 1 +3 1 + +Atoms # atomic + +1 2 0.0000000000000000e+00 2.2114806707013038e-01 0.0000000000000000e+00 0 0 0 +2 2 5.6634808161570760e-01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0 +3 2 1.1326961632314152e+00 2.2114806707013018e-01 0.0000000000000000e+00 0 0 0 +4 2 1.6990442448471228e+00 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0 +5 2 2.2653923264628304e+00 2.2114806707013032e-01 0.0000000000000000e+00 0 0 0 +6 2 2.8317404080785380e+00 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0 +7 2 3.3980884896942456e+00 2.2114806707013024e-01 0.0000000000000000e+00 0 0 0 +8 2 
3.9644365713099532e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +9 2 4.5307846529256608e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +10 2 5.0971327345413684e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +11 2 5.6634808161570760e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +12 2 6.2298288977727836e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +13 2 6.7961769793884912e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +14 2 7.3625250610041988e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +15 2 7.9288731426199064e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +16 2 8.4952212242356140e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +17 2 9.0615693058513216e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +18 2 9.6279173874670292e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +19 2 1.0194265469082737e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +20 2 1.0760613550698444e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +21 2 1.1326961632314152e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +22 2 1.1893309713929860e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +23 2 1.2459657795545567e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +24 2 1.3026005877161275e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +25 2 1.3592353958776982e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +26 2 1.4158702040392690e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +27 2 1.4725050122008398e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +28 2 1.5291398203624105e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +29 2 1.5857746285239813e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +30 2 1.6424094366855520e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +31 2 1.6990442448471228e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +32 2 1.7556790530086936e+01 1.1832938184587636e+00 
0.0000000000000000e+00 0 0 0 +33 2 1.8123138611702643e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +34 2 1.8689486693318351e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +35 2 1.9255834774934058e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0 +36 2 1.9822182856549766e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0 +37 2 2.0388530938165474e+01 2.2114806707013024e-01 0.0000000000000000e+00 0 0 0 +38 2 2.0954879019781181e+01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0 +39 2 2.1521227101396889e+01 2.2114806707013043e-01 0.0000000000000000e+00 0 0 0 +40 2 2.2087575183012596e+01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0 +41 2 5.5197595012095140e-17 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0 +42 1 5.6653050195082300e-01 3.1000166664180786e+00 0.0000000000000000e+00 0 0 0 +43 2 1.1326961632314152e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0 +44 1 1.6992713312703549e+00 3.1000339212153092e+00 0.0000000000000000e+00 0 0 0 +45 2 2.2653923264628304e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0 +46 1 2.8319979330663916e+00 3.1000568858502824e+00 0.0000000000000000e+00 0 0 0 +47 2 3.3980884896942456e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0 +48 1 3.9647072056144004e+00 3.1000829051868171e+00 0.0000000000000000e+00 0 0 0 +49 2 4.5307846529256608e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +50 1 5.0973978903306154e+00 3.1001089282984520e+00 0.0000000000000000e+00 0 0 0 +51 2 5.6634808161570760e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +52 1 6.2300706856774344e+00 3.1001320005511488e+00 0.0000000000000000e+00 0 0 0 +53 2 6.7961769793884912e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +54 1 7.3627281418365298e+00 3.1001497026412643e+00 0.0000000000000000e+00 0 0 0 +55 2 7.9288731426199064e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +56 1 8.4953743353575657e+00 3.1001604410839558e+00 0.0000000000000000e+00 0 0 0 +57 2 
9.0615693058513216e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +58 1 9.6280143647524650e+00 3.1001635457640377e+00 0.0000000000000000e+00 0 0 0 +59 2 1.0194265469082737e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +60 1 1.0760653757776259e+01 3.1001591904894030e+00 0.0000000000000000e+00 0 0 0 +61 2 1.1326961632314152e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +62 1 1.1893297897551465e+01 3.1001481997229781e+00 0.0000000000000000e+00 0 0 0 +63 2 1.2459657795545567e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +64 1 1.3025951551034638e+01 3.1001318239711781e+00 0.0000000000000000e+00 0 0 0 +65 2 1.3592353958776982e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +66 1 1.4158618530491893e+01 3.1001115545681470e+00 0.0000000000000000e+00 0 0 0 +67 2 1.4725050122008398e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +68 1 1.5291301468763761e+01 3.1000890162853869e+00 0.0000000000000000e+00 0 0 0 +69 2 1.5857746285239813e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +70 1 1.6424001663467980e+01 3.1000659357603495e+00 0.0000000000000000e+00 0 0 0 +71 2 1.6990442448471228e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +72 1 1.7556718955895743e+01 3.1000441476131195e+00 0.0000000000000000e+00 0 0 0 +73 2 1.8123138611702643e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +74 1 1.8689451647636982e+01 3.1000255781755963e+00 0.0000000000000000e+00 0 0 0 +75 2 1.9255834774934058e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0 +76 1 1.9822196505112320e+01 3.1000121466922494e+00 0.0000000000000000e+00 0 0 0 +77 2 2.0388530938165474e+01 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0 +78 1 2.0954948927196146e+01 3.1000055506449713e+00 0.0000000000000000e+00 0 0 0 +79 2 2.1521227101396889e+01 2.1414943053865141e+00 0.0000000000000000e+00 0 0 0 +80 1 2.2087703334137267e+01 3.1000069547492535e+00 0.0000000000000000e+00 0 0 0 +81 1 3.1056926716504509e-04 4.0585004644184055e+00 
0.0000000000000000e+00 0 0 0 +82 1 5.6689331628382078e-01 5.0169857265632762e+00 0.0000000000000000e+00 0 0 0 +83 1 1.1331010876667682e+00 4.0585336877518543e+00 0.0000000000000000e+00 0 0 0 +84 1 1.6997107179473134e+00 5.0170595571637469e+00 0.0000000000000000e+00 0 0 0 +85 1 2.2658691471408239e+00 4.0585832735991989e+00 0.0000000000000000e+00 0 0 0 +86 1 2.8324913387275488e+00 5.0171576059016481e+00 0.0000000000000000e+00 0 0 0 +87 1 3.3986077265334802e+00 4.0586437183143182e+00 0.0000000000000000e+00 0 0 0 +88 1 3.9652274946581523e+00 5.0172690174612651e+00 0.0000000000000000e+00 0 0 0 +89 1 4.5313127263524615e+00 4.0587080892871539e+00 0.0000000000000000e+00 0 0 0 +90 1 5.0979153202534064e+00 5.0173813990872880e+00 0.0000000000000000e+00 0 0 0 +91 1 5.6639833195247755e+00 4.0587690704404489e+00 0.0000000000000000e+00 0 0 0 +92 1 6.2305551824295442e+00 5.0174824868813017e+00 0.0000000000000000e+00 0 0 0 +93 1 6.7966220748571669e+00 4.0588200390400129e+00 0.0000000000000000e+00 0 0 0 +94 1 7.3631519876339633e+00 5.0175617795367824e+00 0.0000000000000000e+00 0 0 0 +95 1 7.9292347620768062e+00 4.0588559557915787e+00 0.0000000000000000e+00 0 0 0 +96 1 8.4957150696300925e+00 5.0176118394895646e+00 0.0000000000000000e+00 0 0 0 +97 1 9.0618297669257259e+00 4.0588738859603266e+00 0.0000000000000000e+00 0 0 0 +98 1 9.6282574219214077e+00 5.0176289672989007e+00 0.0000000000000000e+00 0 0 0 +99 1 1.0194417159454611e+01 4.0588730767572860e+00 0.0000000000000000e+00 0 0 0 +100 1 1.0760794315385466e+01 5.0176131474245498e+00 0.0000000000000000e+00 0 0 0 +101 1 1.1327007580768864e+01 4.0588546552053515e+00 0.0000000000000000e+00 0 0 0 +102 1 1.1893341583868121e+01 5.0175674119454996e+00 0.0000000000000000e+00 0 0 0 +103 1 1.2459611156068675e+01 4.0588211205885418e+00 0.0000000000000000e+00 0 0 0 +104 1 1.3025913928919357e+01 5.0174969437432848e+00 0.0000000000000000e+00 0 0 0 +105 1 1.3592236588154931e+01 4.0587758328652299e+00 0.0000000000000000e+00 0 0 0 +106 1 
1.4158523495745847e+01 5.0174082592346645e+00 0.0000000000000000e+00 0 0 0 +107 1 1.4724890484932756e+01 4.0587226477181808e+00 0.0000000000000000e+00 0 0 0 +108 1 1.5291178803597106e+01 5.0173086870307237e+00 0.0000000000000000e+00 0 0 0 +109 1 1.5857576888353359e+01 4.0586657476126140e+00 0.0000000000000000e+00 0 0 0 +110 1 1.6423884000624799e+01 5.0172061640888863e+00 0.0000000000000000e+00 0 0 0 +111 1 1.6990296946466405e+01 4.0586096139851531e+00 0.0000000000000000e+00 0 0 0 +112 1 1.7556638404998214e+01 5.0171091825602536e+00 0.0000000000000000e+00 0 0 0 +113 1 1.8123048711157228e+01 4.0585590083330025e+00 0.0000000000000000e+00 0 0 0 +114 1 1.8689436384449273e+01 5.0170266065355777e+00 0.0000000000000000e+00 0 0 0 +115 1 1.9255827121600600e+01 4.0585188068824696e+00 0.0000000000000000e+00 0 0 0 +116 1 1.9822267727126505e+01 5.0169670887341100e+00 0.0000000000000000e+00 0 0 0 +117 1 2.0388624292977298e+01 4.0584935738800203e+00 0.0000000000000000e+00 0 0 0 +118 1 2.0955118660666272e+01 5.0169379847636248e+00 0.0000000000000000e+00 0 0 0 +119 1 2.1521430213723754e+01 4.0584868720623906e+00 0.0000000000000000e+00 0 0 0 +120 1 2.2087973498256840e+01 5.0169439545250629e+00 0.0000000000000000e+00 0 0 0 +121 1 6.5693888433665819e-04 5.9753894955957820e+00 0.0000000000000000e+00 0 0 0 +122 1 5.6732815172745055e-01 6.9338399304063270e+00 0.0000000000000000e+00 0 0 0 +123 1 1.1335287178365945e+00 5.9754794631117711e+00 0.0000000000000000e+00 0 0 0 +124 1 1.7002170239497103e+00 6.9340002985609068e+00 0.0000000000000000e+00 0 0 0 +125 1 2.2663607603415961e+00 5.9756128140497200e+00 0.0000000000000000e+00 0 0 0 +126 1 2.8330530781363672e+00 6.9342120448719999e+00 0.0000000000000000e+00 0 0 0 +127 1 3.3991419489134609e+00 5.9757753571666763e+00 0.0000000000000000e+00 0 0 0 +128 1 3.9658260192240613e+00 6.9344537679961507e+00 0.0000000000000000e+00 0 0 0 +129 1 4.5318648361825700e+00 5.9759497256682410e+00 0.0000000000000000e+00 0 0 0 +130 1 5.0985283212441441e+00 
6.9347011709619251e+00 0.0000000000000000e+00 0 0 0 +131 1 5.6645260085061278e+00 5.9761173336292988e+00 0.0000000000000000e+00 0 0 0 +132 1 6.2311560978848899e+00 6.9349290616610286e+00 0.0000000000000000e+00 0 0 0 +133 1 6.7971269443565747e+00 5.9762605495489280e+00 0.0000000000000000e+00 0 0 0 +134 1 7.3637111409380722e+00 6.9351139899270322e+00 0.0000000000000000e+00 0 0 0 +135 1 7.9296749255956041e+00 5.9763649377635293e+00 0.0000000000000000e+00 0 0 0 +136 1 8.4962030194710927e+00 6.9352373287964380e+00 0.0000000000000000e+00 0 0 0 +137 1 9.0621832354873675e+00 5.9764210443253543e+00 0.0000000000000000e+00 0 0 0 +138 1 9.6286498073427680e+00 6.9352878316174378e+00 0.0000000000000000e+00 0 0 0 +139 1 1.0194670187278859e+01 5.9764252099845692e+00 0.0000000000000000e+00 0 0 0 +140 1 1.0761076657788550e+01 6.9352626758233988e+00 0.0000000000000000e+00 0 0 0 +141 1 1.1327157075090911e+01 5.9763792530010624e+00 0.0000000000000000e+00 0 0 0 +142 1 1.1893512574619942e+01 6.9351667929726579e+00 0.0000000000000000e+00 0 0 0 +143 1 1.2459665575239395e+01 5.9762893326393627e+00 0.0000000000000000e+00 0 0 0 +144 1 1.3025986375565017e+01 6.9350110693313853e+00 0.0000000000000000e+00 0 0 0 +145 1 1.3592215193397568e+01 5.9761645662970668e+00 0.0000000000000000e+00 0 0 0 +146 1 1.4158522852873338e+01 6.9348103731164086e+00 0.0000000000000000e+00 0 0 0 +147 1 1.4724821146400661e+01 5.9760159211600943e+00 0.0000000000000000e+00 0 0 0 +148 1 1.5291139696354101e+01 6.9345821426564456e+00 0.0000000000000000e+00 0 0 0 +149 1 1.5857492801384174e+01 5.9758556179168476e+00 0.0000000000000000e+00 0 0 0 +150 1 1.6423845342403485e+01 6.9343457083902562e+00 0.0000000000000000e+00 0 0 0 +151 1 1.6990232659392920e+01 5.9756969240409656e+00 0.0000000000000000e+00 0 0 0 +152 1 1.7556637811168688e+01 6.9341219348234606e+00 0.0000000000000000e+00 0 0 0 +153 1 1.8123035927190724e+01 5.9755539362099377e+00 0.0000000000000000e+00 0 0 0 +154 1 1.8689504786395585e+01 6.9339324030079297e+00 
0.0000000000000000e+00 0 0 0 +155 1 1.9255890870078105e+01 5.9754408656724385e+00 0.0000000000000000e+00 0 0 0 +156 1 1.9822425360651039e+01 6.9337974543626846e+00 0.0000000000000000e+00 0 0 0 +157 1 2.0388780199969101e+01 5.9753705251759808e+00 0.0000000000000000e+00 0 0 0 +158 1 2.0955373611320280e+01 6.9337330661143222e+00 0.0000000000000000e+00 0 0 0 +159 1 2.1521683507254988e+01 5.9753521824721574e+00 0.0000000000000000e+00 0 0 0 +160 1 2.2088323232189435e+01 6.9337475792566039e+00 0.0000000000000000e+00 0 0 0 +161 1 1.1151815023353693e-03 7.8921416571122727e+00 0.0000000000000000e+00 0 0 0 +162 1 5.6789887436851039e-01 8.8505576275120745e+00 0.0000000000000000e+00 0 0 0 +163 1 1.1340615020344891e+00 7.8923152028921146e+00 0.0000000000000000e+00 0 0 0 +164 1 1.7008494977197184e+00 8.8508369646616227e+00 0.0000000000000000e+00 0 0 0 +165 1 2.2669564852467339e+00 7.8925678787693965e+00 0.0000000000000000e+00 0 0 0 +166 1 2.8337429988374914e+00 8.8512007799959171e+00 0.0000000000000000e+00 0 0 0 +167 1 3.3997890834793392e+00 7.8928753791925752e+00 0.0000000000000000e+00 0 0 0 +168 1 3.9665700017177907e+00 8.8516188392723496e+00 0.0000000000000000e+00 0 0 0 +169 1 4.5325491541722158e+00 7.8932093579911635e+00 0.0000000000000000e+00 0 0 0 +170 1 5.0993179760197034e+00 8.8520570451664753e+00 0.0000000000000000e+00 0 0 0 +171 1 5.6652272696563086e+00 7.8935385042762318e+00 0.0000000000000000e+00 0 0 0 +172 1 6.2319722558852177e+00 8.8524768944511472e+00 0.0000000000000000e+00 0 0 0 +173 1 6.7978170214800082e+00 7.8938302754648539e+00 0.0000000000000000e+00 0 0 0 +174 1 7.3645207249719933e+00 8.8528366651387476e+00 0.0000000000000000e+00 0 0 0 +175 1 7.9303191911043118e+00 7.8940542651579788e+00 0.0000000000000000e+00 0 0 0 +176 1 8.4969615618418324e+00 8.8530963542120293e+00 0.0000000000000000e+00 0 0 0 +177 1 9.0627458585593441e+00 7.8941868850969135e+00 0.0000000000000000e+00 0 0 0 +178 1 9.6293104463590424e+00 8.8532254399208412e+00 0.0000000000000000e+00 0 0 0 
+179 1 1.0195121730902658e+01 7.8942152485172352e+00 0.0000000000000000e+00 0 0 0 +180 1 1.0761602408503441e+01 8.8532092085980238e+00 0.0000000000000000e+00 0 0 0 +181 1 1.1327481649719793e+01 7.8941385508356099e+00 0.0000000000000000e+00 0 0 0 +182 1 1.1893886870241856e+01 8.8530505445055354e+00 0.0000000000000000e+00 0 0 0 +183 1 1.2459865179342737e+01 7.8939667557582798e+00 0.0000000000000000e+00 0 0 0 +184 1 1.3026218291904378e+01 8.8527674547956821e+00 0.0000000000000000e+00 0 0 0 +185 1 1.3592310202433307e+01 7.8937178025905181e+00 0.0000000000000000e+00 0 0 0 +186 1 1.4158645900042497e+01 8.8523887379317436e+00 0.0000000000000000e+00 0 0 0 +187 1 1.4724847145311326e+01 7.8934149070498600e+00 0.0000000000000000e+00 0 0 0 +188 1 1.5291205081244327e+01 8.8519503874602243e+00 0.0000000000000000e+00 0 0 0 +189 1 1.5857494607334019e+01 7.8930848995638652e+00 0.0000000000000000e+00 0 0 0 +190 1 1.6423911366860466e+01 8.8514936483282209e+00 0.0000000000000000e+00 0 0 0 +191 1 1.6990256625068444e+01 7.8927574412240151e+00 0.0000000000000000e+00 0 0 0 +192 1 1.7556757521848787e+01 8.8510636099500459e+00 0.0000000000000000e+00 0 0 0 +193 1 1.8123121878813144e+01 7.8924640508501298e+00 0.0000000000000000e+00 0 0 0 +194 1 1.8689714850348466e+01 8.8507060559423465e+00 0.0000000000000000e+00 0 0 0 +195 1 1.9256065579477248e+01 7.8922356001392169e+00 0.0000000000000000e+00 0 0 0 +196 1 1.9822740225596814e+01 8.8504608774193994e+00 0.0000000000000000e+00 0 0 0 +197 1 2.0389054599310764e+01 7.8920977743942782e+00 0.0000000000000000e+00 0 0 0 +198 1 2.0955788196198530e+01 8.8503534864083591e+00 0.0000000000000000e+00 0 0 0 +199 1 2.1522054950758765e+01 7.8920658349416701e+00 0.0000000000000000e+00 0 0 0 +200 1 2.2088823030833748e+01 8.8503894045591807e+00 0.0000000000000000e+00 0 0 0 +201 1 1.7402898961801966e-03 9.8087331458102049e+00 0.0000000000000000e+00 0 0 0 +202 1 5.6862550253253785e-01 1.0767129063577668e+01 0.0000000000000000e+00 0 0 0 +203 1 1.1347351125604563e+00 
9.8090210312609756e+00 0.0000000000000000e+00 0 0 0 +204 1 1.7016010961270076e+00 1.0767553944884048e+01 0.0000000000000000e+00 0 0 0 +205 1 2.2676800733457139e+00 9.8094251915038573e+00 0.0000000000000000e+00 0 0 0 +206 1 2.8345388558320415e+00 1.0768094021206529e+01 0.0000000000000000e+00 0 0 0 +207 1 3.4005711921286008e+00 9.8099146303251388e+00 0.0000000000000000e+00 0 0 0 +208 1 3.9674359888022686e+00 1.0768719604543580e+01 0.0000000000000000e+00 0 0 0 +209 1 4.5333977826109315e+00 9.8104561733570019e+00 0.0000000000000000e+00 0 0 0 +210 1 5.1002760963180327e+00 1.0769398202643465e+01 0.0000000000000000e+00 0 0 0 +211 1 5.6661407887052828e+00 9.8110111848429966e+00 0.0000000000000000e+00 0 0 0 +212 1 6.2330282022400469e+00 1.0770087202120337e+01 0.0000000000000000e+00 0 0 0 +213 1 6.7987755062394477e+00 9.8115326503110527e+00 0.0000000000000000e+00 0 0 0 +214 1 7.3656514287550623e+00 1.0770727843890981e+01 0.0000000000000000e+00 0 0 0 +215 1 7.9312798141889260e+00 9.8119658218493768e+00 0.0000000000000000e+00 0 0 0 +216 1 8.4981076412551477e+00 1.0771244633836279e+01 0.0000000000000000e+00 0 0 0 +217 1 9.0636474998261161e+00 9.8122560909429151e+00 0.0000000000000000e+00 0 0 0 +218 1 9.6303843877347930e+00 1.0771559046035311e+01 0.0000000000000000e+00 0 0 0 +219 1 1.0195900672859819e+01 9.8123627359180627e+00 0.0000000000000000e+00 0 0 0 +220 1 1.0762516251278290e+01 1.0771614844517241e+01 0.0000000000000000e+00 0 0 0 +221 1 1.1328091472906591e+01 9.8122692653101016e+00 0.0000000000000000e+00 0 0 0 +222 1 1.1894584725285364e+01 1.0771394980275380e+01 0.0000000000000000e+00 0 0 0 +223 1 1.2460291956550108e+01 9.8119854743716211e+00 0.0000000000000000e+00 0 0 0 +224 1 1.3026697175518089e+01 1.0770922584297365e+01 0.0000000000000000e+00 0 0 0 +225 1 1.3592577560562113e+01 9.8115426529845742e+00 0.0000000000000000e+00 0 0 0 +226 1 1.4158957523975143e+01 1.0770251678533704e+01 0.0000000000000000e+00 0 0 0 +227 1 1.4725010595311739e+01 9.8109868569230709e+00 
0.0000000000000000e+00 0 0 0 +228 1 1.5291439665423439e+01 1.0769456959141509e+01 0.0000000000000000e+00 0 0 0 +229 1 1.5857627568713173e+01 9.8103742214932304e+00 0.0000000000000000e+00 0 0 0 +230 1 1.6424169320270668e+01 1.0768628052568168e+01 0.0000000000000000e+00 0 0 0 +231 1 1.6990431516954079e+01 9.8097684628141781e+00 0.0000000000000000e+00 0 0 0 +232 1 1.7557116532362020e+01 1.0767864432631596e+01 0.0000000000000000e+00 0 0 0 +233 1 1.8123390991250901e+01 9.8092369760472078e+00 0.0000000000000000e+00 0 0 0 +234 1 1.8690204705628890e+01 1.0767262063551410e+01 0.0000000000000000e+00 0 0 0 +235 1 1.9256448808830498e+01 9.8088413825519911e+00 0.0000000000000000e+00 0 0 0 +236 1 1.9823340586830241e+01 1.0766888821404979e+01 0.0000000000000000e+00 0 0 0 +237 1 2.0389541413400988e+01 9.8086229912274785e+00 0.0000000000000000e+00 0 0 0 +238 1 2.0956458511796701e+01 1.0766759511236279e+01 0.0000000000000000e+00 0 0 0 +239 1 2.1522621458778595e+01 9.8085916713182311e+00 0.0000000000000000e+00 0 0 0 +240 1 2.2089529168272502e+01 1.0766851883618157e+01 0.0000000000000000e+00 0 0 0 +241 1 2.5440858595377333e-03 1.1725176449724485e+01 0.0000000000000000e+00 0 0 0 +242 1 5.6945959459694062e-01 1.2683596360703445e+01 0.0000000000000000e+00 0 0 0 +243 1 1.1355189649219313e+00 1.1725603142335736e+01 0.0000000000000000e+00 0 0 0 +244 1 1.7023827890664067e+00 1.2684167657575470e+01 0.0000000000000000e+00 0 0 0 +245 1 2.2684713496063051e+00 1.1726169790097240e+01 0.0000000000000000e+00 0 0 0 +246 1 2.8353214317297493e+00 1.2684869845626739e+01 0.0000000000000000e+00 0 0 0 +247 1 3.4014115221528614e+00 1.1726849793467629e+01 0.0000000000000000e+00 0 0 0 +248 1 3.9682847366436711e+00 1.2685690043118647e+01 0.0000000000000000e+00 0 0 0 +249 1 4.5343333925353440e+00 1.1727620546655658e+01 0.0000000000000000e+00 0 0 0 +250 1 5.1012595788864648e+00 1.2686617936467927e+01 0.0000000000000000e+00 0 0 0 +251 1 5.6672100999124009e+00 1.1728453321807010e+01 0.0000000000000000e+00 0 0 0 
+252 1 6.2342050679378476e+00 1.2687631443781253e+01 0.0000000000000000e+00 0 0 0 +253 1 6.7999929539663801e+00 1.1729301393807379e+01 0.0000000000000000e+00 0 0 0 +254 1 7.3670487632296053e+00 1.2688678524169049e+01 0.0000000000000000e+00 0 0 0 +255 1 7.9326168577620031e+00 1.1730088752185795e+01 0.0000000000000000e+00 0 0 0 +256 1 8.4996909972151879e+00 1.2689657545646673e+01 0.0000000000000000e+00 0 0 0 +257 1 9.0650186324858186e+00 1.1730705889838760e+01 0.0000000000000000e+00 0 0 0 +258 1 9.6320279172941738e+00 1.2690401359419884e+01 0.0000000000000000e+00 0 0 0 +259 1 1.0197176988949883e+01 1.1731033591325737e+01 0.0000000000000000e+00 0 0 0 +260 1 1.0764025265158372e+01 1.2690719979755405e+01 0.0000000000000000e+00 0 0 0 +261 1 1.1329151471753224e+01 1.1730992082437087e+01 0.0000000000000000e+00 0 0 0 +262 1 1.1895793457864773e+01 1.2690499952724066e+01 0.0000000000000000e+00 0 0 0 +263 1 1.2461074954083520e+01 1.1730568008302011e+01 0.0000000000000000e+00 0 0 0 +264 1 1.3027557263784812e+01 1.2689744664661927e+01 0.0000000000000000e+00 0 0 0 +265 1 1.3593109813371450e+01 1.1729811224797992e+01 0.0000000000000000e+00 0 0 0 +266 1 1.4159562461497188e+01 1.2688557996910490e+01 0.0000000000000000e+00 0 0 0 +267 1 1.4725395146103379e+01 1.1728816806025771e+01 0.0000000000000000e+00 0 0 0 +268 1 1.5291979336937130e+01 1.2687112353846338e+01 0.0000000000000000e+00 0 0 0 +269 1 1.5858003969640130e+01 1.1727709969544065e+01 0.0000000000000000e+00 0 0 0 +270 1 1.6424834380846097e+01 1.2685627373535834e+01 0.0000000000000000e+00 0 0 0 +271 1 1.6990919595491782e+01 1.1726637864021814e+01 0.0000000000000000e+00 0 0 0 +272 1 1.7557997576834389e+01 1.2684345428666392e+01 0.0000000000000000e+00 0 0 0 +273 1 1.8124040077451223e+01 1.1725751812758334e+01 0.0000000000000000e+00 0 0 0 +274 1 1.8691233014266899e+01 1.2683475493290855e+01 0.0000000000000000e+00 0 0 0 +275 1 1.9257221467828444e+01 1.1725167602985902e+01 0.0000000000000000e+00 0 0 0 +276 1 1.9824402296022900e+01 
1.2683041511515679e+01 0.0000000000000000e+00 0 0 0 +277 1 2.0390369399207284e+01 1.1724905550220807e+01 0.0000000000000000e+00 0 0 0 +278 1 2.0957468093457749e+01 1.2682973372169659e+01 0.0000000000000000e+00 0 0 0 +279 1 2.1523449818304549e+01 1.1724927159323300e+01 0.0000000000000000e+00 0 0 0 +280 1 2.2090449109149038e+01 1.2683182366055206e+01 0.0000000000000000e+00 0 0 0 +281 1 3.4599372752678664e-03 1.3641562113178441e+01 0.0000000000000000e+00 0 0 0 +282 1 5.7026645299712297e-01 1.4600072778762289e+01 0.0000000000000000e+00 0 0 0 +283 1 1.1363018176625184e+00 1.3642122621820601e+01 0.0000000000000000e+00 0 0 0 +284 1 1.7030304397851530e+00 1.4600744808517264e+01 0.0000000000000000e+00 0 0 0 +285 1 2.2691832178537314e+00 1.3642819174389231e+01 0.0000000000000000e+00 0 0 0 +286 1 2.8358920720180945e+00 1.4601540713388465e+01 0.0000000000000000e+00 0 0 0 +287 1 3.4021290160499604e+00 1.3643644649633430e+01 0.0000000000000000e+00 0 0 0 +288 1 3.9688713598453158e+00 1.4602477110803298e+01 0.0000000000000000e+00 0 0 0 +289 1 4.5351441054704758e+00 1.3644605343002951e+01 0.0000000000000000e+00 0 0 0 +290 1 5.1019693529741863e+00 1.4603583498422479e+01 0.0000000000000000e+00 0 0 0 +291 1 5.6682074099496385e+00 1.3645707993168530e+01 0.0000000000000000e+00 0 0 0 +292 1 6.2351598742721581e+00 1.4604888485699654e+01 0.0000000000000000e+00 0 0 0 +293 1 6.8012626955016664e+00 1.3646941178415421e+01 0.0000000000000000e+00 0 0 0 +294 1 7.3683745276621622e+00 1.4606397643582930e+01 0.0000000000000000e+00 0 0 0 +295 1 7.9342088141787288e+00 1.3648248663869856e+01 0.0000000000000000e+00 0 0 0 +296 1 8.5014805494154277e+00 1.4608055299059714e+01 0.0000000000000000e+00 0 0 0 +297 1 9.0668968877652869e+00 1.3649493406156790e+01 0.0000000000000000e+00 0 0 0 +298 1 9.6342620410232698e+00 1.4609684783907733e+01 0.0000000000000000e+00 0 0 0 +299 1 1.0199152905272882e+01 1.3650407439181874e+01 0.0000000000000000e+00 0 0 0 +300 1 1.0766413330496736e+01 1.4610836305969919e+01 
0.0000000000000000e+00 0 0 0 +301 1 1.1330921232704116e+01 1.3650669393063648e+01 0.0000000000000000e+00 0 0 0 +302 1 1.1897841502623006e+01 1.4610930556808350e+01 0.0000000000000000e+00 0 0 0 +303 1 1.2462421601307861e+01 1.3650120591395567e+01 0.0000000000000000e+00 0 0 0 +304 1 1.3029018938638984e+01 1.4609761134444172e+01 0.0000000000000000e+00 0 0 0 +305 1 1.3594046992755665e+01 1.3648816127477271e+01 0.0000000000000000e+00 0 0 0 +306 1 1.4160647838817273e+01 1.4607529632893289e+01 0.0000000000000000e+00 0 0 0 +307 1 1.4726158301286814e+01 1.3646976680611393e+01 0.0000000000000000e+00 0 0 0 +308 1 1.5293172565463893e+01 1.4604737496674128e+01 0.0000000000000000e+00 0 0 0 +309 1 1.5858902039329786e+01 1.3644935977195637e+01 0.0000000000000000e+00 0 0 0 +310 1 1.6426542400228328e+01 1.4602024837155536e+01 0.0000000000000000e+00 0 0 0 +311 1 1.6992136000094352e+01 1.3643085312965626e+01 0.0000000000000000e+00 0 0 0 +312 1 1.7559955069272618e+01 1.4600235886953440e+01 0.0000000000000000e+00 0 0 0 +313 1 1.8125444003110619e+01 1.3641827838397322e+01 0.0000000000000000e+00 0 0 0 +314 1 1.8693119558449155e+01 1.4599324386000902e+01 0.0000000000000000e+00 0 0 0 +315 1 1.9258622464532973e+01 1.3641178402937728e+01 0.0000000000000000e+00 0 0 0 +316 1 1.9826034884164418e+01 1.4599040434292966e+01 0.0000000000000000e+00 0 0 0 +317 1 2.0391639577074734e+01 1.3641005830730871e+01 0.0000000000000000e+00 0 0 0 +318 1 2.0958793244869717e+01 1.4599158580846662e+01 0.0000000000000000e+00 0 0 0 +319 1 2.1524539566888354e+01 1.3641168546895004e+01 0.0000000000000000e+00 0 0 0 +320 1 2.2091487241099536e+01 1.4599530965127977e+01 0.0000000000000000e+00 0 0 0 +321 1 4.3165808837657372e-03 1.5558056864296654e+01 0.0000000000000000e+00 0 0 0 +322 1 5.7083714563799326e-01 1.6516708751979007e+01 0.0000000000000000e+00 0 0 0 +323 1 1.1369032012442155e+00 1.5558694023524415e+01 0.0000000000000000e+00 0 0 0 +324 1 1.7033478671452040e+00 1.6517390053019362e+01 0.0000000000000000e+00 0 0 0 
+325 1 2.2696166894760093e+00 1.5559438272441989e+01 0.0000000000000000e+00 0 0 0 +326 1 2.8360390517176817e+00 1.6518173675862919e+01 0.0000000000000000e+00 0 0 0 +327 1 3.4024835472607537e+00 1.5560308985846410e+01 0.0000000000000000e+00 0 0 0 +328 1 3.9689280293273477e+00 1.6519102391285173e+01 0.0000000000000000e+00 0 0 0 +329 1 4.5355194771089229e+00 1.5561347597079759e+01 0.0000000000000000e+00 0 0 0 +330 1 5.1020300316691811e+00 1.6520241802728972e+01 0.0000000000000000e+00 0 0 0 +331 1 5.6687261068083830e+00 1.5562609975559823e+01 0.0000000000000000e+00 0 0 0 +332 1 6.2353553251066147e+00 1.6521676870813170e+01 0.0000000000000000e+00 0 0 0 +333 1 6.8020803209306022e+00 1.5564154380715671e+01 0.0000000000000000e+00 0 0 0 +334 1 7.3688969132482525e+00 1.6523504940606951e+01 0.0000000000000000e+00 0 0 0 +335 1 7.9355107433004930e+00 1.5566016713593251e+01 0.0000000000000000e+00 0 0 0 +336 1 8.5025988576228730e+00 1.6525815249413437e+01 0.0000000000000000e+00 0 0 0 +337 1 9.0688572861916743e+00 1.5568159644275148e+01 0.0000000000000000e+00 0 0 0 +338 1 9.6362867759002064e+00 1.6528628994685398e+01 0.0000000000000000e+00 0 0 0 +339 1 1.0201819017728946e+01 1.5570385216229328e+01 0.0000000000000000e+00 0 0 0 +340 1 1.0769583800973923e+01 1.6531845844888416e+01 0.0000000000000000e+00 0 0 0 +341 1 1.1333827071325254e+01 1.5571891707346142e+01 0.0000000000000000e+00 0 0 0 +342 1 1.1901384079738815e+01 1.6533906438348030e+01 0.0000000000000000e+00 0 0 0 +343 1 1.2464735369608531e+01 1.5571536667850689e+01 0.0000000000000000e+00 0 0 0 +344 1 1.3031616332415751e+01 1.6532264065445577e+01 0.0000000000000000e+00 0 0 0 +345 1 1.3595655007692846e+01 1.5569123466092078e+01 0.0000000000000000e+00 0 0 0 +346 1 1.4162673858547034e+01 1.6527452690896975e+01 0.0000000000000000e+00 0 0 0 +347 1 1.4727720485417455e+01 1.5565353830287787e+01 0.0000000000000000e+00 0 0 0 +348 1 1.5296922688141537e+01 1.6520776714395723e+01 0.0000000000000000e+00 0 0 0 +349 1 1.5861362778086731e+01 
1.5561222799294468e+01 0.0000000000000000e+00 0 0 0 +350 1 1.6430601172841506e+01 1.6517208583528205e+01 0.0000000000000000e+00 0 0 0 +351 1 1.6994922716392164e+01 1.5558699760506759e+01 0.0000000000000000e+00 0 0 0 +352 1 1.7563499738678178e+01 1.6515675589984340e+01 0.0000000000000000e+00 0 0 0 +353 1 1.8128019408521833e+01 1.5557486534354460e+01 0.0000000000000000e+00 0 0 0 +354 1 1.8695872352982320e+01 1.6515215010141834e+01 0.0000000000000000e+00 0 0 0 +355 1 1.9260742856192188e+01 1.5557098710840322e+01 0.0000000000000000e+00 0 0 0 +356 1 1.9828039670247652e+01 1.6515278710047973e+01 0.0000000000000000e+00 0 0 0 +357 1 2.0393271644052639e+01 1.5557181842904438e+01 0.0000000000000000e+00 0 0 0 +358 1 2.0960186167839215e+01 1.6515618076673135e+01 0.0000000000000000e+00 0 0 0 +359 1 2.1525740973565441e+01 1.5557536533469163e+01 0.0000000000000000e+00 0 0 0 +360 1 2.2092409007659992e+01 1.6516113225524911e+01 0.0000000000000000e+00 0 0 0 +361 1 4.8529541639103424e-03 1.7474826118864232e+01 0.0000000000000000e+00 0 0 0 +362 1 5.7093835977538809e-01 1.8433654344787417e+01 0.0000000000000000e+00 0 0 0 +363 1 1.1371184592334547e+00 1.7475436697267657e+01 0.0000000000000000e+00 0 0 0 +364 1 1.7031721612449391e+00 1.8434245395923575e+01 0.0000000000000000e+00 0 0 0 +365 1 2.2695912343467985e+00 1.7476125500853652e+01 0.0000000000000000e+00 0 0 0 +366 1 2.8356286616018873e+00 1.8434926390559969e+01 0.0000000000000000e+00 0 0 0 +367 1 3.4022762478393873e+00 1.7476929029147396e+01 0.0000000000000000e+00 0 0 0 +368 1 3.9683006032101118e+00 1.8435747821965681e+01 0.0000000000000000e+00 0 0 0 +369 1 4.5351848152653442e+00 1.7477909278192492e+01 0.0000000000000000e+00 0 0 0 +370 1 5.1011966627212457e+00 1.8436786379097875e+01 0.0000000000000000e+00 0 0 0 +371 1 5.6683370882181823e+00 1.7479154484464996e+01 0.0000000000000000e+00 0 0 0 +372 1 6.2343468444568826e+00 1.8438149387991913e+01 0.0000000000000000e+00 0 0 0 +373 1 6.8017614989468314e+00 1.7480782103907771e+01 
0.0000000000000000e+00 0 0 0 +374 1 7.3678106610096554e+00 1.8439986240742563e+01 0.0000000000000000e+00 0 0 0 +375 1 7.9354841298542631e+00 1.7482944033551004e+01 0.0000000000000000e+00 0 0 0 +376 1 8.5016878917981664e+00 1.8442513240991936e+01 0.0000000000000000e+00 0 0 0 +377 1 9.0694835466447152e+00 1.7485826099637269e+01 0.0000000000000000e+00 0 0 0 +378 1 9.6361298737773780e+00 1.8446058020412732e+01 0.0000000000000000e+00 0 0 0 +379 3 1.0203552137528503e+01 1.7489611887918201e+01 0.0000000000000000e+00 0 0 0 +380 3 1.0771411102042309e+01 1.8452757080501815e+01 0.0000000000000000e+00 0 0 0 +381 3 1.1337379036649208e+01 1.7494680029430171e+01 0.0000000000000000e+00 0 0 0 +382 3 1.1906745469447291e+01 1.8462022833637111e+01 0.0000000000000000e+00 0 0 0 +383 3 1.2469280552911147e+01 1.7497259990645460e+01 0.0000000000000000e+00 0 0 0 +384 3 1.3036336275216634e+01 1.8467761987124586e+01 0.0000000000000000e+00 0 0 0 +385 3 1.3598327635293121e+01 1.7492598542232617e+01 0.0000000000000000e+00 0 0 0 +386 3 1.4175415687021504e+01 1.8442378825657116e+01 0.0000000000000000e+00 0 0 0 +387 3 1.4734039006741700e+01 1.7480807237018496e+01 0.0000000000000000e+00 0 0 0 +388 3 1.5307067776131944e+01 1.8435324828663727e+01 0.0000000000000000e+00 0 0 0 +389 3 1.5867533841026015e+01 1.7475841850521086e+01 0.0000000000000000e+00 0 0 0 +390 3 1.6437492616293390e+01 1.8432690656917494e+01 0.0000000000000000e+00 0 0 0 +391 3 1.6999775067986949e+01 1.7473899503923125e+01 0.0000000000000000e+00 0 0 0 +392 3 1.7567888910690055e+01 1.8431965422256074e+01 0.0000000000000000e+00 0 0 0 +393 1 1.8131542880564929e+01 1.7473449678636637e+01 0.0000000000000000e+00 0 0 0 +394 1 1.8698655140485940e+01 1.8432031695013379e+01 0.0000000000000000e+00 0 0 0 +395 1 1.9263155459010161e+01 1.7473512474723787e+01 0.0000000000000000e+00 0 0 0 +396 1 1.9829735002682860e+01 1.8432284732233299e+01 0.0000000000000000e+00 0 0 0 +397 1 2.0394851027090610e+01 1.7473827778588312e+01 0.0000000000000000e+00 0 0 0 
+398 1 2.0961149629160545e+01 1.8432663919612150e+01 0.0000000000000000e+00 0 0 0 +399 1 2.1526715232103822e+01 1.7474283624212315e+01 0.0000000000000000e+00 0 0 0 +400 1 2.2092870034810801e+01 1.8433127631588540e+01 0.0000000000000000e+00 0 0 0 +401 1 4.8013042136807321e-03 1.9396043340320471e+01 0.0000000000000000e+00 0 0 0 +402 1 1.1367941837037772e+00 1.9396530996055038e+01 0.0000000000000000e+00 0 0 0 +403 1 2.2690256110731926e+00 1.9397098404349112e+01 0.0000000000000000e+00 0 0 0 +404 1 3.4014603716469214e+00 1.9397780714315690e+01 0.0000000000000000e+00 0 0 0 +405 1 4.5340819027961929e+00 1.9398637844588755e+01 0.0000000000000000e+00 0 0 0 +406 1 5.6668967352607975e+00 1.9399756780683603e+01 0.0000000000000000e+00 0 0 0 +407 1 6.7999442980869498e+00 1.9401258783163115e+01 0.0000000000000000e+00 0 0 0 +408 1 7.9333186797521877e+00 1.9403316975932661e+01 0.0000000000000000e+00 0 0 0 +409 1 9.0672196895765094e+00 1.9406194727028712e+01 0.0000000000000000e+00 0 0 0 +410 3 1.0202094493927524e+01 1.9410357283161499e+01 0.0000000000000000e+00 0 0 0 +411 3 1.1344493619984819e+01 1.9424786980556338e+01 0.0000000000000000e+00 0 0 0 +412 3 1.2637742177785299e+01 1.9930889974773997e+01 0.0000000000000000e+00 0 0 0 +413 3 1.3631034994726834e+01 1.9413135103231994e+01 0.0000000000000000e+00 0 0 0 +414 3 1.4752642655140622e+01 1.9401337789912994e+01 0.0000000000000000e+00 0 0 0 +415 3 1.5877850085888255e+01 1.9396855373999653e+01 0.0000000000000000e+00 0 0 0 +416 3 1.7005401679016000e+01 1.9395287677046394e+01 0.0000000000000000e+00 0 0 0 +417 1 1.8134615217150579e+01 1.9394983390043944e+01 0.0000000000000000e+00 0 0 0 +418 1 1.9264825666738730e+01 1.9395059679793604e+01 0.0000000000000000e+00 0 0 0 +419 1 2.0395687437852093e+01 1.9395286212420899e+01 0.0000000000000000e+00 0 0 0 +420 1 2.1527023164623820e+01 1.9395624428209523e+01 0.0000000000000000e+00 0 0 0 + +Velocities + +1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +2 0.0000000000000000e+00 
0.0000000000000000e+00 0.0000000000000000e+00 +3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +29 0.0000000000000000e+00 0.0000000000000000e+00 
0.0000000000000000e+00 +30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +42 1.4582830912290846e-05 8.8846352819582645e-06 0.0000000000000000e+00 +43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +44 1.7312859198533731e-05 8.9315474912415886e-06 0.0000000000000000e+00 +45 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +46 1.9675072488620216e-05 9.1850485538530490e-06 0.0000000000000000e+00 +47 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +48 2.1433074233334628e-05 9.6286706528260491e-06 0.0000000000000000e+00 +49 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +50 2.2405931370299284e-05 1.0203419110643709e-05 0.0000000000000000e+00 +51 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +52 2.2491059900157929e-05 1.0820884817441812e-05 0.0000000000000000e+00 +53 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +54 2.1681031416712071e-05 1.1392109985469690e-05 0.0000000000000000e+00 +55 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +56 2.0060641760669130e-05 1.1861625865799975e-05 0.0000000000000000e+00 +57 
0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +58 1.7796484401635950e-05 1.2218953480137158e-05 0.0000000000000000e+00 +59 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +60 1.5112094617093786e-05 1.2477438154365889e-05 0.0000000000000000e+00 +61 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +62 1.2267198630926124e-05 1.2639445999779870e-05 0.0000000000000000e+00 +63 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +64 9.5350822789358476e-06 1.2675742401072055e-05 0.0000000000000000e+00 +65 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +66 7.1824119616190063e-06 1.2539493069881058e-05 0.0000000000000000e+00 +67 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +68 5.4428729520064613e-06 1.2202930059423273e-05 0.0000000000000000e+00 +69 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +70 4.4916342132800272e-06 1.1686680302800539e-05 0.0000000000000000e+00 +71 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +72 4.4211188598874964e-06 1.1059784694772424e-05 0.0000000000000000e+00 +73 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +74 5.2335171901646275e-06 1.0413115381938121e-05 0.0000000000000000e+00 +75 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +76 6.8418345483267640e-06 9.8270961040952689e-06 0.0000000000000000e+00 +77 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +78 9.0862581078626780e-06 9.3562441357185347e-06 0.0000000000000000e+00 +79 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 +80 1.1750898218046424e-05 9.0341536486321376e-06 0.0000000000000000e+00 +81 2.6507896911975861e-05 1.6371545140358991e-05 0.0000000000000000e+00 +82 4.4071318998573086e-05 2.1693750644120881e-05 0.0000000000000000e+00 +83 3.1968282279407066e-05 1.6236651087870452e-05 0.0000000000000000e+00 +84 5.1796828963389391e-05 
2.2204255617741203e-05 0.0000000000000000e+00 +85 3.6954144790935994e-05 1.6513220966131449e-05 0.0000000000000000e+00 +86 5.8466837911599906e-05 2.3765145047455889e-05 0.0000000000000000e+00 +87 4.0988263449448856e-05 1.7195597287912591e-05 0.0000000000000000e+00 +88 6.3421700984930829e-05 2.6202560196158383e-05 0.0000000000000000e+00 +89 4.3676514981254302e-05 1.8262201659237366e-05 0.0000000000000000e+00 +90 6.6189231995872341e-05 2.9208208903089284e-05 0.0000000000000000e+00 +91 4.4757820844412710e-05 1.9680681285465242e-05 0.0000000000000000e+00 +92 6.6503049702947023e-05 3.2363912757959332e-05 0.0000000000000000e+00 +93 4.4112984955796063e-05 2.1371026788621213e-05 0.0000000000000000e+00 +94 6.4345177020566643e-05 3.5287785267920645e-05 0.0000000000000000e+00 +95 4.1787976800244936e-05 2.3162941413835267e-05 0.0000000000000000e+00 +96 5.9928744845970383e-05 3.7723007380205399e-05 0.0000000000000000e+00 +97 3.7995079634412303e-05 2.4792875906820557e-05 0.0000000000000000e+00 +98 5.3679482622744082e-05 3.9576112916242527e-05 0.0000000000000000e+00 +99 3.3102802347870566e-05 2.5962643816709875e-05 0.0000000000000000e+00 +100 4.6196928202386916e-05 4.0841431876258162e-05 0.0000000000000000e+00 +101 2.7604731797030090e-05 2.6452929465938663e-05 0.0000000000000000e+00 +102 3.8198605610544056e-05 4.1488731650403403e-05 0.0000000000000000e+00 +103 2.2062125644364829e-05 2.6211631192676194e-05 0.0000000000000000e+00 +104 3.0461532031511585e-05 4.1405446569862951e-05 0.0000000000000000e+00 +105 1.7036890410487144e-05 2.5367151114186193e-05 0.0000000000000000e+00 +106 2.3743747198513856e-05 4.0442707371844690e-05 0.0000000000000000e+00 +107 1.3025410253023063e-05 2.4148417954514280e-05 0.0000000000000000e+00 +108 1.8714618137705143e-05 3.8535713097978544e-05 0.0000000000000000e+00 +109 1.0408634299164180e-05 2.2777378345262281e-05 0.0000000000000000e+00 +110 1.5879886471080462e-05 3.5803991688099920e-05 0.0000000000000000e+00 +111 9.4236282619897256e-06 
2.1395799353203516e-05 0.0000000000000000e+00 +112 1.5528508660313246e-05 3.2559849894401442e-05 0.0000000000000000e+00 +113 1.0151636970505765e-05 2.0069631778223131e-05 0.0000000000000000e+00 +114 1.7697076907652322e-05 2.9219489273322326e-05 0.0000000000000000e+00 +115 1.2515393741428794e-05 1.8835255026813244e-05 0.0000000000000000e+00 +116 2.2166639673992795e-05 2.6190973817683679e-05 0.0000000000000000e+00 +117 1.6287686218571795e-05 1.7749195377506077e-05 0.0000000000000000e+00 +118 2.8490182736201632e-05 2.3793837746782787e-05 0.0000000000000000e+00 +119 2.1107512829311378e-05 1.6897890319175769e-05 0.0000000000000000e+00 +120 3.6037837106045466e-05 2.2245603004594772e-05 0.0000000000000000e+00 +121 5.3754680927831089e-05 2.5818243561840512e-05 0.0000000000000000e+00 +122 7.3852984703953750e-05 2.7472934391924166e-05 0.0000000000000000e+00 +123 6.4028996009385391e-05 2.5673459200729887e-05 0.0000000000000000e+00 +124 8.5908169593167111e-05 2.8779655657983422e-05 0.0000000000000000e+00 +125 7.3452059594226324e-05 2.6972883064637495e-05 0.0000000000000000e+00 +126 9.6325597970378315e-05 3.2438649799599460e-05 0.0000000000000000e+00 +127 8.1100712408732978e-05 2.9661132010523619e-05 0.0000000000000000e+00 +128 1.0416393664254911e-04 3.7996059391476322e-05 0.0000000000000000e+00 +129 8.6279187735689372e-05 3.3655093070911189e-05 0.0000000000000000e+00 +130 1.0867812733907078e-04 4.4662561906767820e-05 0.0000000000000000e+00 +131 8.8474833676771911e-05 3.8743354165649700e-05 0.0000000000000000e+00 +132 1.0953332540892396e-04 5.1642519960343192e-05 0.0000000000000000e+00 +133 8.7489301482666931e-05 4.4565301821478113e-05 0.0000000000000000e+00 +134 1.0664519563149365e-04 5.8191926603172465e-05 0.0000000000000000e+00 +135 8.3386685919597045e-05 5.0496180582897816e-05 0.0000000000000000e+00 +136 1.0029664202627839e-04 6.3854180722835299e-05 0.0000000000000000e+00 +137 7.6554421387387345e-05 5.5724916856637890e-05 0.0000000000000000e+00 +138 9.1026233328924885e-05 
6.8337584627018583e-05 0.0000000000000000e+00 +139 6.7636782318513533e-05 5.9415263115523977e-05 0.0000000000000000e+00 +140 7.9687801662836277e-05 7.1465060890868459e-05 0.0000000000000000e+00 +141 5.7497112140338785e-05 6.0967609586633220e-05 0.0000000000000000e+00 +142 6.7357844628560681e-05 7.3031410372387953e-05 0.0000000000000000e+00 +143 4.7129030804640708e-05 6.0212629249287861e-05 0.0000000000000000e+00 +144 5.5250218292995579e-05 7.2797466423152327e-05 0.0000000000000000e+00 +145 3.7563119690651669e-05 5.7436898093437411e-05 0.0000000000000000e+00 +146 4.4567659752612482e-05 7.0555413628180241e-05 0.0000000000000000e+00 +147 2.9761856381290211e-05 5.3240161725660877e-05 0.0000000000000000e+00 +148 3.6379968408071024e-05 6.6254587745984145e-05 0.0000000000000000e+00 +149 2.4507249328654254e-05 4.8297754292616488e-05 0.0000000000000000e+00 +150 3.1513287993668526e-05 6.0149614548130757e-05 0.0000000000000000e+00 +151 2.2321658045569226e-05 4.3171432510556026e-05 0.0000000000000000e+00 +152 3.0489149535510922e-05 5.2826430989590112e-05 0.0000000000000000e+00 +153 2.3406691442805219e-05 3.8251620084219645e-05 0.0000000000000000e+00 +154 3.3434005351227340e-05 4.5149642749209296e-05 0.0000000000000000e+00 +155 2.7646263937105427e-05 3.3806816567877322e-05 0.0000000000000000e+00 +156 4.0072022728543377e-05 3.8051851023877120e-05 0.0000000000000000e+00 +157 3.4612453095456479e-05 3.0084179504664271e-05 0.0000000000000000e+00 +158 4.9715995320497382e-05 3.2366348850974144e-05 0.0000000000000000e+00 +159 4.3610755492928814e-05 2.7338724794948033e-05 0.0000000000000000e+00 +160 6.1380291935873857e-05 2.8717537667974358e-05 0.0000000000000000e+00 +161 8.1494982491825407e-05 2.8148953219575557e-05 0.0000000000000000e+00 +162 1.0332280599497646e-04 2.6201503829225565e-05 0.0000000000000000e+00 +163 9.5761847160871774e-05 2.7946357434336516e-05 0.0000000000000000e+00 +164 1.1864695815872809e-04 2.8356135457834722e-05 0.0000000000000000e+00 +165 1.0885399470738391e-04 
3.0748146450938071e-05 0.0000000000000000e+00 +166 1.3196232705661449e-04 3.4623883717438410e-05 0.0000000000000000e+00 +167 1.1960145227112585e-04 3.6566084099414615e-05 0.0000000000000000e+00 +168 1.4195026861275745e-04 4.3922046815628174e-05 0.0000000000000000e+00 +169 1.2694731664409029e-04 4.5104351462522260e-05 0.0000000000000000e+00 +170 1.4795217588335909e-04 5.5130340832613979e-05 0.0000000000000000e+00 +171 1.3031757484079781e-04 5.5906601170376687e-05 0.0000000000000000e+00 +172 1.4942385512754647e-04 6.7008660580344934e-05 0.0000000000000000e+00 +173 1.2933895493945637e-04 6.7959205844179285e-05 0.0000000000000000e+00 +174 1.4640862693120205e-04 7.8808347871067721e-05 0.0000000000000000e+00 +175 1.2418770632620368e-04 7.9948632905278019e-05 0.0000000000000000e+00 +176 1.3899686227644703e-04 8.9704374162949410e-05 0.0000000000000000e+00 +177 1.1530175670323509e-04 9.0294899930563893e-05 0.0000000000000000e+00 +178 1.2781482749801704e-04 9.8919094420893419e-05 0.0000000000000000e+00 +179 1.0353662297703481e-04 9.7655023731693666e-05 0.0000000000000000e+00 +180 1.1373191796318627e-04 1.0541969215504335e-04 0.0000000000000000e+00 +181 8.9924310567240939e-05 1.0107679026970171e-04 0.0000000000000000e+00 +182 9.8144155759054283e-05 1.0842945197242355e-04 0.0000000000000000e+00 +183 7.5715426026251240e-05 1.0020899633485792e-04 0.0000000000000000e+00 +184 8.2585472575644195e-05 1.0759497773947524e-04 0.0000000000000000e+00 +185 6.2273313315312773e-05 9.5280171371694988e-05 0.0000000000000000e+00 +186 6.8676150395344254e-05 1.0303784847881596e-04 0.0000000000000000e+00 +187 5.0970916505555661e-05 8.7088088473526171e-05 0.0000000000000000e+00 +188 5.7773431465618642e-05 9.5162943772818259e-05 0.0000000000000000e+00 +189 4.3046171895633206e-05 7.6794608467548045e-05 0.0000000000000000e+00 +190 5.1018825812688962e-05 8.4471826448580955e-05 0.0000000000000000e+00 +191 3.9350805046373119e-05 6.5685772521718177e-05 0.0000000000000000e+00 +192 4.9118794840322114e-05 
7.1798478806600616e-05 0.0000000000000000e+00 +193 4.0296544731752153e-05 5.4848218154306130e-05 0.0000000000000000e+00 +194 5.2373823993379091e-05 5.8337807582076184e-05 0.0000000000000000e+00 +195 4.5754994278033105e-05 4.5116316024063770e-05 0.0000000000000000e+00 +196 6.0480102795976155e-05 4.5653533328626764e-05 0.0000000000000000e+00 +197 5.5147574075957093e-05 3.7092945765346277e-05 0.0000000000000000e+00 +198 7.2607642712602124e-05 3.5308098903458904e-05 0.0000000000000000e+00 +199 6.7490113431981086e-05 3.1302088338741595e-05 0.0000000000000000e+00 +200 8.7399794370653100e-05 2.8568976378293145e-05 0.0000000000000000e+00 +201 1.0893722215793989e-04 2.3548242675947018e-05 0.0000000000000000e+00 +202 1.3096734611020011e-04 1.8222479559561189e-05 0.0000000000000000e+00 +203 1.2587405764826476e-04 2.3075086083653581e-05 0.0000000000000000e+00 +204 1.4794813469964918e-04 2.1310741972757686e-05 0.0000000000000000e+00 +205 1.4140062010542452e-04 2.7813335553710658e-05 0.0000000000000000e+00 +206 1.6232505969042551e-04 3.0392445992766945e-05 0.0000000000000000e+00 +207 1.5397074924697088e-04 3.7554013992723016e-05 0.0000000000000000e+00 +208 1.7307084911688810e-04 4.3967269632201604e-05 0.0000000000000000e+00 +209 1.6265299884253917e-04 5.1975640619979227e-05 0.0000000000000000e+00 +210 1.7920198664424654e-04 6.0190020213309052e-05 0.0000000000000000e+00 +211 1.6652150024137926e-04 6.9718144800536699e-05 0.0000000000000000e+00 +212 1.8081789854731273e-04 7.8360099041392683e-05 0.0000000000000000e+00 +213 1.6556181720350750e-04 8.9018344965798718e-05 0.0000000000000000e+00 +214 1.7752817256212197e-04 9.7575999242155437e-05 0.0000000000000000e+00 +215 1.5977208852566740e-04 1.0737397867986304e-04 0.0000000000000000e+00 +216 1.6979140654165145e-04 1.1697357648400367e-04 0.0000000000000000e+00 +217 1.4991716557360079e-04 1.2288188040735228e-04 0.0000000000000000e+00 +218 1.5773246565703171e-04 1.3407831270088809e-04 0.0000000000000000e+00 +219 1.3669127604115129e-04 
1.3409320273816903e-04 0.0000000000000000e+00 +220 1.4251229678068216e-04 1.4611735938454550e-04 0.0000000000000000e+00 +221 1.2122307334826826e-04 1.4035076438954076e-04 0.0000000000000000e+00 +222 1.2529950168048559e-04 1.5081786662543979e-04 0.0000000000000000e+00 +223 1.0469592755885001e-04 1.4095013503932427e-04 0.0000000000000000e+00 +224 1.0808583047877408e-04 1.4794462550140816e-04 0.0000000000000000e+00 +225 8.8627241500263001e-05 1.3548056521811501e-04 0.0000000000000000e+00 +226 9.2458332531887566e-05 1.3885968265358318e-04 0.0000000000000000e+00 +227 7.4741877098264211e-05 1.2401626995789950e-04 0.0000000000000000e+00 +228 8.0205883060811833e-05 1.2520982577425913e-04 0.0000000000000000e+00 +229 6.4625586397733592e-05 1.0787656142000266e-04 0.0000000000000000e+00 +230 7.2455365263725255e-05 1.0828209149006050e-04 0.0000000000000000e+00 +231 5.9588283630412220e-05 8.9170324651604443e-05 0.0000000000000000e+00 +232 7.0192628935414336e-05 8.8909581629389317e-05 0.0000000000000000e+00 +233 6.0204784194593798e-05 7.0300012127155753e-05 0.0000000000000000e+00 +234 7.3736654845263383e-05 6.8446290010432812e-05 0.0000000000000000e+00 +235 6.6430610182749772e-05 5.3155748135392397e-05 0.0000000000000000e+00 +236 8.2877314926921305e-05 4.8905829630990080e-05 0.0000000000000000e+00 +237 7.7513488386102286e-05 3.9091588598701160e-05 0.0000000000000000e+00 +238 9.6520171103331445e-05 3.2781173935459572e-05 0.0000000000000000e+00 +239 9.2232416915701703e-05 2.8983592081175541e-05 0.0000000000000000e+00 +240 1.1323768745207866e-04 2.2099480605147771e-05 0.0000000000000000e+00 +241 1.3459865563319741e-04 1.2703227752398906e-05 0.0000000000000000e+00 +242 1.5474472990034682e-04 4.9028952578147883e-06 0.0000000000000000e+00 +243 1.5215092550023015e-04 1.2097129719583488e-05 0.0000000000000000e+00 +244 1.7049283829256650e-04 9.1706083284784945e-06 0.0000000000000000e+00 +245 1.6776008627299133e-04 1.9099097427610624e-05 0.0000000000000000e+00 +246 1.8333367243371162e-04 
2.1604042292807520e-05 0.0000000000000000e+00 +247 1.8008195062651708e-04 3.3615378061081087e-05 0.0000000000000000e+00 +248 1.9189650531499411e-04 3.9685789135164573e-05 0.0000000000000000e+00 +249 1.8783573502138761e-04 5.4407048131235179e-05 0.0000000000000000e+00 +250 1.9614236422538478e-04 6.1776442206818580e-05 0.0000000000000000e+00 +251 1.9089458289321614e-04 7.9516349638183995e-05 0.0000000000000000e+00 +252 1.9558796322159504e-04 8.6892146170328155e-05 0.0000000000000000e+00 +253 1.8874887732394583e-04 1.0500710476222600e-04 0.0000000000000000e+00 +254 1.9152092472987035e-04 1.1597519058197392e-04 0.0000000000000000e+00 +255 1.8256170726520249e-04 1.2813543516461412e-04 0.0000000000000000e+00 +256 1.8317412822647789e-04 1.4660018932962857e-04 0.0000000000000000e+00 +257 1.7255263462072589e-04 1.4676532692364173e-04 0.0000000000000000e+00 +258 1.7180488878385214e-04 1.7463142934331309e-04 0.0000000000000000e+00 +259 1.5968255923162790e-04 1.6112886872639011e-04 0.0000000000000000e+00 +260 1.5736418072846288e-04 1.9373036611985696e-04 0.0000000000000000e+00 +261 1.4438478717201006e-04 1.7104686393958903e-04 0.0000000000000000e+00 +262 1.4172732865070544e-04 1.9993397606046031e-04 0.0000000000000000e+00 +263 1.2780764883044244e-04 1.7581062997197168e-04 0.0000000000000000e+00 +264 1.2551429537422006e-04 1.9326020335803872e-04 0.0000000000000000e+00 +265 1.1143818436667695e-04 1.7279653463973672e-04 0.0000000000000000e+00 +266 1.1129339275113247e-04 1.7696772682632424e-04 0.0000000000000000e+00 +267 9.7026315827934351e-05 1.6059526683700199e-04 0.0000000000000000e+00 +268 1.0011077369688101e-04 1.5518094661065159e-04 0.0000000000000000e+00 +269 8.6522173578507114e-05 1.3949676203897713e-04 0.0000000000000000e+00 +270 9.3664351269599769e-05 1.3012327167812935e-04 0.0000000000000000e+00 +271 8.1291625785936272e-05 1.1272276579557331e-04 0.0000000000000000e+00 +272 9.2500602391673231e-05 1.0286100151558550e-04 0.0000000000000000e+00 +273 8.2174156364853157e-05 
8.4321872533994066e-05 0.0000000000000000e+00 +274 9.7130125497925732e-05 7.4496044974704109e-05 0.0000000000000000e+00 +275 8.9034151680274584e-05 5.8030339746749375e-05 0.0000000000000000e+00 +276 1.0678389790186836e-04 4.7638087115883802e-05 0.0000000000000000e+00 +277 1.0119535639935181e-04 3.6303198355693708e-05 0.0000000000000000e+00 +278 1.2094537118731950e-04 2.5060744325496045e-05 0.0000000000000000e+00 +279 1.1693017924424426e-04 2.0976803055913763e-05 0.0000000000000000e+00 +280 1.3749539877318216e-04 1.0260590705305633e-05 0.0000000000000000e+00 +281 1.5664700963922696e-04 -2.3143582219387062e-06 0.0000000000000000e+00 +282 1.7213521099798126e-04 -1.0893217653461935e-05 0.0000000000000000e+00 +283 1.7163725578470403e-04 -2.7032144307298500e-06 0.0000000000000000e+00 +284 1.8302101438290170e-04 -4.7015876043674941e-06 0.0000000000000000e+00 +285 1.8408986700031230e-04 7.3762135624856621e-06 0.0000000000000000e+00 +286 1.9034465344562048e-04 1.1634737927368466e-05 0.0000000000000000e+00 +287 1.9261711656854316e-04 2.7002357232706195e-05 0.0000000000000000e+00 +288 1.9361558083412500e-04 3.5128829947321527e-05 0.0000000000000000e+00 +289 1.9675951681764233e-04 5.4504542291358708e-05 0.0000000000000000e+00 +290 1.9229119872704733e-04 6.3160704993402156e-05 0.0000000000000000e+00 +291 1.9589287639141945e-04 8.5624062570252574e-05 0.0000000000000000e+00 +292 1.8763172746254775e-04 9.5926064062507038e-05 0.0000000000000000e+00 +293 1.9144012028784324e-04 1.1574950605207010e-04 0.0000000000000000e+00 +294 1.7925388721026504e-04 1.3336630569376064e-04 0.0000000000000000e+00 +295 1.8325338492376081e-04 1.3941523897602475e-04 0.0000000000000000e+00 +296 1.7111384196688562e-04 1.7606151476373124e-04 0.0000000000000000e+00 +297 1.7441139798144990e-04 1.5854503282415543e-04 0.0000000000000000e+00 +298 1.6032326888461984e-04 2.1446583056531008e-04 0.0000000000000000e+00 +299 1.6387438500404321e-04 1.7460023770546828e-04 0.0000000000000000e+00 +300 
1.5074052192984994e-04 2.4020838682409438e-04 0.0000000000000000e+00 +301 1.5148519378157811e-04 1.8980377776341359e-04 0.0000000000000000e+00 +302 1.3966552521236660e-04 2.4774151750977874e-04 0.0000000000000000e+00 +303 1.3793365457372421e-04 2.0064007271268226e-04 0.0000000000000000e+00 +304 1.2999175496236775e-04 2.3635880684653166e-04 0.0000000000000000e+00 +305 1.2445809284142433e-04 2.0319207617434042e-04 0.0000000000000000e+00 +306 1.2058479570330461e-04 2.1223162942725083e-04 0.0000000000000000e+00 +307 1.1335261202324864e-04 1.9202689248144918e-04 0.0000000000000000e+00 +308 1.1489966476687653e-04 1.8116775432026056e-04 0.0000000000000000e+00 +309 1.0607147008139182e-04 1.6763704556867280e-04 0.0000000000000000e+00 +310 1.1336585045824133e-04 1.4707143380834897e-04 0.0000000000000000e+00 +311 1.0344655043634243e-04 1.3351953844142965e-04 0.0000000000000000e+00 +312 1.1682664536528110e-04 1.1117223278055075e-04 0.0000000000000000e+00 +313 1.0584011765559926e-04 9.5633260364903774e-05 0.0000000000000000e+00 +314 1.2284574193182076e-04 7.5987122603626839e-05 0.0000000000000000e+00 +315 1.1395109003961985e-04 5.9134539871496392e-05 0.0000000000000000e+00 +316 1.3307867183720012e-04 4.1776312299002252e-05 0.0000000000000000e+00 +317 1.2588269209880162e-04 2.9470198058979825e-05 0.0000000000000000e+00 +318 1.4544292811228404e-04 1.3605886591663147e-05 0.0000000000000000e+00 +319 1.4084591944156649e-04 8.4739193403876596e-06 0.0000000000000000e+00 +320 1.5916834934516234e-04 -4.8366910892216897e-06 0.0000000000000000e+00 +321 1.7356992275325834e-04 -1.8042780077684453e-05 0.0000000000000000e+00 +322 1.8147503918697350e-04 -2.4540925551368165e-05 0.0000000000000000e+00 +323 1.8163463266840930e-04 -1.7130129888846129e-05 0.0000000000000000e+00 +324 1.8253898079463108e-04 -1.5598547575766951e-05 0.0000000000000000e+00 +325 1.8657806229653399e-04 -3.4133038182733444e-06 0.0000000000000000e+00 +326 1.8037702210949025e-04 5.3434050311440554e-06 0.0000000000000000e+00 
+327 1.8771879764928681e-04 2.1715518823040016e-05 0.0000000000000000e+00 +328 1.7471074558026713e-04 3.4539745108762290e-05 0.0000000000000000e+00 +329 1.8460873128030577e-04 5.4984524671188725e-05 0.0000000000000000e+00 +330 1.6600292929781353e-04 6.9008728811924969e-05 0.0000000000000000e+00 +331 1.7796512641162431e-04 9.1385034689547154e-05 0.0000000000000000e+00 +332 1.5410918238014520e-04 1.0736588228516046e-04 0.0000000000000000e+00 +333 1.6795483683047752e-04 1.2318778434343078e-04 0.0000000000000000e+00 +334 1.4157775831030313e-04 1.5014040855437159e-04 0.0000000000000000e+00 +335 1.5785083269945281e-04 1.4650075439474400e-04 0.0000000000000000e+00 +336 1.2665633684184470e-04 1.9516493438247949e-04 0.0000000000000000e+00 +337 1.4652933492756114e-04 1.5825492457397368e-04 0.0000000000000000e+00 +338 1.2214547186144981e-04 2.4167794353872549e-04 0.0000000000000000e+00 +339 1.4180074826065196e-04 1.7604592446691659e-04 0.0000000000000000e+00 +340 1.1255092108513610e-04 2.7260123374319155e-04 0.0000000000000000e+00 +341 1.3761399094902802e-04 1.9706727165017120e-04 0.0000000000000000e+00 +342 1.1522842530577449e-04 2.7911375117613133e-04 0.0000000000000000e+00 +343 1.2918411829601699e-04 2.1803343236056900e-04 0.0000000000000000e+00 +344 1.1734351128622201e-04 2.6571681944023359e-04 0.0000000000000000e+00 +345 1.2254276275087787e-04 2.2432204295978476e-04 0.0000000000000000e+00 +346 1.1974443872507096e-04 2.3680090433102696e-04 0.0000000000000000e+00 +347 1.2071540692597284e-04 2.1269652219509870e-04 0.0000000000000000e+00 +348 1.2270101616137648e-04 1.9743779542515523e-04 0.0000000000000000e+00 +349 1.2264642729289398e-04 1.8508739809863369e-04 0.0000000000000000e+00 +350 1.3383017178927455e-04 1.5156768459346495e-04 0.0000000000000000e+00 +351 1.2577540617175406e-04 1.4740321111045147e-04 0.0000000000000000e+00 +352 1.4340773421068627e-04 1.1293118958385223e-04 0.0000000000000000e+00 +353 1.3341749813102418e-04 1.0072504549123244e-04 0.0000000000000000e+00 
+354 1.5386164243670204e-04 7.2227623415051933e-05 0.0000000000000000e+00 +355 1.4204167491417043e-04 5.6498808092830119e-05 0.0000000000000000e+00 +356 1.6279824866213127e-04 3.3429940217178515e-05 0.0000000000000000e+00 +357 1.5267578941533281e-04 1.9709016910970241e-05 0.0000000000000000e+00 +358 1.7110318234027366e-04 1.1467794122178855e-06 0.0000000000000000e+00 +359 1.6340539277356244e-04 -5.5189289771696855e-06 0.0000000000000000e+00 +360 1.7740714701955653e-04 -1.8977834327990608e-05 0.0000000000000000e+00 +361 1.8452802730369313e-04 -2.8832061973687458e-05 0.0000000000000000e+00 +362 1.8186625712888205e-04 -2.9558136605260799e-05 0.0000000000000000e+00 +363 1.8024969476821697e-04 -2.5772260635909678e-05 0.0000000000000000e+00 +364 1.6761747789816595e-04 -1.7724478134172529e-05 0.0000000000000000e+00 +365 1.7295655435232461e-04 -8.2023972909357763e-06 0.0000000000000000e+00 +366 1.5167563906890865e-04 7.1136721706312189e-06 0.0000000000000000e+00 +367 1.6300729193455892e-04 2.1421261796460145e-05 0.0000000000000000e+00 +368 1.3513262146713916e-04 4.1132163185070293e-05 0.0000000000000000e+00 +369 1.5092813590493438e-04 5.9201618093873702e-05 0.0000000000000000e+00 +370 1.1841008358865480e-04 8.0419467894957835e-05 0.0000000000000000e+00 +371 1.3728404354778657e-04 9.8997396367778159e-05 0.0000000000000000e+00 +372 1.0207956567589023e-04 1.2179874114308306e-04 0.0000000000000000e+00 +373 1.2285830843381253e-04 1.3408468250307866e-04 0.0000000000000000e+00 +374 8.5374614775879387e-05 1.6300686953091837e-04 0.0000000000000000e+00 +375 1.0823800239604665e-04 1.5689628308888026e-04 0.0000000000000000e+00 +376 7.0794245443767117e-05 2.0076846781781206e-04 0.0000000000000000e+00 +377 9.4916240542002045e-05 1.6757505383186989e-04 0.0000000000000000e+00 +378 5.3305776376960658e-05 2.3409206018084466e-04 0.0000000000000000e+00 +379 9.7006448817647123e-05 1.6923635825827552e-04 0.0000000000000000e+00 +380 7.3465624650027872e-05 2.8247344576399282e-04 
0.0000000000000000e+00 +381 1.0549927592481066e-04 2.0928780749128950e-04 0.0000000000000000e+00 +382 8.2435739649036618e-05 2.8810010190574649e-04 0.0000000000000000e+00 +383 1.0402453921892486e-04 2.4985200604758925e-04 0.0000000000000000e+00 +384 1.0698549586748876e-04 2.9499323828433428e-04 0.0000000000000000e+00 +385 1.0406074064549273e-04 2.4902814952768373e-04 0.0000000000000000e+00 +386 1.2548092418124016e-04 2.5651719838597350e-04 0.0000000000000000e+00 +387 1.2351646868146525e-04 2.1977784401163054e-04 0.0000000000000000e+00 +388 1.5004190410192569e-04 2.0137571208681400e-04 0.0000000000000000e+00 +389 1.3393580166351108e-04 1.9255393507981809e-04 0.0000000000000000e+00 +390 1.6295883087441844e-04 1.5199213302359484e-04 0.0000000000000000e+00 +391 1.5063751113547077e-04 1.4694859067203417e-04 0.0000000000000000e+00 +392 1.8000897081745795e-04 1.0619343548646192e-04 0.0000000000000000e+00 +393 1.6561309378602883e-04 9.7037670677296156e-05 0.0000000000000000e+00 +394 1.9382745100814531e-04 6.5648980655922534e-05 0.0000000000000000e+00 +395 1.7695741527044807e-04 4.9716511735790200e-05 0.0000000000000000e+00 +396 2.0065188597728214e-04 2.5746708320466968e-05 0.0000000000000000e+00 +397 1.8297867804266331e-04 1.0615003992406336e-05 0.0000000000000000e+00 +398 1.9982611540716595e-04 -6.5682876870518107e-06 0.0000000000000000e+00 +399 1.8556554896400902e-04 -1.6509803537161082e-05 0.0000000000000000e+00 +400 1.9312536581833663e-04 -2.5912353602364910e-05 0.0000000000000000e+00 +401 1.9023801410824606e-04 -2.7464462701394523e-05 0.0000000000000000e+00 +402 1.6698242474704196e-04 -2.2099770957974276e-05 0.0000000000000000e+00 +403 1.4243470745724796e-04 -2.3480099448894497e-06 0.0000000000000000e+00 +404 1.1876992118927330e-04 2.8893674554990122e-05 0.0000000000000000e+00 +405 9.7519654816455303e-05 6.7473438674861880e-05 0.0000000000000000e+00 +406 7.9406575250692843e-05 1.0857040090545463e-04 0.0000000000000000e+00 +407 6.4539515581651943e-05 
1.4692649268945184e-04 0.0000000000000000e+00 +408 5.1587395282262643e-05 1.7813848972085620e-04 0.0000000000000000e+00 +409 3.9112490387149122e-05 1.9658956051422694e-04 0.0000000000000000e+00 +410 5.0646024795521216e-05 2.1400372314275622e-04 0.0000000000000000e+00 +411 5.0846759072495365e-05 2.4269431977691632e-04 0.0000000000000000e+00 +412 -1.1482904584733112e-04 -1.4373303263865990e-04 0.0000000000000000e+00 +413 1.5511043799162975e-04 2.5875085720661746e-04 0.0000000000000000e+00 +414 1.6626514293530906e-04 2.1735425519646309e-04 0.0000000000000000e+00 +415 1.8641734272053080e-04 1.7687638118890660e-04 0.0000000000000000e+00 +416 2.0380463041431767e-04 1.3085055646827544e-04 0.0000000000000000e+00 +417 2.1756044245783355e-04 8.4550353671555104e-05 0.0000000000000000e+00 +418 2.2486305540664193e-04 4.3653832482456800e-05 0.0000000000000000e+00 +419 2.2188236489361566e-04 7.6524745655054321e-06 0.0000000000000000e+00 +420 2.0949238720629205e-04 -1.7218568434280989e-05 0.0000000000000000e+00 diff --git a/examples/neb/log.19Jun17.neb.hop1.end.g++.4 b/examples/neb/log.19Jun17.neb.hop1.end.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..4878b865663bc82e04a7bbc60bc539efe7e13dcd --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.4 @@ -0,0 +1,11 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 229.26196 146.68251 2.9774577 4.4127369 233.11559 0.023301843 0.0224626 1.4763579 0 -3.048332 0.33333333 -3.0250302 0.66666667 -3.0291888 1 -3.0474928 +100 0.11027532 0.085410308 3.0967938 0.024201563 0.38551033 0.0017583261 0.0021866943 1.7710358 0 -3.0483469 0.31192818 -3.0465886 0.61093022 -3.0466143 1 -3.0487752 +130 0.09954083 0.075481108 3.0927626 0.015664388 0.37491833 0.0017573704 0.0021913201 1.7713726 0 -3.048342 0.31428487 -3.0465846 0.61762817 -3.0466296 1 -3.048776 +Climbing replica = 2 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +130 0.37838747 0.3502435 3.0927626 0.015664388 0.37491833 0.0017573704 0.0021913201 1.7713726 0 -3.048342 0.31428487 -3.0465846 0.61762817 -3.0466296 1 -3.048776 +230 0.22757286 0.12027481 3.1250243 0.0081260569 0.14019507 0.0018364585 0.002278918 1.76926 0 -3.0483347 0.39730698 -3.0464983 0.64450769 -3.0466973 1 -3.0487772 +278 0.096184498 0.085088496 3.1405655 0.0068164307 0.093861113 0.0018426056 0.002286256 1.7684765 0 -3.0483338 0.41277997 -3.0464912 0.65562984 -3.0467294 1 -3.0487775 diff --git a/examples/neb/log.19Jun17.neb.hop1.end.g++.8 b/examples/neb/log.19Jun17.neb.hop1.end.g++.8 new file mode 100644 index 0000000000000000000000000000000000000000..62344b3da584e3ef2bb96f57a1535323ef11a418 --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.8 @@ -0,0 +1,11 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 229.26196 146.68251 2.9774577 4.4127369 233.11559 0.023301843 0.0224626 1.4763579 0 -3.048332 0.33333333 -3.0250302 0.66666667 -3.0291888 1 -3.0474928 +100 0.11375359 0.085350745 3.0966418 0.0236765 0.38531777 0.0017582606 0.0021868783 1.7710738 0 -3.0483467 0.31201141 -3.0465884 0.61117406 -3.0466149 1 -3.0487753 +119 0.09996986 0.078639268 3.0937691 0.017444108 0.3780308 0.0017574935 0.0021899317 1.7713574 0 -3.0483433 0.31354192 -3.0465858 0.61555533 -3.0466249 1 -3.0487758 +Climbing replica = 2 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +119 0.3793192 0.35281863 3.0937691 0.017444108 0.3780308 0.0017574935 0.0021899317 1.7713574 0 -3.0483433 0.31354192 -3.0465858 0.61555533 -3.0466249 1 -3.0487758 +219 0.20159133 0.12247026 3.1244061 0.0085896057 0.13938632 0.0018362816 0.0022783681 1.7693295 0 -3.048335 0.39646633 -3.0464988 0.64277703 -3.0466925 1 -3.0487771 +266 0.099868725 0.086180598 3.1401661 0.0070922949 0.095128081 0.001842608 0.002286044 1.7685191 0 -3.048334 0.41231024 -3.0464914 0.65425179 -3.0467252 1 -3.0487774 diff --git a/examples/neb/log.19Jun17.neb.hop1.g++.4 b/examples/neb/log.19Jun17.neb.hop1.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..e2984c031c4a06d8320b99014db19a0b3b1b74fb --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop1.g++.4 @@ -0,0 +1,9 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +0 4327.2753 2746.3378 0.082169072 4.9967651 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969 +87 0.095951502 0.052720903 0.005588927 0.065110105 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +87 0.14137277 0.11108954 0.005588927 0.065110105 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775 +124 0.099583263 0.085936899 0.0044220372 0.023873795 0.091308308 0.0071061754 0.0022863931 2.308121 0 -3.0535968 0.32223905 -3.0473329 0.61673898 -3.0464906 1 -3.048777 diff --git a/examples/neb/log.19Jun17.neb.hop1.g++.8 b/examples/neb/log.19Jun17.neb.hop1.g++.8 new file mode 100644 index 0000000000000000000000000000000000000000..d1be1284faf6583359e4f01c9eed2c955589eddf --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop1.g++.8 @@ -0,0 +1,9 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +0 4327.2753 2746.3378 0.082169072 4.9967651 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969 +87 0.095951792 0.052720902 0.0055889267 0.065110091 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +87 0.14137297 0.11108954 0.0055889267 0.065110091 0.12467831 0.0071014928 0.0022798007 2.3003372 0 -3.0535967 0.32435271 -3.0473127 0.62805027 -3.0464952 1 -3.048775 +124 0.099582186 0.08593683 0.0044220345 0.023873731 0.091308197 0.0071061754 0.0022863931 2.3081211 0 -3.0535968 0.32223904 -3.0473329 0.61673896 -3.0464906 1 -3.048777 diff --git a/examples/neb/log.19Jun17.neb.hop2.g++.4 b/examples/neb/log.19Jun17.neb.hop2.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..c6b6cbe2ceb1539afa087412c78e22dfae6d3176 --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop2.g++.4 @@ -0,0 +1,12 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 14.104748 10.419633 0.1227071 4.999238 8.2087606 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305 +100 0.24646695 0.10792196 0.0077146918 0.058733261 0.63504706 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923 +200 0.061777741 0.050288749 0.0047486883 0.0095236035 0.88698597 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938 +261 0.048699591 0.038138604 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +261 0.95753855 0.94297239 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494 +361 0.072509627 0.06580631 0.0027545765 0.0044749366 0.016746483 0.0016018879 0.0016017805 1.1704611 0 -3.0514943 0.28176307 -3.0503855 0.50355454 -3.0498924 1 -3.0514942 +381 0.04884836 0.040787876 0.0023445904 0.0035162935 0.017959209 0.0016017716 0.0016016898 1.1713862 0 -3.0514943 0.27120138 -3.0504399 0.50428218 -3.0498925 1 -3.0514942 diff --git a/examples/neb/log.19Jun17.neb.hop2.g++.8 b/examples/neb/log.19Jun17.neb.hop2.g++.8 new file mode 100644 index 0000000000000000000000000000000000000000..c6b6cbe2ceb1539afa087412c78e22dfae6d3176 --- /dev/null +++ b/examples/neb/log.19Jun17.neb.hop2.g++.8 @@ -0,0 +1,12 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 14.104748 10.419633 0.1227071 4.999238 8.2087606 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305 +100 0.24646695 0.10792196 0.0077146918 0.058733261 0.63504706 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923 +200 0.061777741 0.050288749 0.0047486883 0.0095236035 0.88698597 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938 +261 0.048699591 0.038138604 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN +261 0.95753855 0.94297239 0.0040083594 0.0074854409 0.95722712 0.0014243579 0.0014241377 1.1696848 0 -3.0514942 0.30525481 -3.0502812 0.6357998 -3.0500698 1 -3.051494 +361 0.072509627 0.06580631 0.0027545765 0.0044749366 0.016746483 0.0016018879 0.0016017805 1.1704611 0 -3.0514943 0.28176307 -3.0503855 0.50355454 -3.0498924 1 -3.0514942 +381 0.04884836 0.040787876 0.0023445904 0.0035162935 0.017959209 0.0016017716 0.0016016898 1.1713862 0 -3.0514943 0.27120138 -3.0504399 0.50428218 -3.0498925 1 -3.0514942 diff --git a/examples/neb/log.19Jun17.neb.sivac.g++.4 b/examples/neb/log.19Jun17.neb.sivac.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..0d9880ca81bbc2f3d77371cf9e32b7e460805895 --- /dev/null +++ b/examples/neb/log.19Jun17.neb.sivac.g++.4 @@ -0,0 +1,17 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.33333333 -2212.7428 0.66666667 -2212.2247 1 -2211.7959 +10 0.24005275 0.036502104 0.036483049 0.24005275 0.68351722 0.42916118 0.41794425 1.6989349 0 -2213.3365 0.32909183 -2212.9587 0.65386736 -2212.9073 1 -2213.3253 +20 0.07940898 0.016398055 0.024706844 0.07940898 0.71637784 0.41387872 0.41157886 1.7343662 0 -2213.3369 0.32478734 -2212.9621 0.65348766 -2212.923 1 -2213.3346 +30 0.094973707 0.0083631681 0.015145947 0.035267404 0.7535772 0.40072717 0.40024605 1.7504612 0 -2213.3372 0.32705584 -2212.9584 0.65894506 -2212.9365 1 -2213.3367 +40 0.027727472 0.0044528145 0.011618173 0.022562656 0.76133752 0.39614635 0.39591731 1.7547519 0 -2213.3373 0.32873163 -2212.9562 0.66124255 -2212.9411 1 -2213.337 +50 0.019429348 0.0030110281 0.0087135563 0.015391975 0.76952681 0.39274846 0.3926388 1.7578616 0 -2213.3373 0.33022595 -2212.9543 0.66307279 -2212.9446 1 -2213.3372 +60 0.019009471 0.0016234562 0.0053426307 0.0086166186 0.77759617 0.38936861 0.38933364 1.7610433 0 -2213.3374 0.33187548 -2212.9523 0.66497617 -2212.948 1 -2213.3373 +63 0.0097365134 0.0012734598 0.004777604 0.0076121987 0.77865149 0.38888778 0.38886047 1.7615294 0 -2213.3374 0.33212107 -2212.952 0.66525385 -2212.9485 1 -2213.3373 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +63 0.77865149 0.31085821 0.004777604 0.0076121987 0.77865149 0.38888778 0.38886047 1.7615294 0 -2213.3374 0.33212107 -2212.952 0.66525385 -2212.9485 1 -2213.3373 +73 0.098175496 0.033609035 0.0027886955 0.0042742148 0.036594003 0.51024838 0.51023983 1.7607181 0 -2213.3374 0.27574151 -2213.0416 0.50432348 -2212.8271 1 -2213.3374 +83 0.03341862 0.012760857 0.0020868177 0.0031625649 0.010189924 0.51014634 0.51014168 1.7602562 0 -2213.3374 0.26045338 -2213.0672 0.50355193 -2212.8272 1 -2213.3374 +93 0.0097374358 0.0028416114 0.0014003718 0.0020986584 0.0053485291 0.51011052 0.51010848 1.7601202 0 -2213.3374 0.25397887 -2213.0783 0.50388111 -2212.8273 1 -2213.3374 diff --git a/examples/neb/log.19Jun17.neb.sivac.g++.8 b/examples/neb/log.19Jun17.neb.sivac.g++.8 new file mode 100644 index 0000000000000000000000000000000000000000..260eb9e18b45081176deb84f6ed6afa4bb6729e3 --- /dev/null +++ b/examples/neb/log.19Jun17.neb.sivac.g++.8 @@ -0,0 +1,18 @@ +LAMMPS (19 May 2017) +Running on 4 partitions of processors +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.33333333 -2212.7428 0.66666667 -2212.2247 1 -2211.7959 +10 0.24005275 0.036502104 0.036483049 0.24005275 0.68351722 0.42916118 0.41794425 1.6989349 0 -2213.3365 0.32909183 -2212.9587 0.65386736 -2212.9073 1 -2213.3253 +20 0.07940898 0.016398055 0.024706844 0.07940898 0.71637784 0.41387872 0.41157886 1.7343662 0 -2213.3369 0.32478734 -2212.9621 0.65348766 -2212.923 1 -2213.3346 +30 0.094973708 0.0083631681 0.015145947 0.035267404 0.7535772 0.40072717 0.40024605 1.7504612 0 -2213.3372 0.32705584 -2212.9584 0.65894506 -2212.9365 1 -2213.3367 +40 0.027727472 0.0044528144 0.011618173 0.022562656 0.76133752 0.39614635 0.39591731 1.7547519 0 -2213.3373 0.32873163 -2212.9562 0.66124255 -2212.9411 1 -2213.337 +50 0.019429341 0.0030110281 0.0087135565 0.015391975 0.7695268 0.39274846 0.3926388 1.7578616 0 -2213.3373 0.33022595 -2212.9543 0.66307279 -2212.9446 1 -2213.3372 +60 0.019048963 0.0016262345 0.0053426844 0.0086167196 0.77759655 0.38936867 0.3893337 1.7610433 0 -2213.3374 0.33187545 -2212.9523 0.66497615 -2212.948 1 -2213.3373 +63 0.0097037048 0.0012761841 0.0047749367 0.0076075138 0.77865545 0.38888554 0.38885827 1.7615318 0 -2213.3374 0.33212221 -2212.952 0.66525512 -2212.9485 1 -2213.3373 +Climbing replica = 3 +Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN +63 0.77865545 0.3108551 0.0047749367 0.0076075138 0.77865545 0.38888554 0.38885827 1.7615318 0 -2213.3374 0.33212221 -2212.952 0.66525512 -2212.9485 1 -2213.3373 +73 0.098595989 0.033659485 0.0027927196 0.0042813387 0.038224344 0.51024759 0.51023901 1.7607156 0 -2213.3374 0.27595612 -2213.0413 0.50453988 -2212.8271 1 -2213.3374 +83 0.033344977 0.012868685 0.0020880608 0.0031645847 0.010250413 0.51014677 0.5101421 1.7602601 0 -2213.3374 0.26053624 -2213.067 0.50358775 -2212.8272 1 -2213.3374 +93 0.013254873 0.0038176141 0.0014928226 0.0022407967 0.0058577818 0.51011371 0.51011138 1.7601272 0 -2213.3374 0.25452741 -2213.0774 0.50382161 -2212.8273 1 -2213.3374 +95 0.0099964951 0.0031053214 0.0014131665 0.0021184362 0.0053683638 0.51011105 0.51010897 1.7601232 0 -2213.3374 0.2540975 -2213.0781 0.50387313 -2212.8273 1 -2213.3374 diff --git a/examples/neb/log.5Oct16.neb.hop1.g++.4 b/examples/neb/log.5Oct16.neb.hop1.g++.4 deleted file mode 100644 index c678e694937ec301bec7633aa3a7ada4dd1c7b66..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.hop1.g++.4 +++ /dev/null @@ -1,10 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 4 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -0 4327.2753 2746.3378 0.3387091 5.0075576 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969 -100 0.10482184 0.085218486 0.014588241 0.066178594 0.19602237 0.0070900402 0.0022691875 2.3031875 0 -3.0535967 0.31839181 -3.0473647 0.63987598 -3.0465067 1 -3.0487759 -111 0.096708467 0.07803707 0.013922973 0.05417562 0.2023467 0.0070871172 0.0022668002 2.3052945 0 -3.0535968 0.31853431 -3.0473633 0.64178871 -3.0465096 1 -3.0487764 -Climbing replica = 3 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -111 0.2023467 0.1777038 0.013922973 0.05417562 0.2023467 0.0070871172 0.0022668002 2.3052945 0 -3.0535968 0.31853431 -3.0473633 0.64178871 -3.0465096 1 -3.0487764 -179 0.096874474 0.090676856 0.01040177 0.023364005 0.096874474 0.0071047642 0.0022856172 2.3122768 0 -3.0535969 0.31577311 -3.0473955 0.61798541 -3.0464922 1 -3.0487778 diff --git a/examples/neb/log.5Oct16.neb.hop1.g++.8 b/examples/neb/log.5Oct16.neb.hop1.g++.8 deleted file mode 100644 index d70f02bd169ead3a66d31a986f20fcae6af7be35..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.hop1.g++.8 +++ /dev/null @@ -1,10 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 4 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -0 4327.2753 2746.3378 0.3387091 5.0075576 4514.5424 0.42933428 0.42323635 1.8941131 0 -3.0535948 0.33333333 -2.6242605 0.66666667 -2.7623811 1 -3.0474969 -100 0.10482171 0.085218406 0.014588234 0.066178435 0.19602242 0.0070900401 0.0022691875 2.3031875 0 -3.0535967 0.31839181 -3.0473647 0.639876 -3.0465067 1 -3.0487759 -111 0.096708718 0.078036984 0.013922966 0.054175505 0.20234693 0.0070871172 0.0022668002 2.3052946 0 -3.0535968 0.31853431 -3.0473633 0.64178873 -3.0465096 1 -3.0487764 -Climbing replica = 3 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -111 0.20234693 0.17770387 0.013922966 0.054175505 0.20234693 0.0070871172 0.0022668002 2.3052946 0 -3.0535968 0.31853431 -3.0473633 0.64178873 -3.0465096 1 -3.0487764 -178 0.09975409 0.093814031 0.010577358 0.024247224 0.09975409 0.0071042931 0.0022851195 2.312004 0 -3.0535969 0.31607934 -3.0473923 0.618931 -3.0464926 1 -3.0487777 diff --git a/examples/neb/log.5Oct16.neb.hop2.g++.4 b/examples/neb/log.5Oct16.neb.hop2.g++.4 deleted file mode 100644 index 99772873033421aec64500cf6c2ab85d88e85831..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.hop2.g++.4 +++ /dev/null @@ -1,18 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 4 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -0 14.104748 10.419633 0.24852044 5.0039071 8.2116049 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305 -100 0.24646695 0.10792196 0.01781018 0.098854684 0.63725646 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923 -200 0.061777741 0.050288749 0.012466513 0.020420207 0.88741041 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938 -300 0.056346766 0.030000618 0.0093152917 0.013765031 1.0101529 0.0014069751 0.0014068154 1.1699608 0 -3.0514942 0.30992449 -3.0502613 0.64174291 -3.0500873 1 -3.0514941 -400 0.025589489 0.015671005 0.0061287063 0.008588518 1.1136424 0.001370987 0.0013709154 1.1704204 0 -3.0514943 0.32016645 -3.0502198 0.65324019 -3.0501233 1 -3.0514943 -500 0.014778626 0.0092108366 0.0042668521 0.0059963914 1.1636579 0.0013527466 0.0013527072 1.1706283 0 -3.0514944 0.32550275 -3.0501993 0.65875414 -3.0501416 1 -3.0514943 -600 0.08786211 0.020876327 0.0031421548 0.0051657363 1.1898894 0.0013430848 0.0013430599 1.1707681 0 -3.0514944 0.32831927 -3.0501889 0.66160681 -3.0501513 1 -3.0514944 -633 0.0098132678 0.0055392541 
0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944 -Climbing replica = 3 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -633 1.1924486 1.1648685 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944 -733 0.095331134 0.089136608 0.0021551441 0.0031844438 0.043042998 0.0016022317 0.0016022168 1.170789 0 -3.0514944 0.29157063 -3.0503375 0.50358402 -3.0498922 1 -3.0514944 -833 0.10539135 0.030724373 0.0013749699 0.002221013 0.10539135 0.0016019798 0.001601971 1.1732118 0 -3.0514944 0.26249002 -3.0504848 0.50415223 -3.0498924 1 -3.0514944 -933 0.01883894 0.011496399 0.0011058925 0.0018178041 0.014621806 0.0016018934 0.0016018865 1.173866 0 -3.0514944 0.25788763 -3.0505113 0.50466375 -3.0498925 1 -3.0514944 -996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244 1.174511 0 -3.0514944 0.2544553 -3.0505324 0.50520462 -3.0498926 1 -3.0514944 diff --git a/examples/neb/log.5Oct16.neb.hop2.g++.8 b/examples/neb/log.5Oct16.neb.hop2.g++.8 deleted file mode 100644 index 99772873033421aec64500cf6c2ab85d88e85831..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.hop2.g++.8 +++ /dev/null @@ -1,18 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 4 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -0 14.104748 10.419633 0.24852044 5.0039071 8.2116049 0.0018276223 0.00064050211 0.98401186 0 -3.0514921 0.33333333 -3.0496673 0.66666667 -3.0496645 1 -3.050305 -100 0.24646695 0.10792196 0.01781018 0.098854684 0.63725646 0.001516756 0.0015151635 1.165391 0 -3.0514939 0.2890334 -3.0503533 0.59718494 -3.0499771 1 -3.0514923 -200 0.061777741 0.050288749 0.012466513 0.020420207 0.88741041 0.0014465772 0.0014462528 1.1692938 0 -3.0514941 0.29975094 -3.0503052 0.62768286 -3.0500476 1 -3.0514938 -300 0.056346766 0.030000618 0.0093152917 0.013765031 1.0101529 0.0014069751 0.0014068154 1.1699608 0 -3.0514942 0.30992449 -3.0502613 0.64174291 -3.0500873 1 -3.0514941 -400 0.025589489 0.015671005 0.0061287063 0.008588518 1.1136424 0.001370987 0.0013709154 1.1704204 0 -3.0514943 0.32016645 -3.0502198 0.65324019 -3.0501233 1 -3.0514943 -500 0.014778626 0.0092108366 0.0042668521 0.0059963914 1.1636579 0.0013527466 0.0013527072 1.1706283 0 -3.0514944 0.32550275 -3.0501993 0.65875414 -3.0501416 1 -3.0514943 -600 0.08786211 0.020876327 0.0031421548 0.0051657363 1.1898894 0.0013430848 0.0013430599 1.1707681 0 -3.0514944 0.32831927 -3.0501889 0.66160681 -3.0501513 1 -3.0514944 -633 0.0098132678 0.0055392541 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944 -Climbing replica = 3 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -633 1.1924486 1.1648685 0.0030063464 0.0043091323 1.1924486 0.0013420127 0.0013419893 1.1707818 0 -3.0514944 0.32862625 -3.0501878 0.66191769 -3.0501524 1 -3.0514944 -733 0.095331134 0.089136608 0.0021551441 0.0031844438 0.043042998 0.0016022317 0.0016022168 1.170789 0 -3.0514944 0.29157063 -3.0503375 0.50358402 -3.0498922 1 -3.0514944 -833 0.10539135 0.030724373 0.0013749699 0.002221013 0.10539135 0.0016019798 0.001601971 1.1732118 0 -3.0514944 0.26249002 -3.0504848 0.50415223 -3.0498924 1 -3.0514944 -933 0.01883894 0.011496399 0.0011058925 0.0018178041 0.014621806 0.0016018934 0.0016018865 1.173866 0 -3.0514944 0.25788763 -3.0505113 0.50466375 -3.0498925 1 -3.0514944 -996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244 1.174511 0 -3.0514944 0.2544553 -3.0505324 0.50520462 -3.0498926 1 -3.0514944 diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.3 b/examples/neb/log.5Oct16.neb.sivac.g++.3 deleted file mode 100644 index f6adae4a18a8b21212fed4fecbaf8fc4212c6947..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.sivac.g++.3 +++ /dev/null @@ -1,14 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 3 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959 -10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211 -20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347 -30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364 -40 0.0421393 0.0037035761 0.01173707 0.0421393 0.026104735 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371 -50 0.025897844 0.0022804241 0.0081056535 0.025897844 0.016908913 0.5101712 0.51008591 1.739143 0 -2213.3373 0.49923344 -2212.8272 1 -2213.3373 -59 0.00962839 0.0012946076 0.005657505 0.009365729 0.012040803 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373 -Climbing replica = 2 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -59 0.012040803 0.0031505502 0.005657505 0.009365729 0.012040803 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373 -63 0.009152118 0.0016692472 0.0049645771 0.0081967836 0.009152118 0.51013743 0.51010776 1.7409028 0 -2213.3374 0.50022239 -2212.8272 1 -2213.3373 diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.6 b/examples/neb/log.5Oct16.neb.sivac.g++.6 deleted file mode 100644 index e00069d0520f38d32aca4ff7c2d5889835bb6200..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.sivac.g++.6 +++ /dev/null @@ -1,14 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 3 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959 -10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211 -20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347 -30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364 -40 0.042139305 0.0037035764 0.01173707 0.042139305 0.026104735 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371 -50 0.025899631 0.0022805513 0.0081057075 0.025899631 0.016908929 0.5101712 0.51008591 1.739143 0 -2213.3373 0.49923345 -2212.8272 1 -2213.3373 -59 0.0096285044 0.0012946258 0.0056576061 0.0093678253 0.012040919 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373 -Climbing replica = 2 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN -59 0.012040919 0.0031505771 0.0056576061 0.0093678253 0.012040919 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955698 -2212.8272 1 -2213.3373 -63 0.0091523813 0.0016692845 0.0049647607 0.0081998372 0.0091523813 0.51013743 0.51010775 1.7409028 0 -2213.3374 0.50022236 -2212.8272 1 -2213.3373 diff --git a/examples/neb/log.5Oct16.neb.sivac.g++.9 b/examples/neb/log.5Oct16.neb.sivac.g++.9 deleted file mode 100644 index 31ab7c9ac6761881474111cdf8edb1459686e4e6..0000000000000000000000000000000000000000 --- a/examples/neb/log.5Oct16.neb.sivac.g++.9 +++ /dev/null @@ -1,14 +0,0 @@ -LAMMPS (5 Oct 2016) -Running on 3 partitions of processors -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -0 7.5525391 1.6345605 0.16683659 7.5525391 7.5525391 1.5383951 0 1.6207355 0 -2213.3343 0.5 -2212.4096 1 -2211.7959 -10 0.27332818 0.040944923 0.039164338 0.27332818 0.17804882 0.51235911 0.497084 1.6790474 0 -2213.3364 0.49024121 -2212.824 1 -2213.3211 -20 0.1820396 0.018049916 0.024428411 0.1820396 0.08601739 0.51038174 0.5080746 1.7224961 0 -2213.337 0.49199582 -2212.8266 1 -2213.3347 -30 0.043288796 0.0068108825 0.017372479 0.043288796 0.049466709 0.51032316 0.5095943 1.7304745 0 -2213.3371 0.49553568 -2212.8268 1 -2213.3364 -40 0.042139318 0.0037035773 0.011737071 0.042139318 0.026104737 0.51022733 0.5100163 1.7366752 0 -2213.3373 0.49838067 -2212.8271 1 -2213.3371 -50 0.025904121 0.0022808707 0.0081058431 0.025904121 0.016908969 0.5101712 0.51008591 1.7391431 0 -2213.3373 0.49923346 -2212.8272 1 -2213.3373 -59 0.0096287928 0.0012946716 0.005657861 0.0093731008 0.01204121 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955696 -2212.8272 1 -2213.3373 -Climbing replica = 2 -Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
RDN PEN -59 0.01204121 0.0031506449 0.005657861 0.0093731008 0.01204121 0.51014185 0.51010207 1.7404554 0 -2213.3374 0.49955696 -2212.8272 1 -2213.3373 -63 0.0091530442 0.0016693787 0.0049652227 0.0082075097 0.0091530442 0.51013743 0.51010775 1.7409027 0 -2213.3374 0.50022228 -2212.8272 1 -2213.3373 diff --git a/lib/gpu/Makefile.linux.mixed b/lib/gpu/Makefile.linux.mixed index 42fc8e9740c8b63662db1f5cb9a8c1f4c9a37a57..6289163ac85d539af4014ba8b2532de9d374457d 100644 --- a/lib/gpu/Makefile.linux.mixed +++ b/lib/gpu/Makefile.linux.mixed @@ -8,7 +8,6 @@ EXTRAMAKE = Makefile.lammps.standard CUDA_HOME = /usr/local/cuda -CUDA_HOME = /home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37 NVCC = nvcc # Kepler CUDA diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index 660544cfaaff1dc04f9049b4947dd304e9701067..ee2eb72632cadfc8d2ce126f034b245efebbe2eb 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -63,6 +63,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \ $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \ $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \ $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \ + $(OBJ_DIR)/lal_vashishta.o $(OBJ_DIR)/lal_vashishta_ext.o \ $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \ $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \ $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \ @@ -117,6 +118,7 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \ $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \ $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h \ $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw_cubin.h \ + $(OBJ_DIR)/vashishta.cubin $(OBJ_DIR)/vashishta_cubin.h \ $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck_cubin.h \ $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \ $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \ @@ -613,6 +615,18 @@ $(OBJ_DIR)/lal_coul_dsf.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf.cpp $(OBJ_DIR)/c $(OBJ_DIR)/lal_coul_dsf_ext.o: $(ALL_H) lal_coul_dsf.h 
lal_coul_dsf_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_dsf_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/vashishta.cubin: lal_vashishta.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_vashishta.cu + +$(OBJ_DIR)/vashishta_cubin.h: $(OBJ_DIR)/vashishta.cubin $(OBJ_DIR)/vashishta.cubin + $(BIN2C) -c -n vashishta $(OBJ_DIR)/vashishta.cubin > $(OBJ_DIR)/vashishta_cubin.h + +$(OBJ_DIR)/lal_vashishta.o: $(ALL_H) lal_vashishta.h lal_vashishta.cpp $(OBJ_DIR)/vashishta_cubin.h $(OBJ_DIR)/lal_base_three.o + $(CUDR) -o $@ -c lal_vashishta.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_vashishta_ext.o: $(ALL_H) lal_vashishta.h lal_vashishta_ext.cpp lal_base_three.h + $(CUDR) -o $@ -c lal_vashishta_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/sw.cubin: lal_sw.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_sw.cu diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index 4a5959531388ebd3e43ba513f20d7448df45131c..f116508ae5337e86ed07b6570ef1125a4726d897 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -52,6 +52,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \ $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \ $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \ $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \ + $(OBJ_DIR)/lal_vashishta.o $(OBJ_DIR)/lal_vashishta_ext.o \ $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \ $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \ $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \ @@ -92,7 +93,7 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \ $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h \ $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \ $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \ - $(OBJ_DIR)/lj_cubic_cl.h + $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/vashishta_cl.h OCL_EXECS = $(BIN_DIR)/ocl_get_devices @@ -450,6 +451,15 @@ $(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp $(OBJ_DIR)/sw_cl.h $(OBJ_DIR) $(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) 
lal_sw.h lal_sw_ext.cpp lal_base_three.h $(OCL) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/vashishta_cl.h: lal_vashishta.cu $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh vashishta $(PRE1_H) lal_vashishta.cu $(OBJ_DIR)/vashishta_cl.h; + +$(OBJ_DIR)/lal_vashishta.o: $(ALL_H) lal_vashishta.h lal_vashishta.cpp $(OBJ_DIR)/vashishta_cl.h $(OBJ_DIR)/vashishta_cl.h $(OBJ_DIR)/lal_base_three.o + $(OCL) -o $@ -c lal_vashishta.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_vashishta_ext.o: $(ALL_H) lal_vashishta.h lal_vashishta_ext.cpp lal_base_three.h + $(OCL) -o $@ -c lal_vashishta_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/beck_cl.h: lal_beck.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh beck $(PRE1_H) lal_beck.cu $(OBJ_DIR)/beck_cl.h; diff --git a/lib/gpu/lal_vashishta.cpp b/lib/gpu/lal_vashishta.cpp new file mode 100644 index 0000000000000000000000000000000000000000..96537e65d30bb3eecc7fea2390012f92fd9e44cf --- /dev/null +++ b/lib/gpu/lal_vashishta.cpp @@ -0,0 +1,283 @@ +/*************************************************************************** + vashishta.cpp + ------------------- + Anders Hafreager (UiO) + + Class for acceleration of the vashishta pair style. 
+ + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Mon June 12, 2017 + email : andershaf@gmail.com + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "vashishta_cl.h" +#elif defined(USE_CUDART) +const char *vashishta=0; +#else +#include "vashishta_cubin.h" +#endif + +#include "lal_vashishta.h" +#include <cassert> +using namespace LAMMPS_AL; +#define VashishtaT Vashishta<numtyp, acctyp> + +extern Device<PRECISION,ACC_PRECISION> device; + +template <class numtyp, class acctyp> +VashishtaT::Vashishta() : BaseThree<numtyp,acctyp>(), _allocated(false) { +} + +template <class numtyp, class acctyp> +VashishtaT::~Vashishta() { + clear(); +} + +template <class numtyp, class acctyp> +int VashishtaT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template <class numtyp, class acctyp> +int VashishtaT::init(const int ntypes, const int nlocal, const int nall, const int max_nbors, + const double cell_size, const double gpu_split, FILE *_screen, + int* host_map, const int nelements, int*** host_elem2param, const int nparams, + const double* cutsq, const double* r0, + const double* gamma, const double* eta, + const double* lam1inv, const double* lam4inv, + const double* zizj, const double* mbigd, + const double* dvrc, const double* big6w, + const double* heta, const double* bigh, + const double* bigw, const double* c0, + const double* costheta, const double* bigb, + const double* big2b, const double* bigc) +{ + int success; + success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, + _screen,vashishta,"k_vashishta","k_vashishta_three_center", + "k_vashishta_three_end"); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + 
int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + _nparams = nparams; + _nelements = nelements; + + UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; i<nparams; i++) { + dview[i].x=(numtyp)0; + dview[i].y=(numtyp)0; + dview[i].z=(numtyp)0; + dview[i].w=(numtyp)0; + } + + // pack coefficients into arrays + param1.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + + for (int i=0; i<nparams; i++) { + dview[i].x=static_cast<numtyp>(eta[i]); + dview[i].y=static_cast<numtyp>(lam1inv[i]); + dview[i].z=static_cast<numtyp>(lam4inv[i]); + dview[i].w=static_cast<numtyp>(zizj[i]); + } + + ucl_copy(param1,dview,false); + param1_tex.get_texture(*(this->pair_program),"param1_tex"); + param1_tex.bind_float(param1,4); + + param2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + + for (int i=0; i<nparams; i++) { + dview[i].x=static_cast<numtyp>(mbigd[i]); + dview[i].y=static_cast<numtyp>(dvrc[i]); + dview[i].z=static_cast<numtyp>(big6w[i]); + dview[i].w=static_cast<numtyp>(heta[i]); + } + + ucl_copy(param2,dview,false); + param2_tex.get_texture(*(this->pair_program),"param2_tex"); + param2_tex.bind_float(param2,4); + + param3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + + for (int i=0; i<nparams; i++) { + dview[i].x=static_cast<numtyp>(bigh[i]); + dview[i].y=static_cast<numtyp>(bigw[i]); + dview[i].z=static_cast<numtyp>(dvrc[i]); + dview[i].w=static_cast<numtyp>(c0[i]); + } + + ucl_copy(param3,dview,false); + param3_tex.get_texture(*(this->pair_program),"param3_tex"); + param3_tex.bind_float(param3,4); + + param4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + + for (int i=0; i<nparams; i++) { + double r0sq = r0[i]*r0[i]-1e-4; // TODO: should we have the 1e-4? 
+ + dview[i].x=static_cast<numtyp>(r0sq); + dview[i].y=static_cast<numtyp>(gamma[i]); + dview[i].z=static_cast<numtyp>(cutsq[i]); + dview[i].w=static_cast<numtyp>(r0[i]); + } + + ucl_copy(param4,dview,false); + param4_tex.get_texture(*(this->pair_program),"param4_tex"); + param4_tex.bind_float(param4,4); + + param5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + + for (int i=0; i<nparams; i++) { + dview[i].x=static_cast<numtyp>(bigc[i]); + dview[i].y=static_cast<numtyp>(costheta[i]); + dview[i].z=static_cast<numtyp>(bigb[i]); + dview[i].w=static_cast<numtyp>(big2b[i]); + } + + ucl_copy(param5,dview,false); + param5_tex.get_texture(*(this->pair_program),"param5_tex"); + param5_tex.bind_float(param5,4); + + UCL_H_Vec<int> dview_elem2param(nelements*nelements*nelements, + *(this->ucl_device), UCL_WRITE_ONLY); + + elem2param.alloc(nelements*nelements*nelements,*(this->ucl_device), + UCL_READ_ONLY); + + for (int i = 0; i < nelements; i++) + for (int j = 0; j < nelements; j++) + for (int k = 0; k < nelements; k++) { + int idx = i*nelements*nelements+j*nelements+k; + dview_elem2param[idx] = host_elem2param[i][j][k]; + } + + ucl_copy(elem2param,dview_elem2param,false); + + UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY); + for (int i = 0; i < ntypes; i++) + dview_map[i] = host_map[i]; + + map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY); + ucl_copy(map,dview_map,false); + + _allocated=true; + this->_max_bytes=param1.row_bytes()+param2.row_bytes()+param3.row_bytes()+param4.row_bytes()+param5.row_bytes()+ + map.row_bytes()+elem2param.row_bytes(); + return 0; +} + +template <class numtyp, class acctyp> +void VashishtaT::clear() { + if (!_allocated) + return; + _allocated=false; + + param1.clear(); + param2.clear(); + param3.clear(); + param4.clear(); + param5.clear(); + map.clear(); + elem2param.clear(); + this->clear_atomic(); +} + +template <class numtyp, class acctyp> +double VashishtaT::host_memory_usage() const { + return 
this->host_memory_usage_atomic()+sizeof(Vashishta<numtyp,acctyp>); +} + +#define KTHREADS this->_threads_per_atom +#define JTHREADS this->_threads_per_atom +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template <class numtyp, class acctyp> +void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { + // Compute the block size and grid size to keep all cores busy + int BX=this->block_pair(); + int eflag, vflag; + if (_eflag) + eflag=1; + else + eflag=0; + + if (_vflag) + vflag=1; + else + vflag=0; + + int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + // this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa > 1 + // this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1 or tpa == 1 + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5, + &map, &elem2param, &_nelements, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, + &eflag, &vflag, &ainum, &nbor_pitch, + &this->_threads_per_atom); + + BX=this->block_size(); + GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/ + (BX/(KTHREADS*JTHREADS)))); + + this->k_three_center.set_size(GX,BX); + this->k_three_center.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5, + &map, &elem2param, &_nelements, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, + &nbor_pitch, &this->_threads_per_atom, &evatom); + Answer<numtyp,acctyp> *end_ans; + #ifdef THREE_CONCURRENT + end_ans=this->ans2; + #else + end_ans=this->ans; + #endif + if (evatom!=0) { + + this->k_three_end_vatom.set_size(GX,BX); + this->k_three_end_vatom.run(&this->atom->x, &param1, &param2, 
&param3, &param4, &param5, + &map, &elem2param, &_nelements, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->nbor->dev_acc, + &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, + &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); + } else { + + this->k_three_end.set_size(GX,BX); + this->k_three_end.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5, + &map, &elem2param, &_nelements, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->nbor->dev_acc, + &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, + &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); + } + + this->time_pair.stop(); +} + +template class Vashishta<PRECISION,ACC_PRECISION>; + diff --git a/lib/gpu/lal_vashishta.cu b/lib/gpu/lal_vashishta.cu new file mode 100644 index 0000000000000000000000000000000000000000..caa3c036134dbae3fd96429dff6eeaff1d27081c --- /dev/null +++ b/lib/gpu/lal_vashishta.cu @@ -0,0 +1,744 @@ +// ************************************************************************** +// vashishta.cu +// ------------------- +// Anders Hafreager (UiO) +// +// Device code for acceleration of the vashishta pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : Mon June 12, 2017 +// email : andershaf@gmail.com +// ***************************************************************************/ + +#ifdef NV_KERNEL +#include "lal_aux_fun1.h" + +#ifndef _DOUBLE_DOUBLE +texture<float4> pos_tex; +texture<float4> param1_tex; +texture<float4> param2_tex; +texture<float4> param3_tex; +texture<float4> param4_tex; +texture<float4> param5_tex; +#else +texture<int4,1> pos_tex; +texture<int4> param1_tex; +texture<int4> param2_tex; +texture<int4> param3_tex; +texture<int4> param4_tex; +texture<int4> param5_tex; +#endif + +#else +#define pos_tex x_ +#define param1_tex param1 +#define 
param2_tex param2 +#define param3_tex param3 +#define param4_tex param4 +#define param5_tex param5 +#endif + +#define THIRD (numtyp)0.66666666666666666667 + +//#define THREE_CONCURRENT + +#if (ARCH < 300) + +#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset, \ + eflag, vflag, ans, engv) \ + if (t_per_atom>1) { \ + __local acctyp red_acc[6][BLOCK_ELLIPSE]; \ + red_acc[0][tid]=f.x; \ + red_acc[1][tid]=f.y; \ + red_acc[2][tid]=f.z; \ + red_acc[3][tid]=energy; \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + if (offset < s) { \ + for (int r=0; r<4; r++) \ + red_acc[r][tid] += red_acc[r][tid+s]; \ + } \ + } \ + f.x=red_acc[0][tid]; \ + f.y=red_acc[1][tid]; \ + f.z=red_acc[2][tid]; \ + energy=red_acc[3][tid]; \ + if (vflag>0) { \ + for (int r=0; r<6; r++) \ + red_acc[r][tid]=virial[r]; \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + if (offset < s) { \ + for (int r=0; r<6; r++) \ + red_acc[r][tid] += red_acc[r][tid+s]; \ + } \ + } \ + for (int r=0; r<6; r++) \ + virial[r]=red_acc[r][tid]; \ + } \ + } \ + if (offset==0) { \ + int ei=ii; \ + if (eflag>0) { \ + engv[ei]+=energy*(acctyp)0.5; \ + ei+=inum; \ + } \ + if (vflag>0) { \ + for (int i=0; i<6; i++) { \ + engv[ei]+=virial[i]*(acctyp)0.5; \ + ei+=inum; \ + } \ + } \ + acctyp4 old=ans[ii]; \ + old.x+=f.x; \ + old.y+=f.y; \ + old.z+=f.z; \ + ans[ii]=old; \ + } + +#else + +#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset, \ + eflag, vflag, ans, engv) \ + if (t_per_atom>1) { \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + f.x += shfl_xor(f.x, s, t_per_atom); \ + f.y += shfl_xor(f.y, s, t_per_atom); \ + f.z += shfl_xor(f.z, s, t_per_atom); \ + energy += shfl_xor(energy, s, t_per_atom); \ + } \ + if (vflag>0) { \ + for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ + for (int r=0; r<6; r++) \ + virial[r] += shfl_xor(virial[r], s, t_per_atom); \ + } \ + } \ + } \ + if (offset==0) { \ + int ei=ii; \ + if (eflag>0) { \ + engv[ei]+=energy*(acctyp)0.5; 
\ + ei+=inum; \ + } \ + if (vflag>0) { \ + for (int i=0; i<6; i++) { \ + engv[ei]+=virial[i]*(acctyp)0.5; \ + ei+=inum; \ + } \ + } \ + acctyp4 old=ans[ii]; \ + old.x+=f.x; \ + old.y+=f.y; \ + old.z+=f.z; \ + ans[ii]=old; \ + } + +#endif + + +__kernel void k_vashishta(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict param1, + const __global numtyp4 *restrict param2, + const __global numtyp4 *restrict param3, + const __global numtyp4 *restrict param4, + const __global numtyp4 *restrict param5, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii<inum) { + int nbor, nbor_end; + int i, numj; + nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj, + n_stride,nbor_end,nbor); + + numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; + int itype=ix.w; + itype=map[itype]; + + for ( ; nbor<nbor_end; nbor+=n_stride) { + + int j=dev_packed[nbor]; + j &= NEIGHMASK; + + numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; + int jtype=jx.w; + jtype=map[jtype]; + + int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype]; + + // Compute r12 + numtyp delx = ix.x-jx.x; + numtyp dely = ix.y-jx.y; + numtyp delz = ix.z-jx.z; + numtyp rsq = delx*delx+dely*dely+delz*delz; + + if (rsq<param4[ijparam].z) { // cutsq = param4[ijparam].z + numtyp4 param1_ijparam; fetch4(param1_ijparam,ijparam,param1_tex); + numtyp param1_eta=param1_ijparam.x; + numtyp param1_lam1inv=param1_ijparam.y; + numtyp 
param1_lam4inv=param1_ijparam.z; + numtyp param1_zizj=param1_ijparam.w; + + numtyp4 param2_ijparam; fetch4(param2_ijparam,ijparam,param2_tex); + numtyp param2_mbigd=param2_ijparam.x; + numtyp param2_dvrc =param2_ijparam.y; + numtyp param2_big6w=param2_ijparam.z; + numtyp param2_heta =param2_ijparam.w; + + numtyp4 param3_ijparam; fetch4(param3_ijparam,ijparam,param3_tex); + numtyp param3_bigh=param3_ijparam.x; + numtyp param3_bigw=param3_ijparam.y; + numtyp param3_dvrc=param3_ijparam.z; + numtyp param3_c0 =param3_ijparam.w; + + numtyp r=sqrt(rsq); + numtyp rinvsq=1.0/rsq; + numtyp r4inv = rinvsq*rinvsq; + numtyp r6inv = rinvsq*r4inv; + + numtyp reta = pow(r,-param1_eta); + numtyp lam1r = r*param1_lam1inv; + numtyp lam4r = r*param1_lam4inv; + numtyp vc2 = param1_zizj * exp(-lam1r)/r; + numtyp vc3 = param2_mbigd * r4inv*exp(-lam4r); + + numtyp force = (param2_dvrc*r + - (4.0*vc3 + lam4r*vc3+param2_big6w*r6inv + - param2_heta*reta - vc2 - lam1r*vc2) + ) * rinvsq; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + if (eflag>0) + energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0); + + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + } // for nbor + + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii + +} + +#define threebody(delr1x, delr1y, delr1z, eflag, energy) \ +{ \ + numtyp r1 = ucl_sqrt(rsq1); \ + numtyp rinvsq1 = ucl_recip(rsq1); \ + numtyp rainv1 = ucl_recip(r1 - param_r0_ij); \ + numtyp gsrainv1 = param_gamma_ij * rainv1; \ + numtyp gsrainvsq1 = gsrainv1*rainv1/r1; \ + numtyp expgsrainv1 = ucl_exp(gsrainv1); \ + \ + numtyp r2 = ucl_sqrt(rsq2); \ + numtyp rinvsq2 = ucl_recip(rsq2); \ + numtyp rainv2 = ucl_recip(r2 - param_r0_ik); \ + numtyp gsrainv2 = param_gamma_ik * rainv2; \ + numtyp gsrainvsq2 = 
gsrainv2*rainv2/r2; \ + numtyp expgsrainv2 = ucl_exp(gsrainv2); \ + \ + numtyp rinv12 = ucl_recip(r1*r2); \ + numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12; \ + numtyp delcs = cs - param_costheta_ijk; \ + numtyp delcssq = delcs*delcs; \ + numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ + numtyp pcsinvsq = pcsinv*pcsinv; \ + numtyp pcs = delcssq/pcsinv; \ + \ + numtyp facexp = expgsrainv1*expgsrainv2; \ + \ + numtyp facrad = param_bigb_ijk * facexp*pcs; \ + numtyp frad1 = facrad*gsrainvsq1; \ + numtyp frad2 = facrad*gsrainvsq2; \ + numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ + numtyp facang12 = rinv12*facang; \ + numtyp csfacang = cs*facang; \ + numtyp csfac1 = rinvsq1*csfacang; \ + \ + fjx = delr1x*(frad1+csfac1)-delr2x*facang12; \ + fjy = delr1y*(frad1+csfac1)-delr2y*facang12; \ + fjz = delr1z*(frad1+csfac1)-delr2z*facang12; \ + \ + numtyp csfac2 = rinvsq2*csfacang; \ + \ + fkx = delr2x*(frad2+csfac2)-delr1x*facang12; \ + fky = delr2y*(frad2+csfac2)-delr1y*facang12; \ + fkz = delr2z*(frad2+csfac2)-delr1z*facang12; \ + \ + if (eflag>0) \ + energy+=facrad; \ + if (vflag>0) { \ + virial[0] += delr1x*fjx + delr2x*fkx; \ + virial[1] += delr1y*fjy + delr2y*fky; \ + virial[2] += delr1z*fjz + delr2z*fkz; \ + virial[3] += delr1x*fjy + delr2x*fky; \ + virial[4] += delr1x*fjz + delr2x*fkz; \ + virial[5] += delr1y*fjz + delr2y*fkz; \ + } \ +} + +#define threebody_half(delr1x, delr1y, delr1z) \ +{ \ + numtyp r1 = ucl_sqrt(rsq1); \ + numtyp rinvsq1 = ucl_recip(rsq1); \ + numtyp rainv1 = ucl_recip(r1 - param_r0_ij); \ + numtyp gsrainv1 = param_gamma_ij * rainv1; \ + numtyp gsrainvsq1 = gsrainv1*rainv1/r1; \ + numtyp expgsrainv1 = ucl_exp(gsrainv1); \ + \ + numtyp r2 = ucl_sqrt(rsq2); \ + numtyp rainv2 = ucl_recip(r2 - param_r0_ik); \ + numtyp gsrainv2 = param_gamma_ik * rainv2; \ + numtyp expgsrainv2 = ucl_exp(gsrainv2); \ + \ + numtyp rinv12 = ucl_recip(r1*r2); \ + numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12; \ + 
numtyp delcs = cs - param_costheta_ijk; \ + numtyp delcssq = delcs*delcs; \ + numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ + numtyp pcsinvsq = pcsinv*pcsinv; \ + numtyp pcs = delcssq/pcsinv; \ + \ + numtyp facexp = expgsrainv1*expgsrainv2; \ + \ + numtyp facrad = param_bigb_ijk * facexp*pcs; \ + numtyp frad1 = facrad*gsrainvsq1; \ + numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ + numtyp facang12 = rinv12*facang; \ + numtyp csfacang = cs*facang; \ + numtyp csfac1 = rinvsq1*csfacang; \ + \ + fjx = delr1x*(frad1+csfac1)-delr2x*facang12; \ + fjy = delr1y*(frad1+csfac1)-delr2y*facang12; \ + fjz = delr1z*(frad1+csfac1)-delr2z*facang12; \ +} + +__kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict param1, + const __global numtyp4 *restrict param2, + const __global numtyp4 *restrict param3, + const __global numtyp4 *restrict param4, + const __global numtyp4 *restrict param5, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, + const int inum, const int nbor_pitch, + const int t_per_atom, const int evatom) { + __local int tpa_sq, n_stride; + tpa_sq=fast_mul(t_per_atom,t_per_atom); + numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik; + numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk; + + int tid, ii, offset; + atom_info(tpa_sq,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii<inum) { + int i, numj, nbor_j, nbor_end; + + int offset_j=offset/t_per_atom; + nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, + n_stride,nbor_end,nbor_j); + int 
offset_k=tid & (t_per_atom-1); + + numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; + int itype=ix.w; + itype=map[itype]; + + for ( ; nbor_j<nbor_end; nbor_j+=n_stride) { + + int j=dev_packed[nbor_j]; + j &= NEIGHMASK; + + numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; + int jtype=jx.w; + jtype=map[jtype]; + + // Compute r12 + numtyp delr1x = jx.x-ix.x; + numtyp delr1y = jx.y-ix.y; + numtyp delr1z = jx.z-ix.z; + numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z; + + int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype]; + + numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex); + param_r0sq_ij=param4_ijparam.x; + if (rsq1 > param_r0sq_ij) continue; + param_gamma_ij=param4_ijparam.y; + param_r0_ij=param4_ijparam.w; + + int nbor_k=nbor_j-offset_j+offset_k; + if (nbor_k<=nbor_j) + nbor_k+=n_stride; + + for ( ; nbor_k<nbor_end; nbor_k+=n_stride) { + int k=dev_packed[nbor_k]; + k &= NEIGHMASK; + + numtyp4 kx; fetch4(kx,k,pos_tex); + int ktype=kx.w; + ktype=map[ktype]; + int ikparam=elem2param[itype*nelements*nelements+ktype*nelements+ktype]; + numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex); + + numtyp delr2x = kx.x-ix.x; + numtyp delr2y = kx.y-ix.y; + numtyp delr2z = kx.z-ix.z; + numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z; + + param_r0sq_ik=param4_ikparam.x; + if (rsq2 < param_r0sq_ik) { + param_gamma_ik=param4_ikparam.y; + param_r0_ik=param4_ikparam.w; + + int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype]; + numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex); + param_bigc_ijk=param5_ijkparam.x; + param_bigb_ijk=param5_ijkparam.z; + param_big2b_ijk=param5_ijkparam.w; + param_costheta_ijk=param5_ijkparam.y; + + numtyp fjx, fjy, fjz, fkx, fky, fkz; + threebody(delr1x,delr1y,delr1z,eflag,energy); + + f.x -= fjx + fkx; + f.y -= fjy + fky; + f.z -= fjz + fkz; + } + } + } // for nbor + + numtyp pre; + if (evatom==1) + pre=THIRD; + else + pre=(numtyp)2.0; + energy*=pre; + for 
(int i=0; i<6; i++) + virial[i]*=pre; + + store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset, + eflag,vflag,ans,engv); + + } // if ii +} + +__kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict param1, + const __global numtyp4 *restrict param2, + const __global numtyp4 *restrict param3, + const __global numtyp4 *restrict param4, + const __global numtyp4 *restrict param5, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, + const __global int * dev_nbor, + const __global int * dev_packed, + const __global int * dev_acc, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, + const int inum, const int nbor_pitch, + const int t_per_atom, const int gpu_nbor) { + __local int tpa_sq, n_stride; + tpa_sq=fast_mul(t_per_atom,t_per_atom); + numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik; + numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk; + + int tid, ii, offset; + atom_info(tpa_sq,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii<inum) { + int i, numj, nbor_j, nbor_end, k_end; + + int offset_j=offset/t_per_atom; + nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, + n_stride,nbor_end,nbor_j); + int offset_k=tid & (t_per_atom-1); + + numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; + int itype=ix.w; + itype=map[itype]; + + for ( ; nbor_j<nbor_end; nbor_j+=n_stride) { + int j=dev_packed[nbor_j]; + j &= NEIGHMASK; + + numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; + int jtype=jx.w; + jtype=map[jtype]; + + // Compute r12 + numtyp delr1x = ix.x-jx.x; + numtyp delr1y = ix.y-jx.y; + numtyp delr1z = ix.z-jx.z; + numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z; + + int 
ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype]; + numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex); + param_r0sq_ij = param4_ijparam.x; + if (rsq1 > param_r0sq_ij) continue; + + param_gamma_ij=param4_ijparam.y; + param_r0_ij = param4_ijparam.w; + + int nbor_k,numk; + if (dev_nbor==dev_packed) { + if (gpu_nbor) nbor_k=j+nbor_pitch; + else nbor_k=dev_acc[j]+nbor_pitch; + numk=dev_nbor[nbor_k]; + nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); + k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); + nbor_k+=offset_k; + } else { + nbor_k=dev_acc[j]+nbor_pitch; + numk=dev_nbor[nbor_k]; + nbor_k+=nbor_pitch; + nbor_k=dev_nbor[nbor_k]; + k_end=nbor_k+numk; + nbor_k+=offset_k; + } + + for ( ; nbor_k<k_end; nbor_k+=n_stride) { + int k=dev_packed[nbor_k]; + k &= NEIGHMASK; + + if (k == i) continue; + + numtyp4 kx; fetch4(kx,k,pos_tex); + int ktype=kx.w; + ktype=map[ktype]; + int ikparam=elem2param[jtype*nelements*nelements+ktype*nelements+ktype]; //jk + + numtyp delr2x = kx.x - jx.x; + numtyp delr2y = kx.y - jx.y; + numtyp delr2z = kx.z - jx.z; + numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z; + numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex); + param_r0sq_ik=param4_ikparam.x; + + if (rsq2 < param_r0sq_ik) { + param_gamma_ik=param4_ikparam.y; + param_r0_ik=param4_ikparam.w; + + int ijkparam=elem2param[jtype*nelements*nelements+itype*nelements+ktype]; //jik + numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex); + param_bigc_ijk=param5_ijkparam.x; + param_costheta_ijk=param5_ijkparam.y; + param_bigb_ijk=param5_ijkparam.z; + param_big2b_ijk=param5_ijkparam.w; + + numtyp fjx, fjy, fjz; + //if (evatom==0) { + threebody_half(delr1x,delr1y,delr1z); + //} else { + // numtyp fkx, fky, fkz; + // threebody(delr1x,delr1y,delr1z,eflag,energy); + //} + + f.x += fjx; + f.y += fjy; + f.z += fjz; + } + } + + } // for nbor + #ifdef THREE_CONCURRENT + 
store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset, + eflag,vflag,ans,engv); + #else + store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset, + eflag,vflag,ans,engv); + #endif + } // if ii +} + +__kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict param1, + const __global numtyp4 *restrict param2, + const __global numtyp4 *restrict param3, + const __global numtyp4 *restrict param4, + const __global numtyp4 *restrict param5, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, + const __global int * dev_nbor, + const __global int * dev_packed, + const __global int * dev_acc, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, + const int inum, const int nbor_pitch, + const int t_per_atom, const int gpu_nbor) { + __local int tpa_sq, n_stride; + tpa_sq=fast_mul(t_per_atom,t_per_atom); + numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik; + numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk; + + int tid, ii, offset; + atom_info(tpa_sq,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii<inum) { + int i, numj, nbor_j, nbor_end, k_end; + + int offset_j=offset/t_per_atom; + nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj, + n_stride,nbor_end,nbor_j); + int offset_k=tid & (t_per_atom-1); + + numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; + int itype=ix.w; + itype=map[itype]; + + for ( ; nbor_j<nbor_end; nbor_j+=n_stride) { + int j=dev_packed[nbor_j]; + j &= NEIGHMASK; + + numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j]; + int jtype=jx.w; + jtype=map[jtype]; + + // Compute r12 + numtyp delr1x = ix.x-jx.x; + numtyp delr1y = ix.y-jx.y; + numtyp delr1z = ix.z-jx.z; + numtyp rsq1 = 
delr1x*delr1x+delr1y*delr1y+delr1z*delr1z; + + int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype]; + numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex); + param_r0sq_ij=param4_ijparam.x; + if (rsq1 > param_r0sq_ij) continue; + + param_gamma_ij=param4_ijparam.y; + param_r0_ij=param4_ijparam.w; + + int nbor_k,numk; + if (dev_nbor==dev_packed) { + if (gpu_nbor) nbor_k=j+nbor_pitch; + else nbor_k=dev_acc[j]+nbor_pitch; + numk=dev_nbor[nbor_k]; + nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1); + k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1)); + nbor_k+=offset_k; + } else { + nbor_k=dev_acc[j]+nbor_pitch; + numk=dev_nbor[nbor_k]; + nbor_k+=nbor_pitch; + nbor_k=dev_nbor[nbor_k]; + k_end=nbor_k+numk; + nbor_k+=offset_k; + } + + for ( ; nbor_k<k_end; nbor_k+=n_stride) { + int k=dev_packed[nbor_k]; + k &= NEIGHMASK; + + if (k == i) continue; + + numtyp4 kx; fetch4(kx,k,pos_tex); + int ktype=kx.w; + ktype=map[ktype]; + int ikparam=elem2param[jtype*nelements*nelements+ktype*nelements+ktype]; // jk + numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex); + + numtyp delr2x = kx.x - jx.x; + numtyp delr2y = kx.y - jx.y; + numtyp delr2z = kx.z - jx.z; + numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z; + param_r0sq_ik=param4_ikparam.x; + + if (rsq2 < param_r0sq_ik) { + param_gamma_ik=param4_ikparam.y; + param_r0_ik=param4_ikparam.w; + + int ijkparam=elem2param[jtype*nelements*nelements+itype*nelements+ktype]; // jik + numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex); + param_bigc_ijk=param5_ijkparam.x; + param_costheta_ijk=param5_ijkparam.y; + param_bigb_ijk=param5_ijkparam.z; + param_big2b_ijk=param5_ijkparam.w; + + numtyp fjx, fjy, fjz, fkx, fky, fkz; + threebody(delr1x,delr1y,delr1z,eflag,energy); + + f.x += fjx; + f.y += fjy; + f.z += fjz; + } + } + + } // for nbor + energy*=THIRD; + for (int i=0; i<6; i++) + virial[i]*=THIRD; + #ifdef THREE_CONCURRENT + 
store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset, + eflag,vflag,ans,engv); + #else + store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset, + eflag,vflag,ans,engv); + #endif + } // if ii +} + diff --git a/lib/gpu/lal_vashishta.h b/lib/gpu/lal_vashishta.h new file mode 100644 index 0000000000000000000000000000000000000000..6eea8362cc2b2cd299d459a6364b9b471c83b9b2 --- /dev/null +++ b/lib/gpu/lal_vashishta.h @@ -0,0 +1,97 @@ +/*************************************************************************** + vashishta.h + ------------------- + Anders Hafreager (UiO) + + Class for acceleration of the vashishta pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Mon June 12, 2017 + email : andershaf@gmail.com + ***************************************************************************/ + +#ifndef LAL_VASHISHTA_H +#define LAL_VASHISHTA_H + +#include "lal_base_three.h" + +namespace LAMMPS_AL { + +template <class numtyp, class acctyp> +class Vashishta : public BaseThree<numtyp, acctyp> { + public: + Vashishta(); + ~Vashishta(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, const int nlocal, const int nall, const int max_nbors, + const double cell_size, const double gpu_split, FILE *screen, + int* host_map, const int nelements, int*** host_elem2param, const int nparams, + const double* cutsq, const double* r0, + const double* gamma, const double* 
eta, + const double* lam1inv, const double* lam4inv, + const double* zizj, const double* mbigd, + const double* dvrc, const double* big6w, + const double* heta, const double* bigh, + const double* bigw, const double* c0, + const double* costheta, const double* bigb, + const double* big2b, const double* bigc); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// param1.x = eta, param1.y = lam1inv, param1.z = lam4inv, param1.w = zizj + UCL_D_Vec<numtyp4> param1; + /// param2.x = mbigd, param2.y = dvrc, param2.z = big6w, param2.w = heta + UCL_D_Vec<numtyp4> param2; + /// param3.x = bigh, param3.y = bigw, param3.z = dvrc, param3.w = c0 + UCL_D_Vec<numtyp4> param3; + /// param4.x = r0sq, param4.y = gamma, param4.z = cutsq, param4.w = r0 + UCL_D_Vec<numtyp4> param4; + /// param5.x = bigc, param5.y = costheta, param5.z = bigb, param5.w = big2b + UCL_D_Vec<numtyp4> param5; + + UCL_D_Vec<int> elem2param; + UCL_D_Vec<int> map; + int _nparams,_nelements; + + UCL_Texture param1_tex, param2_tex, param3_tex, param4_tex, param5_tex; + + private: + bool _allocated; + void loop(const bool _eflag, const bool _vflag, const int evatom); + +}; + +} + +#endif + diff --git a/lib/gpu/lal_vashishta_ext.cpp b/lib/gpu/lal_vashishta_ext.cpp new file mode 100644 index 0000000000000000000000000000000000000000..22f530a7ed01dd580ad1122b06ac5785676f51a6 --- /dev/null +++ b/lib/gpu/lal_vashishta_ext.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + vashishta_ext.cpp + 
------------------- + Anders Hafreager (UiO) + + Class for acceleration of the vashishta pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Mon June 12, 2017 + email : andershaf@gmail.com + ***************************************************************************/ + +#include <iostream> +#include <cassert> +#include <math.h> + +#include "lal_vashishta.h" +using namespace LAMMPS_AL; + +static Vashishta<PRECISION,ACC_PRECISION> VashishtaMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors, + const double cell_size, int &gpu_mode, FILE *screen, + int* host_map, const int nelements, int*** host_elem2param, const int nparams, + const double* cutsq, const double* r0, + const double* gamma, const double* eta, + const double* lam1inv, const double* lam4inv, + const double* zizj, const double* mbigd, + const double* dvrc, const double* big6w, + const double* heta, const double* bigh, + const double* bigw, const double* c0, + const double* costheta, const double* bigb, + const double* big2b, const double* bigc) { + VashishtaMF.clear(); + gpu_mode=VashishtaMF.device->gpu_mode(); + double gpu_split=VashishtaMF.device->particle_split(); + int first_gpu=VashishtaMF.device->first_device(); + int last_gpu=VashishtaMF.device->last_device(); + int world_me=VashishtaMF.device->world_me(); + int gpu_rank=VashishtaMF.device->gpu_rank(); + int procs_per_gpu=VashishtaMF.device->procs_per_gpu(); + + // disable host/device split for now + if (gpu_split != 1.0) + return -8; + + 
VashishtaMF.device->init_message(screen,"vashishta/gpu",first_gpu,last_gpu); + + bool message=false; + if (VashishtaMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=VashishtaMF.init(ntypes, inum, nall, 500, cell_size, gpu_split, screen, + host_map, nelements, host_elem2param, nparams, + cutsq, r0, gamma, eta, lam1inv, + lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw, + c0, costheta, bigb, big2b, bigc); + + VashishtaMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; i<procs_per_gpu; i++) { + if (message) { + if (last_gpu-first_gpu==0) + fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i); + else + fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu, + last_gpu,i); + fflush(screen); + } + if (gpu_rank==i && world_me!=0) + init_ok=VashishtaMF.init(ntypes, inum, nall, 500, cell_size, gpu_split, screen, + host_map, nelements, host_elem2param, nparams, + cutsq, r0, gamma, eta, lam1inv, + lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw, + c0, costheta, bigb, big2b, bigc); + + VashishtaMF.device->gpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + VashishtaMF.estimate_gpu_overhead(); + + return init_ok; +} + +void vashishta_gpu_clear() { + VashishtaMF.clear(); +} + +int ** vashishta_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success) { + return VashishtaMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, 
host_start, ilist, jnum, cpu_time, success); +} + +void vashishta_gpu_compute(const int ago, const int nlocal, const int nall, + const int nlist, double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, + int &host_start, const double cpu_time, bool &success) { + VashishtaMF.compute(ago,nlocal,nall,nlist,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success); +} + +double vashishta_gpu_bytes() { + return VashishtaMF.host_memory_usage(); +} + + diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index c6fe991b9761d5ef20af649f54224b03f2dd7fe8..acb54ff22fb5383ab5a243c805fe90e56c9129f5 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,53 @@ # Change Log + +## [2.03.05](https://github.com/kokkos/kokkos/tree/2.03.05) (2017-05-27) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.00...2.03.05) + +**Implemented enhancements:** + +- Harmonize Custom Reductions over nesting levels [\#802](https://github.com/kokkos/kokkos/issues/802) +- Prevent users directly including KokkosCore\_config.h [\#815](https://github.com/kokkos/kokkos/issues/815) +- DualView aborts on concurrent host/device modify \(in debug mode\) [\#814](https://github.com/kokkos/kokkos/issues/814) +- Abort when running on a NVIDIA CC5.0 or higher architecture with code compiled for CC \< 5.0 [\#813](https://github.com/kokkos/kokkos/issues/813) +- Add "name" function to ExecSpaces [\#806](https://github.com/kokkos/kokkos/issues/806) +- Allow null Future in task spawn dependences [\#795](https://github.com/kokkos/kokkos/issues/795) +- Add Unit Tests for Kokkos::complex [\#785](https://github.com/kokkos/kokkos/issues/785) +- Add pow function for Kokkos::complex [\#784](https://github.com/kokkos/kokkos/issues/784) +- Square root of a complex [\#729](https://github.com/kokkos/kokkos/issues/729) +- Command line processing of --threads 
argument prevents users from having any commandline arguments starting with --threads [\#760](https://github.com/kokkos/kokkos/issues/760) +- Protected deprecated API with appropriate macro [\#756](https://github.com/kokkos/kokkos/issues/756) +- Allow task scheduler memory pool to be used by tasks [\#747](https://github.com/kokkos/kokkos/issues/747) +- View bounds checking on host-side performance: constructing a std::string [\#723](https://github.com/kokkos/kokkos/issues/723) +- Add check for AppleClang as compiler distinct from check for Clang. [\#705](https://github.com/kokkos/kokkos/issues/705) +- Uninclude source files for specific configurations to prevent link warning. [\#701](https://github.com/kokkos/kokkos/issues/701) +- Add --small option to snapshot script [\#697](https://github.com/kokkos/kokkos/issues/697) +- CMake Standalone Support [\#674](https://github.com/kokkos/kokkos/issues/674) +- CMake build unit test and install [\#808](https://github.com/kokkos/kokkos/issues/808) +- CMake: Fix having kokkos as a subdirectory in a pure cmake project [\#629](https://github.com/kokkos/kokkos/issues/629) +- Tribits macro assumes build directory is in top level source directory [\#654](https://github.com/kokkos/kokkos/issues/654) +- Use bin/nvcc\_wrapper, not config/nvcc\_wrapper [\#562](https://github.com/kokkos/kokkos/issues/562) +- Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487) +- Allow MemoryPool::allocate\\(\\) to be called from multiple threads per warp. 
[\#487](https://github.com/kokkos/kokkos/issues/487) +- Move OpenMP 4.5 OpenMPTarget backend into Develop [\#456](https://github.com/kokkos/kokkos/issues/456) +- Testing on ARM testbed [\#288](https://github.com/kokkos/kokkos/issues/288) + +**Fixed bugs:** + +- Fix label in OpenMP parallel\_reduce verify\_initialized [\#834](https://github.com/kokkos/kokkos/issues/834) +- TeamScratch Level 1 on Cuda hangs [\#820](https://github.com/kokkos/kokkos/issues/820) +- \[bug\] memory pool. [\#786](https://github.com/kokkos/kokkos/issues/786) +- Some Reduction Tests fail on Intel 18 with aggressive vectorization on [\#774](https://github.com/kokkos/kokkos/issues/774) +- Error copying dynamic view on copy of memory pool [\#773](https://github.com/kokkos/kokkos/issues/773) +- CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758) +- ThreadVectorRange Customized Reduction Bug [\#739](https://github.com/kokkos/kokkos/issues/739) +- set\_scratch\_size overflows [\#726](https://github.com/kokkos/kokkos/issues/726) +- Get wrong results for compiler checks in Makefile on OS X. [\#706](https://github.com/kokkos/kokkos/issues/706) +- Fix check if multiple host architectures enabled. 
[\#702](https://github.com/kokkos/kokkos/issues/702) +- Threads Backend Does not Pass on Cray Compilers [\#609](https://github.com/kokkos/kokkos/issues/609) +- Rare bug in memory pool where allocation can finish on superblock in empty state [\#452](https://github.com/kokkos/kokkos/issues/452) +- LDFLAGS in core/unit\_test/Makefile: potential "undefined reference" to pthread lib [\#148](https://github.com/kokkos/kokkos/issues/148) + ## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 1c820660ae375006e83bd50c0d4bbd8472ed0258..b2771ed5273e0f94438580cf4b0f06e4b479a854 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -5,11 +5,12 @@ ELSE() ENDIF() IF(NOT KOKKOS_HAS_TRILINOS) - CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR) - INCLUDE(cmake/tribits.cmake) - SET(CMAKE_CXX_STANDARD 11) -ENDIF() + cmake_minimum_required(VERSION 3.1 FATAL_ERROR) + project(Kokkos CXX) + INCLUDE(cmake/kokkos.cmake) +ELSE() +#------------------------------------------------------------------------------ # # A) Forward delcare the package so that certain options are also defined for # subpackages @@ -17,14 +18,13 @@ ENDIF() TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) + #------------------------------------------------------------------------------ # # B) Define the common options for Kokkos first so they can be used by # subpackages as well. # - - # mfh 01 Aug 2016: See Issue #61: # # https://github.com/kokkos/kokkos/issues/61 @@ -83,10 +83,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) ASSERT_DEFINED(TPL_ENABLE_Pthread) -IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) +IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. 
Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") -ENDIF () -IF (NOT TPL_ENABLE_Pthread) +ENDIF() +IF(NOT TPL_ENABLE_Pthread) ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0) ENDIF() @@ -98,12 +98,13 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Qthreads + Kokkos_ENABLE_QTHREAD KOKKOS_HAVE_QTHREADS "Enable Qthreads support in Kokkos." - "${TPL_ENABLE_QTHREADS}" + "${TPL_ENABLE_QTHREAD}" ) +# TODO: No longer an option in Kokkos. Needs to be removed. TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_CXX11 KOKKOS_HAVE_CXX11 @@ -118,6 +119,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${TPL_ENABLE_HWLOC}" ) +# TODO: This is currently not used in Kokkos. Should it be removed? TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_MPI KOKKOS_HAVE_MPI @@ -154,13 +156,27 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" ) +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Debug_DualView_Modify_Check + KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + "Enable abort when Kokkos::DualView modified on host and device without sync." + "${Kokkos_ENABLE_DEBUG}" + ) + TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Profiling - KOKKOS_ENABLE_PROFILING_INTERNAL + KOKKOS_ENABLE_PROFILING "Enable KokkosP profiling support for kernel data collections." "${TPL_ENABLE_DLlib}" ) +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Profiling_Load_Print + KOKKOS_ENABLE_PROFILING_LOAD_PRINT + "Print to standard output which profiling library was loaded." + OFF + ) + # placeholder for future device... TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Winthread @@ -169,6 +185,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${TPL_ENABLE_Winthread}" ) +# TODO: No longer an option in Kokkos. Needs to be removed. 
# use new/old View TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_USING_DEPRECATED_VIEW @@ -177,12 +194,12 @@ TRIBITS_ADD_OPTION_AND_DEFINE( OFF ) + #------------------------------------------------------------------------------ # # C) Install Kokkos' executable scripts # - # nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. # Kokkos needs nvcc_wrapper in order to build. Other libraries and # executables also need nvcc_wrapper. Thus, we need to install it. @@ -199,6 +216,8 @@ INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) TRIBITS_PROCESS_SUBPACKAGES() + +#------------------------------------------------------------------------------ # # E) If Kokkos itself is enabled, process the Kokkos package # @@ -213,3 +232,4 @@ TRIBITS_EXCLUDE_FILES( ) TRIBITS_PACKAGE_POSTPROCESS() +ENDIF() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 5b094dba8cb786c94c9119a5865fcc0dadf9a76f..24cd772e008c6dff867317203c0f7e0f57b7256d 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -35,23 +35,26 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "e # Check for advanced settings. 
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) +KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l)) +KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) # Check for Kokkos Host Execution Spaces one of which must be on. -KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) +KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l)) +KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l)) KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 -endif -endif + ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) + ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 + endif + endif endif # Check for other Execution Spaces. 
@@ -64,24 +67,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif # Check OS. -KOKKOS_OS := $(shell uname -s) -KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l) -KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l) -KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l) +KOKKOS_OS := $(strip $(shell uname -s)) +KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l)) +KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l)) +KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l)) # Check compiler. -KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) -KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) -KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) -KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) -KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) +KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)) +KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)) +KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) +KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)) +KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) ifneq ($(OMPI_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)) endif ifneq ($(MPICH_CXX),) - 
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)) endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -90,6 +94,11 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2) KOKKOS_INTERNAL_COMPILER_XL = 1 endif +# Apple Clang passes both clang and apple clang tests, so turn off clang. +ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_CLANG = 0 +endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') @@ -97,29 +106,43 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) endif + KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 endif endif +# Set OpenMP flags. ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -mp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp + ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment. - KOKKOS_INTERNAL_OPENMP_FLAG := + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # OpenMP is turned on by default in Cray compiler environment. 
+ KOKKOS_INTERNAL_OPENMP_FLAG := + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + endif endif endif endif endif +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh +else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp + endif +endif +# Set C++11 flags. ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_CXX11_FLAG := --c++11 else @@ -146,7 +169,7 @@ KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) # NVIDIA based. -NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) @@ -180,10 +203,20 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) + CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) + endif + endif +endif # ARM based. 
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc)) # IBM based. KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) @@ -206,8 +239,11 @@ KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) +# Decide whether we can support transactional memory +KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) + # Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -240,12 +276,22 @@ tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.t tmp := $(shell date >> KokkosCore_config.tmp) tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) +tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#else' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp) + tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) endif @@ -262,6 +308,12 @@ ifeq 
($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) endif +ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) +endif + ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) @@ -293,13 +345,21 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) -ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo -endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_CXXFLAGS += -lineinfo + endif + KOKKOS_CXXFLAGS += -g KOKKOS_LDFLAGS += -g -ldl tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp ) + endif +endif + +ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) @@ -311,8 +371,6 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) KOKKOS_LIBS += -lrt endif @@ -323,8 +381,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) - tmp := $(shell echo 
"\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp ) +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp ) endif tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) @@ -336,39 +394,44 @@ endif tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + endif + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += 
-expt-extended-lambda + else + $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) + endif + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda - else - $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) endif endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp ) endif endif -endif - # Add Architecture flags. ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) @@ -469,7 +532,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) @@ -491,6 +554,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=haswell + KOKKOS_LDFLAGS += -tp=haswell + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm + endif + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) @@ -501,12 +586,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) else # Asssume that this is really a GNU compiler. - KOKKOS_CXXFLAGS += -march=knl - KOKKOS_LDFLAGS += -march=knl + KOKKOS_CXXFLAGS += -march=knl -mtune=knl + KOKKOS_LDFLAGS += -march=knl -mtune=knl endif endif endif @@ -526,8 +611,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) else # Nothing here yet. - KOKKOS_CXXFLAGS += -march=skylake-avx512 - KOKKOS_LDFLAGS += -march=skylake-avx512 + KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm endif endif endif @@ -541,70 +626,67 @@ endif # Figure out the architecture flag for Cuda. 
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch + KOKKOS_CXXFLAGS += -x cuda + endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch -endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch - KOKKOS_CXXFLAGS += -x cuda -endif - -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell 
echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 -endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := 
$(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo 
"\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 + endif + ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + endif + endif endif KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) @@ -630,9 +712,24 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_LIBS += -lcudart -lcuda + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) + else + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) + endif + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -666,10 +763,27 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - 
KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC + KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) endif +# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning. +ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) +endif + +# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning. +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC)) +endif + +# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial +# device to avoid a link warning. +ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC)) +endif + # With Cygwin functions such as fdopen and fileno are not defined # when strict ansi is enabled. strict ansi gets enabled with --std=c++11 # though. So we hard undefine it here. 
Not sure if that has any bad side effects diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 54cacb741b4f35a0033d8de0e57ded9d4dab0a00..3cb52a04cd3093faf9a495c19de5e22652bd79f4 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -53,11 +53,20 @@ Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/K endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) -Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) +Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +endif + Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) 
$(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..9c08a088b0e108f78728fcc00742baaab441d4e2 100644 --- a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp +++ b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp @@ -0,0 +1 @@ +void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {} diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index bd73582362eed46161ee0ac0cf36fec4d5178129..42c115b7a5cec83616bff6a6e513d4e2aafd8a3a 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -674,7 +674,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION @@ -917,7 +917,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION @@ -1171,7 +1171,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index 3027c6a94b9826ba9da11648539dc5c83bebaa77..b74192ef18d0c132e7b734bcb27796304744ed66 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -8,7 +8,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS 
= ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o @@ -49,16 +49,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) endif KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda + $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads + $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP + $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial + $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial test-cuda: KokkosAlgorithms_UnitTest_Cuda ./KokkosAlgorithms_UnitTest_Cuda @@ -76,7 +76,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -84,6 +84,5 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - 
diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp index ba3938f497d9b95f3e29652351ddb0b1a6336992..710eeb8ada08065dc4f1af33c3f1ee616ceda241 100644 --- a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <stdint.h> +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_CUDA + +#include <cstdint> #include <iostream> #include <iomanip> @@ -49,8 +52,6 @@ #include <Kokkos_Core.hpp> -#ifdef KOKKOS_ENABLE_CUDA - #include <TestRandom.hpp> #include <TestSort.hpp> @@ -105,6 +106,7 @@ CUDA_SORT_UNSIGNED(171) #undef CUDA_RANDOM_XORSHIFT1024 #undef CUDA_SORT_UNSIGNED } - +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {} #endif /* #ifdef KOKKOS_ENABLE_CUDA */ diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp index f4d582d0bb849a10826846884f0136360f350bc0..1e7ee68549a2aa439d3c86935409bc3ba6528eba 100644 --- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,13 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <gtest/gtest.h> +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_OPENMP + +#include <gtest/gtest.h> #include <Kokkos_Core.hpp> //---------------------------------------------------------------------------- @@ -52,7 +55,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_OPENMP class openmp : public ::testing::Test { protected: static void SetUpTestCase() @@ -97,6 +99,8 @@ OPENMP_SORT_UNSIGNED(171) #undef OPENMP_RANDOM_XORSHIFT64 #undef OPENMP_RANDOM_XORSHIFT1024 #undef OPENMP_SORT_UNSIGNED -#endif } // namespace test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index c906b9f2cdc69735a225698c2bb5dc0e152160cb..9cf02f74b4980ee2845e6c055392099f883d3b5b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -295,7 +295,7 @@ struct test_random_scalar { parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); - double tolerance = 1.6*sqrt(1.0/num_draws); + double tolerance = 1.6*std::sqrt(1.0/num_draws); double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max(); double variance_expect = 1.0/3.0*mean_expect*mean_expect; double 
mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; @@ -321,7 +321,7 @@ struct test_random_scalar { typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type; parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); - double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); double mean_expect = 1.0*num_draws*3/HIST_DIM1D; double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; @@ -354,7 +354,7 @@ struct test_random_scalar { typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type; parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); - double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); double mean_expect = 1.0*num_draws/HIST_DIM1D; double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp index 6ac80cf73add7fa4ac15f1b4f51318594b09c905..a1df93e07b51482408c861919968953674de4c2e 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_SERIAL + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -55,7 +58,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_SERIAL class serial : public ::testing::Test { protected: static void SetUpTestCase() @@ -93,7 +95,9 @@ SERIAL_SORT_UNSIGNED(171) #undef SERIAL_RANDOM_XORSHIFT1024 #undef SERIAL_SORT_UNSIGNED -#endif // KOKKOS_ENABLE_SERIAL } // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {} +#endif // KOKKOS_ENABLE_SERIAL diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 61ffa6f43a39ecbb1640a71de5afb9be33cd10dd..04be98f1cc4a61600c29a6891965e6553a4f6ab5 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,12 +35,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER -#ifndef TESTSORT_HPP_ -#define TESTSORT_HPP_ +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP #include <gtest/gtest.h> #include<Kokkos_Core.hpp> @@ -212,7 +212,12 @@ void test_dynamic_view_sort(unsigned int n ) const size_t upper_bound = 2 * n ; typename KeyDynamicViewType::memory_pool - pool( memory_space() , 2 * n * sizeof(KeyType) ); + pool( memory_space() + , n * sizeof(KeyType) * 1.2 + , 500 /* min block size in bytes */ + , 30000 /* max block size in bytes */ + , 1000000 /* min superblock size in bytes */ + ); KeyDynamicViewType keys("Keys",pool,upper_bound); @@ -272,4 +277,4 @@ void test_sort(unsigned int N) } } -#endif /* TESTSORT_HPP_ */ +#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp index 36d438b6433f4a11b8bc2bbf5f19ce822cd4bf9f..08749779ff56a692cebad20fcf94815c5ff7b732 100644 --- a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_THREADS + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -55,7 +58,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_PTHREAD class threads : public ::testing::Test { protected: static void SetUpTestCase() @@ -107,7 +109,9 @@ THREADS_SORT_UNSIGNED(171) #undef THREADS_RANDOM_XORSHIFT1024 #undef THREADS_SORT_UNSIGNED -#endif } // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile index 6a1917a523170bb392c6e81855e60489085bf113..5ddf78f28eca91f1bbbb0869d8e8b691411ed20d 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -7,7 +7,7 @@ default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper EXE = bytes_and_flops.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g DEPFLAGS = -M LINK = ${CXX} -LINKFLAGS = +LINKFLAGS = OBJ = $(SRC:.cpp=.o) LIB = @@ -34,7 +34,7 @@ build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o *.cuda *.host # Compilation rules diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile index fd1feab6fa8c22d13c726dad7312e769bbdffc37..0ea9fb1dd27b47d0d35d578f9ff1fa862e20a8c6 100644 --- a/lib/kokkos/benchmarks/gather/Makefile +++ b/lib/kokkos/benchmarks/gather/Makefile @@ -7,7 +7,7 @@ default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +CXX = 
${KOKKOS_PATH}/bin/nvcc_wrapper EXE = gather.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g DEPFLAGS = -M LINK = ${CXX} -LINKFLAGS = +LINKFLAGS = OBJ = $(SRC:.cpp=.o) LIB = @@ -35,10 +35,10 @@ build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o *.cuda *.host # Compilation rules -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..fc099a494ce25af9068e113a688904e06458fcbe --- /dev/null +++ b/lib/kokkos/cmake/KokkosConfig.cmake.in @@ -0,0 +1,18 @@ +# - Config file for the Kokkos package +# It defines the following variables +# Kokkos_INCLUDE_DIRS - include directories for Kokkos +# Kokkos_LIBRARIES - libraries to link against + +# Compute paths +GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") + +# Our library dependencies (contains definitions for IMPORTED targets) +IF(NOT TARGET kokkos AND NOT Kokkos_BINARY_DIR) + INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +ENDIF() + +# These are IMPORTED targets created by KokkosTargets.cmake +SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) +SET(Kokkos_LIBRARIES @Kokkos_LIBRARIES_NAMES@) +SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) diff --git a/lib/kokkos/cmake/Modules/FindHWLOC.cmake b/lib/kokkos/cmake/Modules/FindHWLOC.cmake new file mode 100644 index 0000000000000000000000000000000000000000..273dcb5c8aac9dc7f442844ff48d0827488afe0c --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindHWLOC.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindHWLOC +# ---------- +# +# Try to find 
HWLOC. +# +# The following variables are defined: +# +# HWLOC_FOUND - System has HWLOC +# HWLOC_INCLUDE_DIR - HWLOC include directory +# HWLOC_LIBRARIES - Libraries needed to use HWLOC + +find_path(HWLOC_INCLUDE_DIR hwloc.h) +find_library(HWLOC_LIBRARIES hwloc) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) + +mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindMemkind.cmake b/lib/kokkos/cmake/Modules/FindMemkind.cmake new file mode 100644 index 0000000000000000000000000000000000000000..245fb44c19a2ff71a389c5f139cb3f6ac38f924e --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindMemkind.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindMemkind +# ---------- +# +# Try to find Memkind. +# +# The following variables are defined: +# +# MEMKIND_FOUND - System has Memkind +# MEMKIND_INCLUDE_DIR - Memkind include directory +# MEMKIND_LIBRARIES - Libraries needed to use Memkind + +find_path(MEMKIND_INCLUDE_DIR memkind.h) +find_library(MEMKIND_LIBRARIES memkind) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Memkind DEFAULT_MSG + MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) + +mark_as_advanced(MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindQthreads.cmake b/lib/kokkos/cmake/Modules/FindQthreads.cmake new file mode 100644 index 0000000000000000000000000000000000000000..a254b0e996d23e01897f5f186a316c285f64e9ee --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindQthreads.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindQthreads +# ---------- +# +# Try to find Qthreads. 
+# +# The following variables are defined: +# +# QTHREADS_FOUND - System has Qthreads +# QTHREADS_INCLUDE_DIR - Qthreads include directory +# QTHREADS_LIBRARIES - Libraries needed to use Qthreads + +find_path(QTHREADS_INCLUDE_DIR qthread.h) +find_library(QTHREADS_LIBRARIES qthread) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Qthreads DEFAULT_MSG + QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) + +mark_as_advanced(QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) diff --git a/lib/kokkos/cmake/kokkos.cmake b/lib/kokkos/cmake/kokkos.cmake new file mode 100644 index 0000000000000000000000000000000000000000..235b7eaba47f295aaa712cdd07bc8318a3731dbf --- /dev/null +++ b/lib/kokkos/cmake/kokkos.cmake @@ -0,0 +1,1198 @@ + + +# Set which Kokkos backend to use. +set(KOKKOS_ENABLE_CUDA OFF CACHE BOOL "Use Kokkos CUDA backend") +set(KOKKOS_ENABLE_OPENMP ON CACHE BOOL "Use Kokkos OpenMP backend") +set(KOKKOS_ENABLE_PTHREAD OFF CACHE BOOL "Use Kokkos Pthreads backend") +set(KOKKOS_ENABLE_QTHREADS OFF CACHE BOOL "Use Kokkos Qthreads backend") +set(KOKKOS_ENABLE_SERIAL ON CACHE BOOL "Use Kokkos Serial backend") + +# List of possible host architectures. +list(APPEND KOKKOS_HOST_ARCH_LIST + None # No architecture optimization + AMDAVX # AMD chip + ARMv80 # ARMv8.0 Compatible CPU + ARMv81 # ARMv8.1 Compatible CPU + ARMv8-ThunderX # ARMv8 Cavium ThunderX CPU + SNB # Intel Sandy/Ivy Bridge CPUs + HSW # Intel Haswell CPUs + BDW # Intel Broadwell Xeon E-class CPUs + SKX # Intel Sky Lake Xeon E-class HPC CPUs (AVX512) + KNC # Intel Knights Corner Xeon Phi + KNL # Intel Knights Landing Xeon Phi + BGQ # IBM Blue Gene Q + Power7 # IBM POWER7 CPUs + Power8 # IBM POWER8 CPUs + Power9 # IBM POWER9 CPUs + ) + +# Setting this variable to a value other than "None" can improve host +# performance by turning on architecture specific code. 
+set(KOKKOS_HOST_ARCH "None" CACHE STRING "Optimize for specific host architecture.") +set_property(CACHE KOKKOS_HOST_ARCH PROPERTY STRINGS ${KOKKOS_HOST_ARCH_LIST}) + +# List of possible GPU architectures. +list(APPEND KOKKOS_GPU_ARCH_LIST + None # No architecture optimization + Kepler # NVIDIA Kepler default (generation CC 3.5) + Kepler30 # NVIDIA Kepler generation CC 3.0 + Kepler32 # NVIDIA Kepler generation CC 3.2 + Kepler35 # NVIDIA Kepler generation CC 3.5 + Kepler37 # NVIDIA Kepler generation CC 3.7 + Maxwell # NVIDIA Maxwell default (generation CC 5.0) + Maxwell50 # NVIDIA Maxwell generation CC 5.0 + Maxwell52 # NVIDIA Maxwell generation CC 5.2 + Maxwell53 # NVIDIA Maxwell generation CC 5.3 + Pascal60 # NVIDIA Pascal generation CC 6.0 + Pascal61 # NVIDIA Pascal generation CC 6.1 + ) + +# Setting this variable to a value other than "None" can improve GPU +# performance by turning on architecture specific code. +set(KOKKOS_GPU_ARCH "None" CACHE STRING "Optimize for specific GPU architecture.") +set_property(CACHE KOKKOS_GPU_ARCH PROPERTY STRINGS ${KOKKOS_GPU_ARCH_LIST}) + +set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.") + +# Enable hwloc library. +set(KOKKOS_ENABLE_HWLOC OFF CACHE BOOL "Enable hwloc for better process placement.") +set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library.") + +# Enable memkind library. +set(KOKKOS_ENABLE_MEMKIND OFF CACHE BOOL "Enable memkind.") +set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library.") + +set(KOKKOS_ENABLE_LIBRT OFF CACHE BOOL "Enable librt for more precise timer.") + +# Enable debugging. +set(KOKKOS_DEBUG OFF CACHE BOOL "Enable debugging in Kokkos.") + +# Enable profiling. +set(KOKKOS_ENABLE_PROFILING ON CACHE BOOL "Enable profiling.") + +# Enable aggressive vectorization. +set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION OFF CACHE BOOL "Enable aggressive vectorization.") + +# Qthreads options. 
+set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.") + +# CUDA options. +set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.") +set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC OFF CACHE BOOL "Enable CUDA LDG.") +set(KOKKOS_ENABLE_CUDA_UVM OFF CACHE BOOL "Enable CUDA unified virtual memory.") +set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OFF CACHE BOOL "Enable relocatable device code for CUDA.") +set(KOKKOS_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Enable lambdas for CUDA.") + +################################### FUNCTIONS ################################## + +# Sets the following compiler variables that are analogous to the CMAKE_* +# versions. We add the ability to detect NVCC (really nvcc_wrapper). +# KOKKOS_CXX_COMPILER +# KOKKOS_CXX_COMPILER_ID +# KOKKOS_CXX_COMPILER_VERSION +# +# Also verifies the compiler version meets the minimum required by Kokkos. +function(set_kokkos_cxx_compiler) + # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own + # version of the CMake variables and detect nvcc ourselves. Initially set to + # the CMake variable values. + set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER}) + set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) + set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION}) + + # Check if the compiler is nvcc (which really means nvcc_wrapper). + execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version + COMMAND grep nvcc + COMMAND wc -l + OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC + OUTPUT_STRIP_TRAILING_WHITESPACE) + + string(REGEX REPLACE "^ +" "" + INTERNAL_HAVE_COMPILER_NVCC ${INTERNAL_HAVE_COMPILER_NVCC}) + + if(INTERNAL_HAVE_COMPILER_NVCC) + # Set the compiler id to nvcc. We use the value used by CMake 3.8. + set(INTERNAL_CXX_COMPILER_ID NVIDIA) + + # Set nvcc's compiler version. 
+ execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version + COMMAND grep release + OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + + string(REGEX MATCH "[0-9]+\.[0-9]+\.[0-9]+$" + INTERNAL_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION}) + endif() + + # Enforce the minimum compilers supported by Kokkos. + set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.7.2 or higher") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 14.0.4 or higher") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n") + + if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + endif() + elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.7.2) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + endif() + elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 14.0.4) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + endif() + elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + endif() + elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") + endif() + endif() + + # Enforce that extensions are turned off for nvcc_wrapper. + if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) + if(NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS STREQUAL ON) + message(FATAL_ERROR "NVCC doesn't support C++ extensions. 
Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.") + endif() + endif() + + if(KOKKOS_ENABLE_CUDA) + # Enforce that the compiler can compile CUDA code. + if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang) + if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0) + message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.") + endif() + elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA) + message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.") + endif() + endif() + + set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE) + set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE) + set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE) +endfunction() + +# Transitively enforces that the appropriate CXX standard compile flags (C++11 +# or above) are added to targets that use the Kokkos library. Compile features +# are used if possible. Otherwise, the appropriate flags are added to +# KOKKOS_CXX_FLAGS. Values set by the user to CMAKE_CXX_STANDARD and +# CMAKE_CXX_EXTENSIONS are honored. +function(set_kokkos_compiler_standard) + # The following table lists the versions of CMake that supports CXX_STANDARD + # and the CXX compile features for different compilers. The versions are + # based on CMake documentation, looking at CMake code, and verifying by + # testing with specific CMake versions. + # + # COMPILER CXX_STANDARD Compile Features + # --------------------------------------------------------------- + # Clang 3.1 3.1 + # GNU 3.1 3.2 + # AppleClang 3.2 3.2 + # Intel 3.6 3.6 + # Cray No No + # PGI No No + # XL No No + # + # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's + # flags for turning on C++11. Since for compiler ID and versioning purposes + # CMake recognizes the host compiler when calling nvcc_wrapper, this just + # works. 
Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means + # that we can only use host compilers for CUDA builds that use those flags. + # It also means that extensions (gnu++11) can't be turned on for CUDA builds. + + # Check if we can use compile features. + if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + if(CMAKE_CXX_COMPILER_ID STREQUAL Clang) + if(NOT CMAKE_VERSION VERSION_LESS 3.1) + set(INTERNAL_USE_COMPILE_FEATURES ON) + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU) + if(NOT CMAKE_VERSION VERSION_LESS 3.2) + set(INTERNAL_USE_COMPILE_FEATURES ON) + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) + if(NOT CMAKE_VERSION VERSION_LESS 3.6) + set(INTERNAL_USE_COMPILE_FEATURES ON) + endif() + endif() + endif() + + if(INTERNAL_USE_COMPILE_FEATURES) + # Use the compile features aspect of CMake to transitively cause C++ flags + # to populate to user code. + + # I'm using a hack by requiring features that I know force the lowest version + # of the compilers we want to support. Clang 3.3 and later support all of + # the C++11 standard. With CMake 3.8 and higher, we could switch to using + # cxx_std_11. + set(KOKKOS_CXX11_FEATURES + cxx_nonstatic_member_init # Forces GCC 4.7 or later and Intel 14.0 or later. + PARENT_SCOPE + ) + else() + # CXX compile features are not yet implemented for this combination of + # compiler and version of CMake. + + if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang) + # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile + # features for the AppleClang compiler. Set compiler flags transitively + # here such that they trickle down to a call to target_compile_options(). + + # The following two blocks of code were copied from + # /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then + # modified. 
+ if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11") + endif() + + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1) + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14") + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14") + elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1) + # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro + # greater than 201103L. + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y") + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y") + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel) + # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile + # features for the Intel compiler. Set compiler flags transitively here + # such that they trickle down to a call to target_compile_options(). + + # The following three blocks of code were copied from + # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified. + if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC") + set(_std -Qstd) + set(_ext c++) + else() + set(_std -std) + set(_ext gnu++) + endif() + + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2) + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14") + # TODO: There is no gnu++14 value supported; figure out what to do. + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14") + elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0) + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y") + # TODO: There is no gnu++14 value supported; figure out what to do. 
+ set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y") + endif() + + if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0) + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11") + elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1) + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x") + endif() + elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11") + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11") + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11") + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11") + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11") + elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL) + # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL + # compiler. Set compiler options transitively here such that they trickle + # down to a call to target_compile_options(). + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11") + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11") + set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11") + set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11") + else() + # Assume GNU. 
CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and + # above for this compiler. If the user explicitly requests a C++ + # standard, CMake takes care of it. If not, transitively require C++11. + if(NOT CMAKE_CXX_STANDARD) + set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION}) + set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION}) + endif() + endif() + + # Set the C++ standard info for Kokkos respecting user set values for + # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS. + if(CMAKE_CXX_STANDARD EQUAL 14) + if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION}) + else() + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION}) + endif() + elseif(CMAKE_CXX_STANDARD EQUAL 11) + if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) + else() + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) + endif() + else() + # The user didn't explicitly request a standard, transitively require + # C++11 respecting CMAKE_CXX_EXTENSIONS. + if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF) + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION}) + else() + set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION}) + endif() + endif() + + set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE) + endif() +endfunction() + +########################## COMPILER AND FEATURE CHECKS ######################### + +# TODO: We are assuming that nvcc_wrapper is using g++ as the host compiler. +# Should we allow the user the option to change this? The host compiler +# for nvcc_wrapper can be set via the NVCC_WRAPPER_DEFAULT_COMPILER +# environment variable or by passing a different host compiler with the +# -ccbin flag. + +# TODO: Fully add CUDA support for Clang. 
+set_kokkos_cxx_compiler() + +set_kokkos_compiler_standard() + +######################### INITIALIZE INTERNAL VARIABLES ######################## + +# Add Kokkos' modules to CMake's module path. +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${Kokkos_SOURCE_DIR}/cmake/Modules/") + +# Start with all global variables set to false. This guarantees correct +# results with changes and multiple configures. +set(KOKKOS_HAVE_CUDA OFF CACHE INTERNAL "") +set(KOKKOS_USE_CUDA_UVM OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_CUDA_RDC OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_CUDA_LAMBDA OFF CACHE INTERNAL "") +set(KOKKOS_CUDA_CLANG_WORKAROUND OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_OPENMP OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_PTHREAD OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_QTHREADS OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_SERIAL OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_HWLOC OFF CACHE INTERNAL "") +set(KOKKOS_ENABLE_HBWSPACE OFF CACHE INTERNAL "") +set(KOKKOS_HAVE_DEBUG OFF CACHE INTERNAL "") +set(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK OFF CACHE INTERNAL "") +set(KOKKOS_ENABLE_ISA_X86_64 OFF CACHE INTERNAL "") +set(KOKKOS_ENABLE_ISA_KNC OFF CACHE INTERNAL "") +set(KOKKOS_ENABLE_ISA_POWERPCLE OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_ARMV80 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_ARMV81 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_ARMV8_THUNDERX OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_AVX OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_AVX2 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_AVX512MIC OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_AVX512XEON OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KNC OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_POWER8 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_POWER9 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KEPLER OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KEPLER30 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KEPLER32 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KEPLER35 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_KEPLER37 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_MAXWELL OFF CACHE INTERNAL "") 
+set(KOKKOS_ARCH_MAXWELL50 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_MAXWELL52 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_MAXWELL53 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_PASCAL OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_PASCAL60 OFF CACHE INTERNAL "") +set(KOKKOS_ARCH_PASCAL61 OFF CACHE INTERNAL "") + +############################## SET BACKEND OPTIONS ############################# + +# Make sure at least one backend is selected. +if(NOT KOKKOS_ENABLE_CUDA AND NOT KOKKOS_ENABLE_OPENMP AND NOT KOKKOS_ENABLE_PTHREAD AND NOT KOKKOS_ENABLE_QTHREADS AND NOT KOKKOS_ENABLE_SERIAL) + message(FATAL_ERROR "Must set one of KOKKOS_ENABLE_CUDA, KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, KOKKOS_ENABLE_QTHREADS, or KOKKOS_ENABLE_SERIAL") +endif() + +# Only one of OpenMP, Pthreads, and Qthreads can be set. +set(KOKKOS_MESSAGE_TEXT "Only one of KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, and KOKKOS_ENABLE_QTHREADS can be selected") +if(KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_PTHREAD) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") +elseif(KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_QTHREADS) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") +elseif(KOKKOS_ENABLE_PTHREAD AND KOKKOS_ENABLE_QTHREADS) + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") +endif() + +# Get source files. +file(GLOB KOKKOS_CORE_SRCS core/src/impl/*.cpp) +file(GLOB KOKKOS_CONTAINERS_SRCS containers/src/impl/*.cpp) + +# Set options if using CUDA backend. +if(KOKKOS_ENABLE_CUDA) + if(KOKKOS_CUDA_DIR) + set(CUDA_TOOLKIT_ROOT_DIR ${KOKKOS_CUDA_DIR}) + endif() + + find_package(CUDA) + + if(NOT CUDA_FOUND) + if(KOKKOS_CUDA_DIR) + message(FATAL_ERROR "Couldn't find CUDA in default locations, and KOKKOS_CUDA_DIR points to an invalid installation.") + else() + message(FATAL_ERROR "Couldn't find CUDA in default locations. 
Set KOKKOS_CUDA_DIR.") + endif() + endif() + + list(APPEND KOKKOS_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS}) + list(APPEND KOKKOS_LD_FLAGS -L${CUDA_TOOLKIT_ROOT_DIR}/lib64) + list(APPEND KOKKOS_LIBS cudart cuda) + + set(KOKKOS_HAVE_CUDA ON CACHE INTERNAL "") + file(GLOB KOKKOS_CUDA_SRCS core/src/Cuda/*.cpp) + list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_CUDA_SRCS}) + + # Set CUDA UVM if requested. + if(KOKKOS_ENABLE_CUDA_UVM) + set(KOKKOS_USE_CUDA_UVM ON CACHE INTERNAL "") + endif() + + # Set CUDA relocatable device code if requested. + if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + set(KOKKOS_HAVE_CUDA_RDC ON CACHE INTERNAL "") + list(APPEND KOKKOS_CXX_FLAGS --relocatable-device-code=true) + list(APPEND KOKKOS_LD_FLAGS --relocatable-device-code=true) + endif() + + # Set CUDA lambda if requested. + if(KOKKOS_ENABLE_CUDA_LAMBDA) + set(KOKKOS_HAVE_CUDA_LAMBDA ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + if(KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7.5) + message(FATAL_ERROR "CUDA lambda support requires CUDA 7.5 or higher. Disable it or use a 7.5 or later compiler.") + else() + list(APPEND KOKKOS_CXX_FLAGS -expt-extended-lambda) + endif() + endif() + endif() + + # Set Clang specific options. + if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + list(APPEND KOKKOS_CXX_FLAGS --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}) + + set(KOKKOS_CUDA_CLANG_WORKAROUND ON CACHE INTERNAL "") + + # Force CUDA_LDG_INTRINSIC on when using Clang. + set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ON CACHE BOOL "Enable CUDA LDG." FORCE) + endif() +endif() + +# Set options if using OpenMP backend. 
+if(KOKKOS_ENABLE_OPENMP) + find_package(OpenMP REQUIRED) + + if(OPENMP_FOUND) + if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + list(APPEND KOKKOS_CXX_FLAGS -Xcompiler) + endif() + + list(APPEND KOKKOS_CXX_FLAGS ${OpenMP_CXX_FLAGS}) + list(APPEND KOKKOS_LD_FLAGS ${OpenMP_CXX_FLAGS}) + endif() + + set(KOKKOS_HAVE_OPENMP ON CACHE INTERNAL "") + file(GLOB KOKKOS_OPENMP_SRCS core/src/OpenMP/*.cpp) + list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_OPENMP_SRCS}) +endif() + +# Set options if using Pthreads backend. +if(KOKKOS_ENABLE_PTHREAD) + find_package(Threads REQUIRED) + + list(APPEND KOKKOS_LIBS Threads::Threads) + + set(KOKKOS_HAVE_PTHREAD ON CACHE INTERNAL "") + file(GLOB KOKKOS_PTHREAD_SRCS core/src/Threads/*.cpp) + list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_PTHREAD_SRCS}) +endif() + +# Set options if using Qthreads backend (globs the Qthreads sources, not the Pthreads ones). +if(KOKKOS_ENABLE_QTHREADS) + if(KOKKOS_QTHREADS_DIR) + list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_QTHREADS_DIR}) + endif() + + find_package(Qthreads) + + if(NOT QTHREADS_FOUND) + if(KOKKOS_QTHREADS_DIR) + message(FATAL_ERROR "Couldn't find Qthreads in default locations, and KOKKOS_QTHREADS_DIR points to an invalid installation.") + else() + message(FATAL_ERROR "Couldn't find Qthreads in default locations. Set KOKKOS_QTHREADS_DIR.") + endif() + endif() + + list(APPEND KOKKOS_INCLUDE_DIRS ${QTHREADS_INCLUDE_DIR}) + list(APPEND KOKKOS_LIBS ${QTHREADS_LIBRARIES}) + + set(KOKKOS_HAVE_QTHREADS ON CACHE INTERNAL "") + file(GLOB KOKKOS_QTHREADS_SRCS core/src/Qthreads/*.cpp) + list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_QTHREADS_SRCS}) + + if(KOKKOS_QTHREADS_DIR) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + endif() +endif() + +# Set options if using Serial backend. +if(KOKKOS_ENABLE_SERIAL) + set(KOKKOS_HAVE_SERIAL ON CACHE INTERNAL "") +else() + # Remove serial source files. 
+ list(REMOVE_ITEM KOKKOS_CORE_SRCS + "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial.cpp" + "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial_Task.cpp") +endif() + +########################### SET ARCHITECTURE OPTIONS ########################### + +# Make sure the host architecture option is valid. Need to verify in case user +# passes the option via the command line. +list(FIND KOKKOS_HOST_ARCH_LIST "${KOKKOS_HOST_ARCH}" KOKKOS_VALID_HOST_ARCH) +if(KOKKOS_VALID_HOST_ARCH EQUAL -1) + set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_HOST_ARCH_LIST}") + string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}") + set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_HOST_ARCH: '${KOKKOS_HOST_ARCH}'") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n") + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") +endif() + +# Make sure the GPU architecture option is valid. Need to verify in case user +# passes the option via the command line. +list(FIND KOKKOS_GPU_ARCH_LIST "${KOKKOS_GPU_ARCH}" KOKKOS_VALID_GPU_ARCH) +if(KOKKOS_VALID_GPU_ARCH EQUAL -1) + set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_GPU_ARCH_LIST}") + string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}") + set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_GPU_ARCH: '${KOKKOS_GPU_ARCH}'") + set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n") + message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}") +endif() + +# Decide what ISA level we are able to support. +if(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW OR + KOKKOS_HOST_ARCH STREQUAL SKX OR KOKKOS_HOST_ARCH STREQUAL KNL) + set(KOKKOS_ENABLE_ISA_X86_64 ON CACHE INTERNAL "") +endif() + +if(KOKKOS_HOST_ARCH STREQUAL KNC) + set(KOKKOS_ENABLE_ISA_KNC ON CACHE INTERNAL "") +endif() + +if(KOKKOS_HOST_ARCH STREQUAL Power8 OR KOKKOS_HOST_ARCH STREQUAL Power9) + set(KOKKOS_ENABLE_ISA_POWERPCLE ON CACHE INTERNAL "") +endif() + +# Add host architecture options. 
+if(KOKKOS_HOST_ARCH STREQUAL ARMv80) + set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=armv8-a) + list(APPEND KOKKOS_LD_FLAGS -march=armv8-a) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL ARMv81) + set(KOKKOS_ARCH_ARMV81 ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=armv8.1-a) + list(APPEND KOKKOS_LD_FLAGS -march=armv8.1-a) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL ARMv8-ThunderX) + set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "") + set(KOKKOS_ARCH_ARMV8_THUNDERX ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=armv8-a -mtune=thunderx) + list(APPEND KOKKOS_LD_FLAGS -march=armv8-a -mtune=thunderx) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL AMDAVX) + set(KOKKOS_ARCH_AVX ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + list(APPEND KOKKOS_CXX_FLAGS -mavx) + list(APPEND KOKKOS_LD_FLAGS -mavx) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + list(APPEND KOKKOS_CXX_FLAGS -tp=sandybridge) + list(APPEND KOKKOS_LD_FLAGS -tp=sandybridge) + else() + list(APPEND KOKKOS_CXX_FLAGS -mavx) + list(APPEND KOKKOS_LD_FLAGS -mavx) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW) + set(KOKKOS_ARCH_AVX2 ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + list(APPEND KOKKOS_CXX_FLAGS -xCORE-AVX2) + list(APPEND KOKKOS_LD_FLAGS -xCORE-AVX2) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + list(APPEND KOKKOS_CXX_FLAGS -tp=haswell) + list(APPEND KOKKOS_LD_FLAGS -tp=haswell) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=core-avx2 -mtune=core-avx2) + 
list(APPEND KOKKOS_LD_FLAGS -march=core-avx2 -mtune=core-avx2) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL KNL) + set(KOKKOS_ARCH_AVX512MIC ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + list(APPEND KOKKOS_CXX_FLAGS -xMIC-AVX512) + list(APPEND KOKKOS_LD_FLAGS -xMIC-AVX512) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=knl) + list(APPEND KOKKOS_LD_FLAGS -march=knl) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL SKX) + set(KOKKOS_ARCH_AVX512XEON ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel) + list(APPEND KOKKOS_CXX_FLAGS -xCORE-AVX512) + list(APPEND KOKKOS_LD_FLAGS -xCORE-AVX512) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -march=skylake-avx512) + list(APPEND KOKKOS_LD_FLAGS -march=skylake-avx512) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL KNC) + set(KOKKOS_ARCH_KNC ON CACHE INTERNAL "") + list(APPEND KOKKOS_CXX_FLAGS -mmic) + list(APPEND KOKKOS_LD_FLAGS -mmic) +elseif(KOKKOS_HOST_ARCH STREQUAL Power8) + set(KOKKOS_ARCH_POWER8 ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -mcpu=power8 -mtune=power8) + list(APPEND KOKKOS_LD_FLAGS -mcpu=power8 -mtune=power8) + endif() +elseif(KOKKOS_HOST_ARCH STREQUAL Power9) + set(KOKKOS_ARCH_POWER9 ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL PGI) + else() + list(APPEND KOKKOS_CXX_FLAGS -mcpu=power9 -mtune=power9) + list(APPEND KOKKOS_LD_FLAGS -mcpu=power9 -mtune=power9) + endif() +endif() + +# Add GPU architecture options. 
+if(KOKKOS_ENABLE_CUDA) + if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + set(KOKKOS_GPU_ARCH_FLAG -arch) + elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Clang) + list(APPEND KOKKOS_CXX_FLAGS -x cuda) + set(KOKKOS_GPU_ARCH_FLAG --cuda-gpu-arch) + endif() + + if(KOKKOS_GPU_ARCH STREQUAL Kepler30) + set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") + set(KOKKOS_ARCH_KEPLER30 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_30) + elseif(KOKKOS_GPU_ARCH STREQUAL Kepler32) + set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") + set(KOKKOS_ARCH_KEPLER32 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_32) + elseif(KOKKOS_GPU_ARCH STREQUAL Kepler35 OR KOKKOS_GPU_ARCH STREQUAL Kepler) + set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") + set(KOKKOS_ARCH_KEPLER35 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_35) + elseif(KOKKOS_GPU_ARCH STREQUAL Kepler37) + set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "") + set(KOKKOS_ARCH_KEPLER37 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_37) + elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell50 OR KOKKOS_GPU_ARCH STREQUAL Maxwell) + set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") + set(KOKKOS_ARCH_MAXWELL50 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_50) + elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell52) + set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") + set(KOKKOS_ARCH_MAXWELL52 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_52) + elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell53) + set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "") + set(KOKKOS_ARCH_MAXWELL53 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_53) + elseif(KOKKOS_GPU_ARCH STREQUAL Pascal60) + set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "") + set(KOKKOS_ARCH_PASCAL60 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_60) + elseif(KOKKOS_GPU_ARCH STREQUAL Pascal61) + set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "") + 
set(KOKKOS_ARCH_PASCAL61 ON CACHE INTERNAL "") + set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_61) + endif() + + if(NOT KOKKOS_GPU_ARCH STREQUAL None) + list(APPEND KOKKOS_CXX_FLAGS ${KOKKOS_GPU_ARCH_FLAG}) + + if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + list(APPEND KOKKOS_LD_FLAGS ${KOKKOS_GPU_ARCH_FLAG}) + endif() + endif() +endif() + +############################### SET OTHER OPTIONS ############################## + +# Set options if using hwloc. +if(KOKKOS_ENABLE_HWLOC) + if(KOKKOS_HWLOC_DIR) + list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_HWLOC_DIR}) + endif() + + find_package(HWLOC) + + if(NOT HWLOC_FOUND) + if(KOKKOS_HWLOC_DIR) + message(FATAL_ERROR "Couldn't find HWLOC in default locations, and KOKKOS_HWLOC_DIR points to an invalid installation.") + else() + message(FATAL_ERROR "Couldn't find HWLOC in default locations. Set KOKKOS_HWLOC_DIR.") + endif() + endif() + + list(APPEND KOKKOS_INCLUDE_DIRS ${HWLOC_INCLUDE_DIR}) + list(APPEND KOKKOS_LIBS ${HWLOC_LIBRARIES}) + + set(KOKKOS_HAVE_HWLOC ON CACHE INTERNAL "") + + if(KOKKOS_HWLOC_DIR) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + endif() +endif() + +# Set options if using memkind. +if(KOKKOS_ENABLE_MEMKIND) + if(KOKKOS_MEMKIND_DIR) + list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_MEMKIND_DIR}) + endif() + + find_package(Memkind) + + if(NOT MEMKIND_FOUND) + if(KOKKOS_MEMKIND_DIR) + message(FATAL_ERROR "Couldn't find Memkind in default locations, and KOKKOS_MEMKIND_DIR points to an invalid installation.") + else() + message(FATAL_ERROR "Couldn't find Memkind in default locations. Set KOKKOS_MEMKIND_DIR.") + endif() + endif() + + set(KOKKOS_ENABLE_HBWSPACE ON CACHE INTERNAL "") + list(APPEND KOKKOS_INCLUDE_DIRS ${MEMKIND_INCLUDE_DIR}) + list(APPEND KOKKOS_LIBS ${MEMKIND_LIBRARIES}) + + if(KOKKOS_MEMKIND_DIR) + list(REMOVE_AT CMAKE_PREFIX_PATH -1) + endif() +else() + # Remove HBW source file. 
+ list(REMOVE_ITEM KOKKOS_CORE_SRCS + "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_HBWSpace.cpp") +endif() + +# Set options if using librt. +if(KOKKOS_ENABLE_LIBRT) + list(APPEND KOKKOS_LIBS rt) +endif() + +# Set debugging if requested; -lineinfo is added only for the NVIDIA compiler. +if(KOKKOS_DEBUG) + set(KOKKOS_HAVE_DEBUG ON CACHE INTERNAL "") + set(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE INTERNAL "") + + if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA) + list(APPEND KOKKOS_CXX_FLAGS -lineinfo) + endif() + + list(APPEND KOKKOS_CXX_FLAGS -g) + list(APPEND KOKKOS_LD_FLAGS -g) +endif() + +# Set profiling if requested. +if(KOKKOS_ENABLE_PROFILING) + list(APPEND KOKKOS_LIBS dl) +else() + # Remove profiling source file. + list(REMOVE_ITEM KOKKOS_CORE_SRCS + "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Profiling_Interface.cpp") +endif() + +# Use GCC toolchain with Clang. +if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT APPLE) + find_program(KOKKOS_GCC_PATH g++) + if(NOT KOKKOS_GCC_PATH) + message(FATAL_ERROR "Can't find GCC path to get toolchain for Clang.") + endif() + string(REPLACE "/bin/g++" "" KOKKOS_GCC_PATH ${KOKKOS_GCC_PATH}) + + list(APPEND KOKKOS_CXX_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH}) + list(APPEND KOKKOS_LD_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH}) +endif() + +############################ Detect if submodule ############################### +# +# With thanks to StackOverflow: +# http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake +# +get_directory_property(HAS_PARENT PARENT_DIRECTORY) +if(HAS_PARENT) + message(STATUS "Submodule build") + SET(KOKKOS_HEADER_DIR "include/kokkos") +else() + message(STATUS "Standalone build") + SET(KOKKOS_HEADER_DIR "include") +endif() + +############################ PRINT CONFIGURE STATUS ############################ + +message(STATUS "") +message(STATUS "****************** Kokkos Settings ******************") +message(STATUS "Execution Spaces") + +if(KOKKOS_ENABLE_CUDA) + message(STATUS " Device Parallel: Cuda") +else() + message(STATUS " Device
Parallel: None") +endif() + +if(KOKKOS_ENABLE_OPENMP) + message(STATUS " Host Parallel: OpenMP") +elseif(KOKKOS_ENABLE_PTHREAD) + message(STATUS " Host Parallel: Pthread") +elseif(KOKKOS_ENABLE_QTHREADS) + message(STATUS " Host Parallel: Qthreads") +else() + message(STATUS " Host Parallel: None") +endif() + +if(KOKKOS_ENABLE_SERIAL) + message(STATUS " Host Serial: Serial") +else() + message(STATUS " Host Serial: None") +endif() + +message(STATUS "") +message(STATUS "Architectures") +message(STATUS " Host Architecture: ${KOKKOS_HOST_ARCH}") +message(STATUS " Device Architecture: ${KOKKOS_GPU_ARCH}") + +message(STATUS "") +message(STATUS "Enabled options") + +if(KOKKOS_SEPARATE_LIBS) + message(STATUS " KOKKOS_SEPARATE_LIBS") +endif() + +if(KOKKOS_ENABLE_HWLOC) + message(STATUS " KOKKOS_ENABLE_HWLOC") +endif() + +if(KOKKOS_ENABLE_MEMKIND) + message(STATUS " KOKKOS_ENABLE_MEMKIND") +endif() + +if(KOKKOS_DEBUG) + message(STATUS " KOKKOS_DEBUG") +endif() + +if(KOKKOS_ENABLE_PROFILING) + message(STATUS " KOKKOS_ENABLE_PROFILING") +endif() + +if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) + message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION") +endif() + +if(KOKKOS_ENABLE_CUDA) + if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC) + message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC") + endif() + + if(KOKKOS_ENABLE_CUDA_UVM) + message(STATUS " KOKKOS_ENABLE_CUDA_UVM") + endif() + + if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE) + message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE") + endif() + + if(KOKKOS_ENABLE_CUDA_LAMBDA) + message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA") + endif() + + if(KOKKOS_CUDA_DIR) + message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}") + endif() +endif() + +if(KOKKOS_QTHREADS_DIR) + message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}") +endif() + +if(KOKKOS_HWLOC_DIR) + message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}") +endif() + +if(KOKKOS_MEMKIND_DIR) + message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}") +endif() + 
+message(STATUS "*****************************************************") +message(STATUS "") + +################################ SET UP PROJECT ################################ + +configure_file( + ${Kokkos_SOURCE_DIR}/core/cmake/KokkosCore_config.h.in + ${Kokkos_BINARY_DIR}/KokkosCore_config.h +) + +SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") +SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") +SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH + "Installation directory for header files") +IF(WIN32 AND NOT CYGWIN) + SET(DEF_INSTALL_CMAKE_DIR CMake) +ELSE() + SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) +ENDIF() + +SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH + "Installation directory for CMake files") + +# Make relative paths absolute (needed later on) +FOREACH(p LIB BIN INCLUDE CMAKE) + SET(var INSTALL_${p}_DIR) + IF(NOT IS_ABSOLUTE "${${var}}") + SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") + ENDIF() +ENDFOREACH() + +# set up include-directories +SET (Kokkos_INCLUDE_DIRS + ${Kokkos_SOURCE_DIR}/core/src + ${Kokkos_SOURCE_DIR}/containers/src + ${Kokkos_SOURCE_DIR}/algorithms/src + ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h +) + +INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) + +IF(KOKKOS_SEPARATE_LIBS) + # kokkoscore + ADD_LIBRARY( + kokkoscore + ${KOKKOS_CORE_SRCS} + ) + + target_compile_options( + kokkoscore + PUBLIC ${KOKKOS_CXX_FLAGS} + ) + + target_compile_features( + kokkoscore + PUBLIC ${KOKKOS_CXX11_FEATURES} + ) + + # Install the kokkoscore library + INSTALL (TARGETS kokkoscore + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin + ) + + # Install the kokkoscore headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/core/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + # Install KokkosCore_config.h header + INSTALL (FILES + 
${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} + ) + + TARGET_LINK_LIBRARIES( + kokkoscore + ${KOKKOS_LD_FLAGS} + ${KOKKOS_LIBS} + ) + + # kokkoscontainers + ADD_LIBRARY( + kokkoscontainers + ${KOKKOS_CONTAINERS_SRCS} + ) + + TARGET_LINK_LIBRARIES( + kokkoscontainers + kokkoscore + ) + + # Install the kokkoscontainers library + INSTALL (TARGETS kokkoscontainers + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + # Install the kokkoscontainers headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/containers/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + # kokkosalgorithms - Build as interface library since no source files. + ADD_LIBRARY( + kokkosalgorithms + INTERFACE + ) + + target_include_directories( + kokkosalgorithms + INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src + ) + + TARGET_LINK_LIBRARIES( + kokkosalgorithms + INTERFACE kokkoscore + ) + + # Install the kokkosalgorithms library + INSTALL (TARGETS kokkosalgorithms + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + # Install the kokkosalgorithms headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/algorithms/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) + +ELSE() + # kokkos + ADD_LIBRARY( + kokkos + ${KOKKOS_CORE_SRCS} + ${KOKKOS_CONTAINERS_SRCS} + ) + + target_compile_options( + kokkos + PUBLIC ${KOKKOS_CXX_FLAGS} + ) + + target_compile_features( + kokkos + PUBLIC ${KOKKOS_CXX11_FEATURES} + ) + + TARGET_LINK_LIBRARIES( + kokkos + ${KOKKOS_LD_FLAGS} + ${KOKKOS_LIBS} + ) + + # Install the kokkos library + INSTALL (TARGETS kokkos + EXPORT KokkosTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION 
${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + + # Install the kokkos headers + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/core/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/containers/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/algorithms/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + INSTALL (FILES + ${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} + ) + + include_directories(${Kokkos_BINARY_DIR}) + include_directories(${Kokkos_SOURCE_DIR}/core/src) + include_directories(${Kokkos_SOURCE_DIR}/containers/src) + include_directories(${Kokkos_SOURCE_DIR}/algorithms/src) + + + SET (Kokkos_LIBRARIES_NAMES kokkos) + +endif() + +# Add all targets to the build-tree export set +export(TARGETS ${Kokkos_LIBRARIES_NAMES} + FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") + +# Export the package for use from the build-tree +# (this registers the build-tree with a global CMake-registry) +export(PACKAGE Kokkos) + +# Create the KokkosConfig.cmake and KokkosConfigVersion files +file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" + "${INSTALL_INCLUDE_DIR}") +# ... for the build tree +set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) +# ... 
for the install tree +set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) + +# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake +install(FILES + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" + DESTINATION "${INSTALL_CMAKE_DIR}") + +# Install the export set for use with the install-tree +INSTALL(EXPORT KokkosTargets DESTINATION + "${INSTALL_CMAKE_DIR}") diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt index 961e4186ec6e0fd24c3b71bddcbcbaa2873a41ca..0f2448781425bdf8b4099ab9c60017811bda840b 100644 --- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -60,34 +60,12 @@ Step 2: // -------------------------------------------------------------------------------- // Step 3: - 3.1. Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with appropriate environment variable(s): + 3.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard are provided in kokkos/config/trilinos-integration - - GCC/4.7.2-OpenMP/Complex - Run tests with the following environment variable: + Usually it's a good idea to run those scripts via nohup. + You can run all four at the same time, use separate directories for each. 
- export OMP_NUM_THREADS=2 - - - - Intel/15.0.2-Serial/NoComplex - - - - GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex - Run tests with the following environment variables: - - export CUDA_LAUNCH_BLOCKING=1 - export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 - - - mkdir Build - cd Build - cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./ - ** Set the path to Trilinos appropriately within the configure-all script ** - source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos - source configure-all - make -k (-k means "keep going" to get past build errors; -j12 can also be specified to build with 12 threads, for example) - ctest - - 3.2. Compare the failed test output to the test output on the dashboard ( testing.sandia.gov/cdash select Trilinos ); investigate and fix problems if new tests fail after the Kokkos snapshot + 3.2. Compare the failed test output between the pristine and the updated runs; investigate and fix problems if new tests fail after the Kokkos snapshot // -------------------------------------------------------------------------------- // @@ -134,7 +112,7 @@ Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on master: sha1 develop: sha1 - git push --follow-tags origin master + 4.4. Do NOT push yet // -------------------------------------------------------------------------------- // @@ -156,9 +134,15 @@ Step 5: python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages - 5.3. Push the updated develop branch of Trilinos to Github - congratulations!!! + 5.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3) - (From Trilinos directory): - git push + The modules are listed in kokkos/config/trilinos-integration/checkin-test + Run checkin-test, forward dependencies and optional dependencies must be enabled + If push failed because someone else clearly broke something, push manually. 
+ If push failed for unclear reasons, investigate, fix, and potentially start over from step 2 after resetting your local kokkos/master branch + +Step 6: Push Kokkos to master + + git push --follow-tags origin master // -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh index d4fb25a8e139c315a862306173a0b1d2a07e7cbd..1867de7204af4e1a0d0a83a881937256a4c51ee7 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-all.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # #----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh index c2e17bb9443ad37576b490149d63e1d7b9f9b1ef..5a6cc1493eb7cdbca7ac29ff2c2104249116d061 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # #----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh index 39b72d5ce136ff2ea00c6e1cc4a049eb02d606ee..606755da81f0cb7afa16c136798e15155c02b7d5 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 
gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # #----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index 9eaecb5031b1328989e114b50a86ac07c78b8e29..cc6f4c97d74930de20e63dd39d7879bdfde728c6 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -5,4 +5,5 @@ tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304 tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966 tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 -tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 +tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 +tag: 2.03.05 date: 05:27:2017 master: 36b92f43 develop: 79073186 diff --git a/lib/kokkos/config/snapshot.py b/lib/kokkos/config/snapshot.py index d816cd0c9c1962c0c95c672467319474a72669fb..bfa97bf48a2909fe5395d2cea08b6eb6336d6ee3 100755 --- a/lib/kokkos/config/snapshot.py +++ b/lib/kokkos/config/snapshot.py @@ -27,7 +27,7 @@ import subprocess, argparse, re, doctest, os, datetime, traceback def parse_cmdline(description): parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description) - parser.add_argument("-n", "--no-comit", action="store_false", dest="create_commit", default=True, + parser.add_argument("-n", "--no-commit", action="store_false", dest="create_commit", default=True, help="Do not perform a commit or create a commit message.") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False, help="Enable verbose mode.") @@ -39,6 +39,8 @@ def parse_cmdline(description): help="Type of repository of the source, use none to skip all repository 
operations.") parser.add_argument("--dest-repo", choices=["git","none"], default="", help="Type of repository of the destination, use none to skip all repository operations.") + parser.add_argument("--small", action="store_true", dest="small_mode", + help="Don't include tests and other extra files when copying.") parser.add_argument("source", help="Source project to snapshot from.") parser.add_argument("destination", help="Destination to snapshot too.") @@ -58,9 +60,9 @@ def validate_options(options): options.source = os.path.abspath(options.source) options.destination = os.path.abspath(options.destination) - + if os.path.exists(options.source): - apparent_source_repo_type, source_root = deterimine_repo_type(options.source) + apparent_source_repo_type, source_root = determine_repo_type(options.source) else: raise RuntimeError("Could not find source directory of %s." % options.source) options.source_root = source_root @@ -69,7 +71,7 @@ def validate_options(options): print "Could not find destination directory of %s so it will be created." % options.destination os.makedirs(options.destination) - apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination) + apparent_dest_repo_type, dest_root = determine_repo_type(options.destination) options.dest_root = dest_root #error on svn repo types for now @@ -111,7 +113,7 @@ def run_cmd(cmd, options, working_dir="."): print "==== %s stderr ====" % cmd_str print proc_stderr print "==== %s stderr ====" % cmd_str - + if ret_val != 0: raise RuntimeError("Command '%s' failed with error code %d. 
Error message:%s%s%sstdout:%s" % \ (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout)) @@ -119,7 +121,7 @@ def run_cmd(cmd, options, working_dir="."): return proc_stdout, proc_stderr #end run_cmd -def deterimine_repo_type(location): +def determine_repo_type(location): apparent_repo_type = "none" while location != "": @@ -133,16 +135,32 @@ def deterimine_repo_type(location): location = location[:location.rfind(os.sep)] return apparent_repo_type, location - -#end deterimine_repo_type +#end determine_repo_type def rsync(source, dest, options): rsync_cmd = ["rsync", "-ar", "--delete"] if options.debug_mode: rsync_cmd.append("-v") + if options.small_mode or options.source_repo == "git": + rsync_cmd.append("--delete-excluded") + + if options.small_mode: + rsync_cmd.append("--include=config/master_history.txt") + rsync_cmd.append("--include=cmake/tpls") + rsync_cmd.append("--exclude=benchmarks/") + rsync_cmd.append("--exclude=config/*") + rsync_cmd.append("--exclude=doc/") + rsync_cmd.append("--exclude=example/") + rsync_cmd.append("--exclude=tpls/") + rsync_cmd.append("--exclude=HOW_TO_SNAPSHOT") + rsync_cmd.append("--exclude=unit_test") + rsync_cmd.append("--exclude=unit_tests") + rsync_cmd.append("--exclude=perf_test") + rsync_cmd.append("--exclude=performance_tests") + if options.source_repo == "git": - rsync_cmd.append("--exclude=.git") + rsync_cmd.append("--exclude=.git*") rsync_cmd.append(options.source) rsync_cmd.append(options.destination) @@ -171,28 +189,27 @@ def find_git_commit_information(options): ('sems', 'software.sandia.gov:/git/sems') """ git_log_cmd = ["git", "log", "-1"] - + output, error = run_cmd(git_log_cmd, options, options.source) - + commit_match = re.match("commit ([0-9a-fA-F]+)", output) commit_id = commit_match.group(1) commit_log = output - + git_remote_cmd = ["git", "remote", "-v"] output, error = run_cmd(git_remote_cmd, options, options.source) - + remote_match = re.search("origin\s([^ ]*/([^ ]+))", output, re.MULTILINE) 
if not remote_match: raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source)) source_location = remote_match.group(1) source_name = remote_match.group(2).strip() - + if source_name[-1] == "/": source_name = source_name[:-1] return commit_id, commit_log, source_name, source_location - #end find_git_commit_information def do_git_commit(message, options): @@ -201,10 +218,10 @@ def do_git_commit(message, options): git_add_cmd = ["git", "add", "-A"] run_cmd(git_add_cmd, options, options.destination) - + git_commit_cmd = ["git", "commit", "-m%s" % message] run_cmd(git_commit_cmd, options, options.destination) - + git_log_cmd = ["git", "log", "--format=%h", "-1"] commit_sha1, error = run_cmd(git_log_cmd, options, options.destination) @@ -214,7 +231,7 @@ def do_git_commit(message, options): def verify_git_repo_clean(location, options): git_status_cmd = ["git", "status", "--porcelain"] output, error = run_cmd(git_status_cmd, options, location) - + if output != "": if options.no_validate_repo == False: raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot." @@ -223,7 +240,6 @@ def verify_git_repo_clean(location, options): print "WARNING: %s is not clean. Proceeding anyway." % location print "WARNING: This could lead to differences in the source and destination." print "WARNING: It could also lead to extra files being included in the snapshot commit." 
- #end verify_git_repo_clean def main(options): @@ -238,14 +254,14 @@ def main(options): commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now()) repo_name = options.source repo_location = options.source - + commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2 - + if options.dest_repo == "git": verify_git_repo_clean(options.destination, options) rsync(options.source, options.destination, options) - + if options.dest_repo == "git": do_git_commit(commit_message, options) elif options.dest_repo == "none": @@ -256,10 +272,6 @@ def main(options): cwd = os.getcwd() print "No commit done by request. Please use file at:" print "%s%sif you wish to commit this to a repo later." % (cwd+"/"+file_name, os.linesep) - - - - #end main if (__name__ == "__main__"): @@ -267,7 +279,7 @@ if (__name__ == "__main__"): doctest.testmod() sys.exit(0) - try: + try: options = parse_cmdline(__doc__) main(options) except RuntimeError, e: @@ -275,5 +287,5 @@ if (__name__ == "__main__"): if "--debug" in sys.argv: traceback.print_exc() sys.exit(1) - else: + else: sys.exit(0) diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index 6909606643df6b83c2dc77c2469768e02a13844d..8e1246bf8bd85b36f0fae947a1ee280e820426e9 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -24,6 +24,8 @@ elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name fi elif [[ "$HOSTNAME" =~ apollo ]]; then MACHINE=apollo +elif [[ "$HOSTNAME" =~ sullivan ]]; then + MACHINE=sullivan elif [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then MACHINE=sems else @@ -152,7 +154,7 @@ if [ "$MACHINE" = "sems" ]; then "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" - "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) @@ -164,6 +166,7 @@ if [ "$MACHINE" = "sems" ]; then "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" @@ -184,7 +187,7 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -221,7 +224,7 @@ elif [ "$MACHINE" = "sullivan" ]; then BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.3.0 
$BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") + COMPILERS=("gcc/6.1.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") if [ -z "$ARCH_FLAG" ]; then ARCH_FLAG="--arch=ARMv8-ThunderX" @@ -278,11 +281,11 @@ elif [ "$MACHINE" = "apollo" ]; then "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" "clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -295,8 +298,8 @@ elif [ "$MACHINE" = "apollo" ]; then "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi diff --git 
a/lib/kokkos/config/testing_scripts/jenkins_test_driver b/lib/kokkos/config/testing_scripts/jenkins_test_driver index 9cba7fa51856b59706a8c6655aef42b39847766b..f393940304ee8e679440871414376283f8eef9a7 100755 --- a/lib/kokkos/config/testing_scripts/jenkins_test_driver +++ b/lib/kokkos/config/testing_scripts/jenkins_test_driver @@ -48,7 +48,7 @@ esac #nvcc wrapper and make the wrapper the compiler. if [ $cuda_compiler != "" ]; then export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler - compiler=$kokkos_path/config/nvcc_wrapper + compiler=$kokkos_path/bin/nvcc_wrapper fi if [ $host_compiler_brand == "intel" -a $cuda_compiler != "" ]; then diff --git a/lib/kokkos/config/trilinos-integration/checkin-test b/lib/kokkos/config/trilinos-integration/checkin-test new file mode 100644 index 0000000000000000000000000000000000000000..92a1b1c06882d3ee73e9c27f5054bd3544acdd0e --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/checkin-test @@ -0,0 +1,4 @@ +module purge +module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.59.0/base + +#Run Trilinos CheckinTest diff --git a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh index 2692f76038f2f7e70a7ba205f493949bf02d94a3..b81a3b1566f103ac5284b78d17378467de691f39 100755 --- a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh +++ b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh @@ -1,5 +1,18 @@ #!/bin/bash -le +TRILINOS_UPDATE_BRANCH=$1 +TRILINOS_PRISTINE_BRANCH=$2 + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update export 
TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine @@ -16,8 +29,8 @@ if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then fi cd ${TRILINOS_UPDATED_PATH} -git checkout develop -git reset --hard origin/develop +git checkout $TRILINOS_UPDATE_BRANCH +git reset --hard origin/$TRILINOS_UPDATE_BRANCH git pull cd .. @@ -28,18 +41,14 @@ echo "" echo "" echo "Trilinos State:" git log --pretty=oneline --since=7.days -SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'` cd .. cd ${TRILINOS_PRISTINE_PATH} git status -git log --pretty=oneline --since=7.days -echo "Checkout develop" -git checkout develop +echo "Checkout $TRILINOS_PRISTINE_BRANCH" +git checkout $TRILINOS_PRISTINE_BRANCH echo "Pull" git pull -echo "Checkout SHA" -git checkout ${SHA} cd .. cd ${TRILINOS_PRISTINE_PATH} diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel new file mode 100755 index 0000000000000000000000000000000000000000..23968e8c0f8581866c6f7ed99ef3417ffc4c0442 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel @@ -0,0 +1,60 @@ +#!/bin/bash -el +ulimit -c 0 +module load devpack/openmpi/1.10.0/intel/16.1.056/cuda/none + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=ON +export JENKINS_DO_SERIAL=OFF +export JENKINS_DO_COMPLEX=OFF + +export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" +export ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export 
BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" +export LAPACK_LIBRARIES=${BLAS_LIBRARIES} + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON +export JENKINS_DO_SHARED=OFF + +export QUEUE=haswell + + +module load python + + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_slurm ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel new file mode 100755 index 0000000000000000000000000000000000000000..964de3a0026f7ccf7fdbc776e546d3787c856c53 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel @@ -0,0 +1,60 @@ +#!/bin/bash -el +ulimit -c 0 +module load devpack/openmpi/1.10.0/intel/16.1.056/cuda/none + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=ON +export JENKINS_DO_COMPLEX=ON + +export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" +export ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export 
BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" +export LAPACK_LIBRARIES=${BLAS_LIBRARIES} + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON +export JENKINS_DO_SHARED=OFF + +export QUEUE=haswell + + +module load python + + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_slurm ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} diff --git a/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda new file mode 100755 index 0000000000000000000000000000000000000000..52af024858de4c59796773d00d47bc9641e6ab85 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda @@ -0,0 +1,63 @@ +#!/bin/bash -el +ulimit -c 0 + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=ON +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=ON +export JENKINS_DO_COMPLEX=OFF + +export JENKINS_ARCH_CXX_FLAG="-mcpu=power8 -arch=sm_37" +export JENKINS_ARCH_C_FLAG="-mcpu=power8" +export 
BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" +export LAPACK_LIBRARIES="${LAPACK_ROOT}/lib/liblapack.a;gfortran;gomp" + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON + +export QUEUE=rhel7F + +module load python + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +export OMPI_CXX=${KOKKOS_PATH}/bin/nvcc_wrapper + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +export CUDA_LAUNCH_BLOCKING=1 +export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_lsf ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} + diff --git a/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp new file mode 100755 index 0000000000000000000000000000000000000000..452165eef2e869601693d94711a134c7b04f9927 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp @@ -0,0 +1,58 @@ +#!/bin/bash -el +ulimit -c 0 + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=ON +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=OFF +export JENKINS_DO_COMPLEX=OFF + +export JENKINS_ARCH_CXX_FLAG="-mcpu=power8" +export JENKINS_ARCH_C_FLAG="-mcpu=power8" +export 
BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" +export LAPACK_LIBRARIES="${LAPACK_ROOT}/lib/liblapack.a;gfortran;gomp" + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON + +export QUEUE=rhel7F + +module load python + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_lsf ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} + diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile index fa3bc777013fd5148a2a49c26c00df4aba9786e7..edaaf1ee51f8bbe5b41a1efb418a0fb83dd1de0b 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -8,7 +8,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o @@ -65,7 +65,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -73,6 +73,5 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc 
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index d5cad06a47d3944c37e56c0a3d0bf4d40c0941b8..208387425f07fe5bef5803715d8b0eda27d25cb4 100644 --- a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <stdint.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_CUDA ) + +#include <cstdint> #include <string> #include <iostream> #include <iomanip> @@ -52,8 +55,6 @@ #include <Kokkos_Core.hpp> -#if defined( KOKKOS_ENABLE_CUDA ) - #include <TestDynRankView.hpp> #include <Kokkos_UnorderedMap.hpp> @@ -79,7 +80,7 @@ protected: } }; -TEST_F( cuda, dynrankview_perf ) +TEST_F( cuda, dynrankview_perf ) { std::cout << "Cuda" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -105,5 +106,6 @@ TEST_F( cuda, unordered_map_performance_far) } } - +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTCUDA_PREVENT_EMPTY_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index d96a3f74324046862b4740c4d9c3ae7a178937d8..4c0ccb6b88407b25937a342c806944831f15746b 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -1,13 +1,13 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -263,3 +263,4 @@ void test_dynrankview_op_perf( const int par_size ) } //end Performance #endif + diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index da74d32ac1fad932f7354d73384ddcb9bec75354..b674ec4a7450b6a3ef0f4077f837ab8d51c92d9e 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_OPENMP ) + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -93,7 +96,7 @@ protected: } }; -TEST_F( openmp, dynrankview_perf ) +TEST_F( openmp, dynrankview_perf ) { std::cout << "OpenMP" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -137,4 +140,7 @@ TEST_F( openmp, unordered_map_performance_far) } } // namespace test +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp index 4179b7de4c79cc095d83ef4fcdd179593a575f08..a8910a3c72a11eb26c1e7707e160b602d0ef220b 100644 --- a/lib/kokkos/containers/performance_tests/TestThreads.cpp +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -87,7 +90,7 @@ protected: } }; -TEST_F( threads, dynrankview_perf ) +TEST_F( threads, dynrankview_perf ) { std::cout << "Threads" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -132,4 +135,7 @@ TEST_F( threads, unordered_map_performance_far) } // namespace Performance +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index 74da5f61b5d1e9506bf426595e0de9574384662b..7714506e9217b3dbebc900244398e9c319afa383 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -435,3 +435,4 @@ void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src) } // namespace Kokkos #endif //KOKKOS_BITSET_HPP + diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index 3a0196ee4c5ea48fcd9e1895212f655c6b81e6a1..937eab0d889d70ae9b289a12a7083037601347d0 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -442,6 +442,17 @@ public: modified_host () = (modified_device () > modified_host () ? modified_device () : modified_host ()) + 1; } + +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_host() && modified_device()) { + std::string msg = "Kokkos::DualView::modify ERROR: "; + msg += "Concurrent modification of host and device views "; + msg += "in DualView \""; + msg += d_view.label(); + msg += "\"\n"; + Kokkos::abort(msg.c_str()); + } +#endif } //@} @@ -624,3 +635,4 @@ deep_copy (const ExecutionSpace& exec , } // namespace Kokkos #endif + diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index acb37f7f7520d104659b028991d624d5da81c81c..8e464506f9da6ed12278ed6435f48f63ab56e6aa 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -140,21 +140,21 @@ struct DynRankDimTraits { static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutStride>::value) , Layout>::type createLayout( const Layout& layout ) { return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.stride[0] + , layout.stride[0] , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.stride[1] + , layout.stride[1] , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.stride[2] + , layout.stride[2] , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.stride[3] + , layout.stride[3] , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.stride[4] + , layout.stride[4] , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.stride[5] + , layout.stride[5] , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.stride[6] + , layout.stride[6] , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - , layout.stride[7] + , layout.stride[7] ); } @@ -188,7 +188,7 @@ struct DynRankDimTraits { KOKKOS_INLINE_FUNCTION static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutRight>::value || std::is_same<Layout , Kokkos::LayoutLeft>::value) && std::is_integral<iType>::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) { - return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) , dynrank > 1 ? layout.dimension[1] : ~size_t(0) , dynrank > 2 ? 
layout.dimension[2] : ~size_t(0) , dynrank > 3 ? layout.dimension[3] : ~size_t(0) @@ -205,27 +205,27 @@ struct DynRankDimTraits { static typename std::enable_if< (std::is_same<Layout , Kokkos::LayoutStride>::value) && std::is_integral<iType>::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) { return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) - , dynrank > 0 ? layout.stride[0] : (0) + , dynrank > 0 ? layout.stride[0] : (0) , dynrank > 1 ? layout.dimension[1] : ~size_t(0) - , dynrank > 1 ? layout.stride[1] : (0) + , dynrank > 1 ? layout.stride[1] : (0) , dynrank > 2 ? layout.dimension[2] : ~size_t(0) - , dynrank > 2 ? layout.stride[2] : (0) + , dynrank > 2 ? layout.stride[2] : (0) , dynrank > 3 ? layout.dimension[3] : ~size_t(0) - , dynrank > 3 ? layout.stride[3] : (0) + , dynrank > 3 ? layout.stride[3] : (0) , dynrank > 4 ? layout.dimension[4] : ~size_t(0) - , dynrank > 4 ? layout.stride[4] : (0) + , dynrank > 4 ? layout.stride[4] : (0) , dynrank > 5 ? layout.dimension[5] : ~size_t(0) - , dynrank > 5 ? layout.stride[5] : (0) + , dynrank > 5 ? layout.stride[5] : (0) , dynrank > 6 ? layout.dimension[6] : ~size_t(0) - , dynrank > 6 ? layout.stride[6] : (0) + , dynrank > 6 ? layout.stride[6] : (0) , dynrank > 7 ? layout.dimension[7] : ~size_t(0) - , dynrank > 7 ? layout.stride[7] : (0) + , dynrank > 7 ? layout.stride[7] : (0) ); } /** \brief Debug bounds-checking routines */ -// Enhanced debug checking - most infrastructure matches that of functions in +// Enhanced debug checking - most infrastructure matches that of functions in // Kokkos_ViewMapping; additional checks for extra arguments beyond rank are 0 template< unsigned , typename iType0 , class MapType > KOKKOS_INLINE_FUNCTION @@ -235,20 +235,20 @@ bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & ) template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... 
Args > KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds - ( const iType0 & rank + ( const iType0 & rank , const MapType & map , const iType1 & i , Args ... args ) { - if ( static_cast<iType0>(R) < rank ) { + if ( static_cast<iType0>(R) < rank ) { return ( size_t(i) < map.extent(R) ) && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); } else if ( i != 0 ) { printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R); return ( false ) - && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); + && dyn_rank_view_verify_operator_bounds<R+1>( rank , map , args ... ); } else { return ( true ) @@ -281,20 +281,24 @@ void dyn_rank_view_error_operator_bounds } // op_rank = rank of the operator version that was called -template< typename iType0 , typename iType1 , class MapType , class ... Args > +template< typename MemorySpace + , typename iType0 , typename iType1 , class MapType , class ... Args > KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds - ( const iType0 & op_rank , const iType1 & rank , const char* label , const MapType & map , Args ... args ) + ( const iType0 & op_rank , const iType1 & rank + , const Kokkos::Impl::SharedAllocationTracker & tracker + , const MapType & map , Args ... args ) { if ( static_cast<iType0>(rank) > op_rank ) { - Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); + Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); } if ( ! dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... 
) ) { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) enum { LEN = 1024 }; char buffer[ LEN ]; - int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label); + const std::string label = tracker.template get_label<MemorySpace>(); + int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label.c_str()); dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else @@ -347,7 +351,7 @@ private: std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value || std::is_same< typename DstTraits::array_layout - , Kokkos::LayoutStride >::value + , Kokkos::LayoutStride >::value }; public: @@ -381,9 +385,9 @@ public: } //end Impl /* \class DynRankView - * \brief Container that creates a Kokkos view with rank determined at runtime. + * \brief Container that creates a Kokkos view with rank determined at runtime. * Essentially this is a rank 7 view that wraps the access operators - * to yield the functionality of a view + * to yield the functionality of a view * * Changes from View * 1. The rank of the DynRankView is returned by the method rank() @@ -410,14 +414,14 @@ class DynRankView : public ViewTraits< DataType , Properties ... > { static_assert( !std::is_array<DataType>::value && !std::is_pointer<DataType>::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); -private: +private: template < class , class ... > friend class DynRankView ; template < class , class ... > friend class Impl::ViewMapping ; -public: +public: typedef ViewTraits< DataType , Properties ... > drvtraits ; - typedef View< DataType******* , Properties...> view_type ; + typedef View< DataType******* , Properties...> view_type ; typedef ViewTraits< DataType******* , Properties ... 
> traits ; @@ -430,7 +434,7 @@ private: map_type m_map ; unsigned m_rank; -public: +public: KOKKOS_INLINE_FUNCTION view_type & DownCast() const { return ( view_type & ) (*this); } KOKKOS_INLINE_FUNCTION @@ -588,7 +592,7 @@ private: // rank of the calling operator - included as first argument in ARG #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ; + Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; #else @@ -607,14 +611,10 @@ public: // Rank 0 KOKKOS_INLINE_FUNCTION reference_type operator()() const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map) ) - #endif + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) return implementation_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); + //return m_map.reference(0,0,0,0,0,0,0); } // Rank 1 @@ -624,6 +624,8 @@ public: typename std::enable_if< std::is_same<typename drvtraits::value_type, typename drvtraits::scalar_array_type>::value && std::is_integral<iType>::value, reference_type>::type operator[](const iType & i0) const { + //Phalanx is violating this, since they use the operator to access ALL elements in the allocation + //KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map) ) return data()[i0]; } @@ -647,14 +649,10 @@ public: template< typename iType > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type - operator()(const iType & i0 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) - #endif - return m_map.reference(i0); + operator()(const iType & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) + return m_map.reference(i0); } template< typename iType > @@ -662,11 +660,7 @@ public: typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type operator()(const iType & i0 ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) - #endif + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) return m_map.reference(i0,0,0,0,0,0,0); } @@ -674,155 +668,111 @@ public: template< typename iType0 , typename iType1 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) ) - #endif - return m_map.reference(i0,i1); + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1); } template< typename iType0 , typename iType1 > KOKKOS_INLINE_FUNCTION typename std::enable_if< 
!(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1) ) - #endif - return m_map.reference(i0,i1,0,0,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1,0,0,0,0,0); } // Rank 3 template< typename iType0 , typename iType1 , typename iType2 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) ) - #endif - return m_map.reference(i0,i1,i2); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2); } template< typename iType0 , typename iType1 , typename iType2 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - #ifndef 
KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2) ) - #endif - return m_map.reference(i0,i1,i2,0,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2,0,0,0,0); } // Rank 4 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) ) - #endif - return m_map.reference(i0,i1,i2,i3); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3); } template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - 
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3) ) - #endif - return m_map.reference(i0,i1,i2,i3,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); } // Rank 5 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4); } template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 
& i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); } // Rank 6 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,i5); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); } template< typename iType0 , typename 
iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,i5,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); } // Rank 7 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) ) - #endif 
- return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } #undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY //---------------------------------------- - // Standard constructor, destructor, and assignment operators... + // Standard constructor, destructor, and assignment operators... KOKKOS_INLINE_FUNCTION ~DynRankView() {} @@ -840,7 +790,7 @@ public: DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } //---------------------------------------- // Compatible view copy constructor and assignment @@ -1068,7 +1018,7 @@ public: DynRankView( const Label & arg_label , typename std::enable_if< Kokkos::Experimental::Impl::is_view_label<Label>::value , - const size_t >::type arg_N0 = ~size_t(0) + const size_t >::type arg_N0 = ~size_t(0) , const size_t arg_N1 = ~size_t(0) , const size_t arg_N2 = ~size_t(0) , const size_t arg_N3 = ~size_t(0) @@ -1104,7 +1054,7 @@ public: , const size_t arg_N6 = ~size_t(0) , const size_t arg_N7 = ~size_t(0) ) - : DynRankView(Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 ) + : DynRankView(Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing 
), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 ) {} //---------------------------------------- @@ -1182,7 +1132,7 @@ public: , const typename traits::array_layout & arg_layout ) : DynRankView( Impl::ViewCtorProp<pointer_type>( reinterpret_cast<pointer_type>( - arg_space.get_shmem( map_type::memory_span( + arg_space.get_shmem( map_type::memory_span( Impl::DynRankDimTraits<typename traits::specialize>::createLayout( arg_layout ) //is this correct? ) ) ) ) , arg_layout ) @@ -1206,7 +1156,7 @@ public: Impl::DynRankDimTraits<typename traits::specialize>::createLayout( typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 - , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) ) + , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) ) ) ) ) ) , typename traits::array_layout ( arg_N0 , arg_N1 , arg_N2 , arg_N3 @@ -1241,7 +1191,7 @@ struct ViewMapping , Kokkos::LayoutRight >::value || std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value - ) + ) ), DynRankSubviewTag >::type , SrcTraits , Args ... 
> @@ -1266,19 +1216,19 @@ private: typedef typename SrcTraits::value_type value_type ; - typedef value_type******* data_type ; + typedef value_type******* data_type ; public: typedef Kokkos::ViewTraits < data_type - , array_layout + , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > traits_type ; typedef Kokkos::View < data_type - , array_layout + , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > type ; @@ -1289,17 +1239,17 @@ public: static_assert( Kokkos::Impl::is_memory_traits< MemoryTraits >::value , "" ); typedef Kokkos::ViewTraits - < data_type + < data_type , array_layout , typename SrcTraits::device_type , MemoryTraits > traits_type ; typedef Kokkos::View - < data_type + < data_type , array_layout , typename SrcTraits::device_type , MemoryTraits > type ; - }; + }; typedef typename SrcTraits::dimension dimension ; @@ -1318,7 +1268,7 @@ public: template < typename T , class ... P > KOKKOS_INLINE_FUNCTION - static ret_type subview( const unsigned src_rank , Kokkos::Experimental::DynRankView< T , P...> const & src + static ret_type subview( const unsigned src_rank , Kokkos::Experimental::DynRankView< T , P...> const & src , Args ... args ) { @@ -1339,8 +1289,8 @@ public: ret_type dst ; - const SubviewExtents< 7 , rank > extents = - ExtentGenerator< Args ... >::generator( src.m_map.m_offset.m_dim , args... ) ; + const SubviewExtents< 7 , rank > extents = + ExtentGenerator< Args ... >::generator( src.m_map.m_offset.m_dim , args... ) ; dst_offset_type tempdst( src.m_map.m_offset , extents ) ; @@ -1392,12 +1342,12 @@ using Subdynrankview = typename Kokkos::Experimental::Impl::ViewMapping< Kokkos: template< class D , class ... P , class ...Args > KOKKOS_INLINE_FUNCTION -Subdynrankview< ViewTraits<D******* , P...> , Args... > +Subdynrankview< ViewTraits<D******* , P...> , Args... > subdynrankview( const Kokkos::Experimental::DynRankView< D , P... 
> &src , Args...args) { if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args { Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); } - + typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ; return metafcn::subview( src.rank() , src , args... ); @@ -1406,7 +1356,7 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args. //Wrapper to allow subview function name template< class D , class ... P , class ...Args > KOKKOS_INLINE_FUNCTION -Subdynrankview< ViewTraits<D******* , P...> , Args... > +Subdynrankview< ViewTraits<D******* , P...> , Args... > subview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args) { return subdynrankview( src , args... ); @@ -1508,7 +1458,7 @@ struct DynRankViewFill { }; template< class OutputView > -struct DynRankViewFill< OutputView , typename std::enable_if< OutputView::Rank == 0 >::type > { +struct DynRankViewFill< OutputView , typename std::enable_if< OutputView::Rank == 0 >::type > { DynRankViewFill( const OutputView & dst , const typename OutputView::const_value_type & src ) { Kokkos::Impl::DeepCopy< typename OutputView::memory_space , Kokkos::HostSpace > @@ -1648,9 +1598,9 @@ void deep_copy // If same type, equal layout, equal dimensions, equal span, and contiguous memory then can byte-wise copy if ( rank(src) == 0 && rank(dst) == 0 ) - { + { typedef typename dst_type::value_type value_type ; - Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , sizeof(value_type) ); + Kokkos::Impl::DeepCopy< dst_memory_space , src_memory_space >( dst.data() , src.data() , sizeof(value_type) ); } else if ( std::is_same< typename DstType::traits::value_type , typename SrcType::traits::non_const_value_type >::value && @@ -1826,7 +1776,7 @@ create_mirror( const DynRankView<T,P...> & src 
typedef DynRankView<T,P...> src_type ; typedef typename src_type::HostMirror dst_type ; - return dst_type( std::string( src.label() ).append("_mirror") + return dst_type( std::string( src.label() ).append("_mirror") , Impl::reconstructLayout(src.layout(), src.rank()) ); } @@ -1870,7 +1820,7 @@ create_mirror_view( const DynRankView<T,P...> & src )>::type * = 0 ) { - return Kokkos::Experimental::create_mirror( src ); + return Kokkos::Experimental::create_mirror( src ); } // Create a mirror view in a new space (specialization for same space) @@ -1966,3 +1916,4 @@ using Kokkos::Experimental::realloc ; } //end Kokkos #endif + diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index 53e0eab693afeca7bbe0c164666612dc5ccc36d9..da96db2d6b782f2ac2f2aada57f53346365ccedb 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -86,7 +86,7 @@ private: public: - typedef Kokkos::Experimental::MemoryPool< typename traits::device_type > memory_pool ; + typedef Kokkos::MemoryPool< typename traits::device_type > memory_pool ; private: @@ -275,6 +275,10 @@ public: ch[jc_try] = reinterpret_cast<value_type*>( m_pool.allocate( sizeof(value_type) << m_chunk_shift )); + if ( 0 == ch[jc_try] ) { + Kokkos::abort("DynamicView::resize_parallel exhausted memory pool"); + } + Kokkos::memory_fence(); } } @@ -436,7 +440,7 @@ public: void operator()( unsigned i ) const { if ( m_destroy && i < m_chunk_max && 0 != m_chunks[i] ) { - m_pool.deallocate( m_chunks[i] , m_pool.get_min_block_size() ); + m_pool.deallocate( m_chunks[i] , m_pool.min_block_size() ); } m_chunks[i] = 0 ; } @@ -495,7 +499,7 @@ public: // The memory pool chunk is guaranteed to be a power of two , m_chunk_shift( Kokkos::Impl::integral_power_of_two( - m_pool.get_min_block_size()/sizeof(typename traits::value_type)) ) + m_pool.min_block_size()/sizeof(typename traits::value_type)) ) , m_chunk_mask( ( 1 << 
m_chunk_shift ) - 1 ) , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift ) { diff --git a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp index 4c90e4c238654b5458f24db2083eb85e815b683c..8b9e75d854234f317f953278bd8f663cab1002d4 100644 --- a/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp +++ b/lib/kokkos/containers/src/Kokkos_ErrorReporter.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -114,7 +114,7 @@ private: template <typename ReportType, typename DeviceType> -inline int ErrorReporter<ReportType, DeviceType>::getNumReports() +inline int ErrorReporter<ReportType, DeviceType>::getNumReports() { int num_reports = 0; Kokkos::deep_copy(num_reports,m_numReportsAttempted); @@ -194,3 +194,4 @@ void ErrorReporter<ReportType, DeviceType>::resize(const size_t new_size) } // namespace kokkos #endif + diff --git a/lib/kokkos/containers/src/Kokkos_Functional.hpp b/lib/kokkos/containers/src/Kokkos_Functional.hpp index 5c7350ef1cd3bb1ed68deff0c823ce3f7a5a3619..ebb5debaf144dc9f80dd5e61a89c5856c01e7f4c 100644 --- a/lib/kokkos/containers/src/Kokkos_Functional.hpp +++ b/lib/kokkos/containers/src/Kokkos_Functional.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -170,4 +170,3 @@ struct less_equal #endif //KOKKOS_FUNCTIONAL_HPP - diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 193f1bc334dd76177e3823f6decee9dbd71b137e..63520daa6bff013f6298bc77f0504832a6d39579 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -61,7 +61,7 @@ #include <iostream> -#include <stdint.h> +#include <cstdint> #include <stdexcept> @@ -847,3 +847,4 @@ inline void deep_copy( UnorderedMap<DKey, DT, DDevice, Hasher, EqualTo> } // namespace Kokkos #endif //KOKKOS_UNORDERED_MAP_HPP + diff --git a/lib/kokkos/containers/src/Kokkos_Vector.hpp b/lib/kokkos/containers/src/Kokkos_Vector.hpp index 362b6b46cf816ec77fe16bd6fe9e40b600212d10..91fecd6151e6155a5876b6773e1435a6a070d808 100644 --- a/lib/kokkos/containers/src/Kokkos_Vector.hpp +++ b/lib/kokkos/containers/src/Kokkos_Vector.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -281,3 +281,4 @@ public: } #endif + diff --git a/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp index df2fbed5a6709ca74edc0628fb45d39238da0ade..3e910f5eefc1c2d6e2a7c75ad9a92c7594c551ca 100644 --- a/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp +++ b/lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp @@ -46,7 +46,7 @@ #include <Kokkos_Macros.hpp> #include <impl/Kokkos_BitOps.hpp> -#include <stdint.h> +#include <cstdint> #include <cstdio> #include <climits> diff --git a/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp index 7d6020536a8f3d1caef4880f62bc7999b56f2c02..43874e97ff7b71c07f8cd365fb45e7a4d881d314 100644 --- a/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp +++ b/lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -43,7 +43,7 @@ #define KOKKOS_FUNCTIONAL_IMPL_HPP #include <Kokkos_Macros.hpp> -#include <stdint.h> +#include <cstdint> namespace Kokkos { namespace Impl { @@ -193,3 +193,4 @@ bool bitwise_equal(T const * const a_ptr, T const * const b_ptr) }} // namespace Kokkos::Impl #endif //KOKKOS_FUNCTIONAL_IMPL_HPP + diff --git a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp index b788c966e9c5a04d0ce4ca626190d241ec273008..f57ee66a1d586acdba9b3814a93811725eef3931 100644 --- a/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp +++ b/lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,7 +45,7 @@ #define KOKKOS_UNORDERED_MAP_IMPL_HPP #include <Kokkos_Core_fwd.hpp> -#include <stdint.h> +#include <cstdint> #include <cstdio> #include <climits> @@ -295,3 +295,4 @@ struct UnorderedMapCanAssign<const Key,const Value,const Key,Value> : public tru }} //Kokkos::Impl #endif // KOKKOS_UNORDERED_MAP_IMPL_HPP + diff --git a/lib/kokkos/containers/unit_tests/Makefile b/lib/kokkos/containers/unit_tests/Makefile index c45e2be05ed73633331b775c1e71195e8d844acc..52559935d44d570d74c6b7447c3f65c87b16d132 100644 --- a/lib/kokkos/containers/unit_tests/Makefile +++ b/lib/kokkos/containers/unit_tests/Makefile @@ -8,7 +8,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/unit_tests -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o @@ -49,16 +49,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) endif KokkosContainers_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Cuda + $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Cuda KokkosContainers_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Threads + $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Threads KokkosContainers_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) - 
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_OpenMP + $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_OpenMP KokkosContainers_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_UnitTest_Serial + $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosContainers_UnitTest_Serial test-cuda: KokkosContainers_UnitTest_Cuda ./KokkosContainers_UnitTest_Cuda @@ -76,7 +76,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -84,6 +84,5 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/containers/unit_tests/TestComplex.hpp b/lib/kokkos/containers/unit_tests/TestComplex.hpp deleted file mode 100644 index 94c04b61f46759d91f0738723d487980c8cb2a83..0000000000000000000000000000000000000000 --- a/lib/kokkos/containers/unit_tests/TestComplex.hpp +++ /dev/null @@ -1,263 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. 
Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER - - -#ifndef KOKKOS_TEST_COMPLEX_HPP -#define KOKKOS_TEST_COMPLEX_HPP - -#include <Kokkos_Complex.hpp> -#include <gtest/gtest.h> -#include <iostream> - -namespace Test { - -namespace Impl { - template <typename RealType> - void testComplexConstructors () { - typedef Kokkos::complex<RealType> complex_type; - - complex_type z1; - complex_type z2 (0.0, 0.0); - complex_type z3 (1.0, 0.0); - complex_type z4 (0.0, 1.0); - complex_type z5 (-1.0, -2.0); - - ASSERT_TRUE( z1 == z2 ); - ASSERT_TRUE( z1 != z3 ); - ASSERT_TRUE( z1 != z4 ); - ASSERT_TRUE( z1 != z5 ); - - ASSERT_TRUE( z2 != z3 ); - ASSERT_TRUE( z2 != z4 ); - ASSERT_TRUE( z2 != z5 ); - - ASSERT_TRUE( z3 != z4 ); - ASSERT_TRUE( z3 != z5 ); - - complex_type z6 (-1.0, -2.0); - ASSERT_TRUE( z5 == z6 ); - - // Make sure that complex has value semantics, in particular, that - // equality tests use values and not pointers, so that - // reassignment actually changes the value. - z1 = complex_type (-3.0, -4.0); - ASSERT_TRUE( z1.real () == -3.0 ); - ASSERT_TRUE( z1.imag () == -4.0 ); - ASSERT_TRUE( z1 != z2 ); - - complex_type z7 (1.0); - ASSERT_TRUE( z3 == z7 ); - ASSERT_TRUE( z7 == 1.0 ); - ASSERT_TRUE( z7 != -1.0 ); - - z7 = complex_type (5.0); - ASSERT_TRUE( z7.real () == 5.0 ); - ASSERT_TRUE( z7.imag () == 0.0 ); - } - - template <typename RealType> - void testPlus () { - typedef Kokkos::complex<RealType> complex_type; - - complex_type z1 (1.0, -1.0); - complex_type z2 (-1.0, 1.0); - complex_type z3 = z1 + z2; - ASSERT_TRUE( z3 == complex_type (0.0, 0.0) ); - } - - template <typename RealType> - void testMinus () { - typedef Kokkos::complex<RealType> complex_type; - - // Test binary minus. - complex_type z1 (1.0, -1.0); - complex_type z2 (-1.0, 1.0); - complex_type z3 = z1 - z2; - ASSERT_TRUE( z3 == complex_type (2.0, -2.0) ); - - // Test unary minus. 
- complex_type z4 (3.0, -4.0); - ASSERT_TRUE( -z1 == complex_type (-3.0, 4.0) ); - } - - template <typename RealType> - void testTimes () { - typedef Kokkos::complex<RealType> complex_type; - - complex_type z1 (1.0, -1.0); - complex_type z2 (-1.0, 1.0); - complex_type z3 = z1 * z2; - ASSERT_TRUE( z3 == complex_type (0.0, 2.0) ); - - // Make sure that std::complex * Kokkos::complex works too. - std::complex<RealType> z4 (-1.0, 1.0); - complex_type z5 = z4 * z1; - ASSERT_TRUE( z5 == complex_type (0.0, 2.0) ); - } - - template <typename RealType> - void testDivide () { - typedef Kokkos::complex<RealType> complex_type; - - // Test division of a complex number by a real number. - complex_type z1 (1.0, -1.0); - complex_type z2 (1.0 / 2.0, -1.0 / 2.0); - ASSERT_TRUE( z1 / 2.0 == z2 ); - - // (-1+2i)/(1-i) == ((-1+2i)(1+i)) / ((1-i)(1+i)) - // (-1+2i)(1+i) == -3 + i - complex_type z3 (-1.0, 2.0); - complex_type z4 (1.0, -1.0); - complex_type z5 (-3.0, 1.0); - ASSERT_TRUE(z3 * Kokkos::conj (z4) == z5 ); - - // Test division of a complex number by a complex number. - // This assumes that RealType is a floating-point type. - complex_type z6 (Kokkos::real (z5) / 2.0, - Kokkos::imag (z5) / 2.0); - - complex_type z7 = z3 / z4; - ASSERT_TRUE( z7 == z6 ); - } - - template <typename RealType> - void testOutsideKernel () { - testComplexConstructors<RealType> (); - testPlus<RealType> (); - testTimes<RealType> (); - testDivide<RealType> (); - } - - - template<typename RealType, typename Device> - void testCreateView () { - typedef Kokkos::complex<RealType> complex_type; - Kokkos::View<complex_type*, Device> x ("x", 10); - ASSERT_TRUE( x.dimension_0 () == 10 ); - - // Test that View assignment works. 
- Kokkos::View<complex_type*, Device> x_nonconst = x; - Kokkos::View<const complex_type*, Device> x_const = x; - } - - template<typename RealType, typename Device> - class Fill { - public: - typedef typename Device::execution_space execution_space; - - typedef Kokkos::View<Kokkos::complex<RealType>*, Device> view_type; - typedef typename view_type::size_type size_type; - - KOKKOS_INLINE_FUNCTION - void operator () (const size_type i) const { - x_(i) = val_; - } - - Fill (const view_type& x, const Kokkos::complex<RealType>& val) : - x_ (x), val_ (val) - {} - - private: - view_type x_; - const Kokkos::complex<RealType> val_; - }; - - template<typename RealType, typename Device> - class Sum { - public: - typedef typename Device::execution_space execution_space; - - typedef Kokkos::View<const Kokkos::complex<RealType>*, Device> view_type; - typedef typename view_type::size_type size_type; - typedef Kokkos::complex<RealType> value_type; - - KOKKOS_INLINE_FUNCTION - void operator () (const size_type i, Kokkos::complex<RealType>& sum) const { - sum += x_(i); - } - - Sum (const view_type& x) : x_ (x) {} - - private: - view_type x_; - }; - - template<typename RealType, typename Device> - void testInsideKernel () { - typedef Kokkos::complex<RealType> complex_type; - typedef Kokkos::View<complex_type*, Device> view_type; - typedef typename view_type::size_type size_type; - - const size_type N = 1000; - view_type x ("x", N); - ASSERT_TRUE( x.dimension_0 () == N ); - - // Kokkos::parallel_reduce (N, [=] (const size_type i, complex_type& result) { - // result += x[i]; - // }); - - Kokkos::parallel_for (N, Fill<RealType, Device> (x, complex_type (1.0, -1.0))); - - complex_type sum; - Kokkos::parallel_reduce (N, Sum<RealType, Device> (x), sum); - - ASSERT_TRUE( sum.real () == 1000.0 && sum.imag () == -1000.0 ); - } -} // namespace Impl - - -template <typename Device> -void testComplex () -{ - Impl::testOutsideKernel<float> (); - Impl::testOutsideKernel<double> (); - - 
Impl::testCreateView<float, Device> (); - Impl::testCreateView<double, Device> (); - - Impl::testInsideKernel<float, Device> (); - Impl::testInsideKernel<double, Device> (); -} - - -} // namespace Test - -#endif // KOKKOS_TEST_COMPLEX_HPP diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp index 1ca06150ffae8176f8fb6f5b5296067786afd08c..5a78a5de9e228fe26a113e2af40876d8f05d4464 100644 --- a/lib/kokkos/containers/unit_tests/TestCuda.cpp +++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,14 +36,17 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_CUDA + #include <iostream> #include <iomanip> -#include <stdint.h> +#include <cstdint> #include <gtest/gtest.h> @@ -69,7 +72,6 @@ //---------------------------------------------------------------------------- -#ifdef KOKKOS_ENABLE_CUDA namespace Test { @@ -237,5 +239,7 @@ TEST_F(cuda, ErrorReporter) } +#else +void KOKKOS_CONTAINERS_UNIT_TESTS_TESTCUDA_PREVENT_EMPTY_LINK_ERROR() {} #endif /* #ifdef KOKKOS_ENABLE_CUDA */ diff --git a/lib/kokkos/containers/unit_tests/TestDualView.hpp b/lib/kokkos/containers/unit_tests/TestDualView.hpp index e72c69f7d41cf7d493becfcbb863e5f1d9f6679f..16891d4b1d9cf4d7bd363c3e2b930d8dff641d60 100644 --- a/lib/kokkos/containers/unit_tests/TestDualView.hpp +++ b/lib/kokkos/containers/unit_tests/TestDualView.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -119,3 +119,4 @@ void test_dualview_combinations(unsigned int size) } // namespace Test #endif //KOKKOS_TEST_UNORDERED_MAP_HPP + diff --git a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp index beb07bd791cf162c31706b1eeaf31a4c25c91ba5..a7ae15921f9e521a796a3ab309dff86bddbf967f 100644 --- a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp @@ -61,7 +61,7 @@ struct TestDynamicView typedef typename Space::execution_space execution_space ; typedef typename Space::memory_space memory_space ; - typedef Kokkos::Experimental::MemoryPool<typename Space::device_type> memory_pool_type; + typedef Kokkos::MemoryPool<typename Space::device_type> memory_pool_type; typedef Kokkos::Experimental::DynamicView<Scalar*,Space> view_type; typedef typename view_type::const_type const_view_type ; @@ -131,7 +131,12 @@ struct TestDynamicView // printf("TestDynamicView::run(%d) construct memory pool\n",arg_total_size); - memory_pool_type pool( memory_space() , arg_total_size * sizeof(Scalar) * 1.2 ); + memory_pool_type pool( memory_space() + , arg_total_size * sizeof(Scalar) * 1.2 + , 500 /* min block size in bytes */ + , 30000 /* max block size in bytes */ + , 1000000 /* min superblock size in bytes */ + ); // printf("TestDynamicView::run(%d) construct dynamic view\n",arg_total_size); diff --git a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp index cc206b80f4fed1c10c7314a471f02d867317eeef..17aa230d53e8aab952118d76b2714ecd5d8b2b7b 100644 --- a/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp +++ b/lib/kokkos/containers/unit_tests/TestErrorReporter.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // 
Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -225,3 +225,4 @@ struct ErrorReporterDriverNativeOpenMP : public ErrorReporterDriverBase<Kokkos:: } // namespace Test #endif // #ifndef KOKKOS_TEST_ERROR_REPORTING_HPP + diff --git a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp index d96d0c667d73f111d31699568881281d98a10390..2448bd077b3f06c04144b264a661a384656552db 100644 --- a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_OPENMP + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -56,7 +59,6 @@ #include <TestVector.hpp> #include <TestDualView.hpp> #include <TestDynamicView.hpp> -#include <TestComplex.hpp> #include <Kokkos_DynRankView.hpp> #include <TestDynViewAPI.hpp> @@ -68,7 +70,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_OPENMP class openmp : public ::testing::Test { protected: static void SetUpTestCase() @@ -91,11 +92,6 @@ protected: } }; -TEST_F( openmp, complex ) -{ - testComplex<Kokkos::OpenMP> (); -} - TEST_F( openmp, dyn_view_api) { TestDynViewAPI< double , Kokkos::OpenMP >(); } @@ -172,7 +168,6 @@ OPENMP_DUALVIEW_COMBINE_TEST( 10 ) #undef OPENMP_DEEP_COPY #undef OPENMP_VECTOR_COMBINE_TEST #undef OPENMP_DUALVIEW_COMBINE_TEST -#endif TEST_F( openmp , dynamic_view ) @@ -204,3 +199,7 @@ TEST_F(openmp, ErrorReporterNativeOpenMP) } // namespace test +#else +void KOKKOS_CONTAINERS_UNIT_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp index 51fbd503c788dfcbd011ac326f74535e50932b65..06c4d9f6ed0f4d97abb3828c3983058ea09c497b 100644 --- a/lib/kokkos/containers/unit_tests/TestSerial.cpp +++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,19 +36,18 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_SERIAL + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#if ! defined(KOKKOS_ENABLE_SERIAL) -# error "It doesn't make sense to build this file unless the Kokkos::Serial device is enabled. If you see this message, it probably means that there is an error in Kokkos' CMake build infrastructure." -#else - #include <Kokkos_Bitset.hpp> #include <Kokkos_UnorderedMap.hpp> #include <Kokkos_Vector.hpp> @@ -59,7 +58,6 @@ #include <TestVector.hpp> #include <TestDualView.hpp> #include <TestDynamicView.hpp> -#include <TestComplex.hpp> #include <iomanip> @@ -105,11 +103,6 @@ TEST_F( serial , staticcrsgraph ) TestStaticCrsGraph::run_test_graph3< Kokkos::Serial >(75, 100000); } -TEST_F( serial, complex ) -{ - testComplex<Kokkos::Serial> (); -} - TEST_F( serial, bitset ) { test_bitset<Kokkos::Serial> (); @@ -190,6 +183,7 @@ TEST_F(serial, ErrorReporter) } // namespace Test +#else +void KOKKOS_CONTAINERS_UNIT_TESTS_TESTSERIAL_PREVENT_EMPTY_LINK_ERROR() {} #endif // KOKKOS_ENABLE_SERIAL - diff --git a/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp index 09cb84b166e908260b969b7eb453167f8d06eddb..cccb304ec0387c3e2b2998511db9f680cffd72b2 100644 --- a/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp +++ b/lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -184,4 +184,3 @@ void run_test_graph3(size_t B, size_t N) } } /* namespace TestStaticCrsGraph */ - diff --git a/lib/kokkos/containers/unit_tests/TestThreads.cpp b/lib/kokkos/containers/unit_tests/TestThreads.cpp index 4f352b38220ebb96a417cabbca387359abe8ad1d..938ec88e90f7924c61d20888a4cbc8dcddfef4bf 100644 --- a/lib/kokkos/containers/unit_tests/TestThreads.cpp +++ b/lib/kokkos/containers/unit_tests/TestThreads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,17 +36,18 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_THREADS + #include <gtest/gtest.h> #include <Kokkos_Core.hpp> -#if defined( KOKKOS_ENABLE_PTHREAD ) - #include <Kokkos_Bitset.hpp> #include <Kokkos_UnorderedMap.hpp> @@ -201,6 +202,7 @@ TEST_F(threads, ErrorReporter) } // namespace Test - -#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */ +#else +void KOKKOS_CONTAINERS_UNIT_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() {} +#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */ diff --git a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp index ff0328548dee0a3458faa82ab44a16e5a081d29b..becaac198cdf679ddb596cdd529327647c3a666d 100644 --- a/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp +++ b/lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -311,3 +311,4 @@ void test_deep_copy( uint32_t num_nodes ) } // namespace Test #endif //KOKKOS_TEST_UNORDERED_MAP_HPP + diff --git a/lib/kokkos/containers/unit_tests/TestVector.hpp b/lib/kokkos/containers/unit_tests/TestVector.hpp index f9f4564898edf32e0030d0ca135ff9f43909f397..2abf20f6391b87168c3306377863d920b82546c3 100644 --- a/lib/kokkos/containers/unit_tests/TestVector.hpp +++ b/lib/kokkos/containers/unit_tests/TestVector.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -129,3 +129,4 @@ void test_vector_combinations(unsigned int size) } // namespace Test #endif //KOKKOS_TEST_UNORDERED_MAP_HPP + diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in index a71e60f20742edd8417365bb99c45f172dc5b218..621cd54e1c84d954bd32bf69cbe7feeeba77947d 100644 --- a/lib/kokkos/core/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in @@ -1,15 +1,38 @@ -#ifndef KOKKOS_CORE_CONFIG_H +#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H) +#error "Don't include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead." 
+#else #define KOKKOS_CORE_CONFIG_H +#endif /* The trivial 'src/build_common.sh' creates a config * that must stay in sync with this file. */ #cmakedefine KOKKOS_FOR_SIERRA -#if !defined( KOKKOS_FOR_SIERRA ) +#ifndef KOKKOS_FOR_SIERRA -#cmakedefine KOKKOS_HAVE_MPI #cmakedefine KOKKOS_HAVE_CUDA +#cmakedefine KOKKOS_HAVE_OPENMP +#cmakedefine KOKKOS_HAVE_PTHREAD +#cmakedefine KOKKOS_HAVE_QTHREADS +#cmakedefine KOKKOS_HAVE_SERIAL +#cmakedefine KOKKOS_HAVE_Winthread + +#cmakedefine KOKKOS_HAVE_HWLOC +#cmakedefine KOKKOS_ENABLE_HBWSPACE +#cmakedefine KOKKOS_ENABLE_LIBRT + +#cmakedefine KOKKOS_HAVE_DEBUG +#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK +#cmakedefine KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK +#cmakedefine KOKKOS_ENABLE_PROFILING +#cmakedefine KOKKOS_ENABLE_PROFILING_LOAD_PRINT + +#cmakedefine KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION + +#ifdef KOKKOS_HAVE_CUDA + +#cmakedefine KOKKOS_ENABLE_CUDA_LDG_INTRINSIC // mfh 16 Sep 2014: If passed in on the command line, that overrides // any value of KOKKOS_USE_CUDA_UVM here. Doing this should prevent build @@ -23,26 +46,8 @@ // hesitate to do that now, because I'm not sure if all the files are // including KokkosCore_config.h (or a header file that includes it) like // they should. - -#if ! defined(KOKKOS_USE_CUDA_UVM) +#ifndef KOKKOS_USE_CUDA_UVM #cmakedefine KOKKOS_USE_CUDA_UVM -#endif // ! 
defined(KOKKOS_USE_CUDA_UVM) - -#cmakedefine KOKKOS_HAVE_PTHREAD -#cmakedefine KOKKOS_HAVE_SERIAL -#cmakedefine KOKKOS_HAVE_QTHREADS -#cmakedefine KOKKOS_HAVE_Winthread -#cmakedefine KOKKOS_HAVE_OPENMP -#cmakedefine KOKKOS_HAVE_HWLOC -#cmakedefine KOKKOS_HAVE_DEBUG -#cmakedefine KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK -#cmakedefine KOKKOS_HAVE_CXX11 -#cmakedefine KOKKOS_HAVE_CUSPARSE -#cmakedefine KOKKOS_ENABLE_PROFILING_INTERNAL -#ifdef KOKKOS_ENABLE_PROFILING_INTERNAL -#define KOKKOS_ENABLE_PROFILING 1 -#else -#define KOKKOS_ENABLE_PROFILING 0 #endif #cmakedefine KOKKOS_HAVE_CUDA_RDC @@ -55,13 +60,51 @@ #define KOKKOS_CUDA_USE_LAMBDA 1 #endif +#endif + +#cmakedefine KOKKOS_CUDA_CLANG_WORKAROUND + +#ifndef __CUDA_ARCH__ +#cmakedefine KOKKOS_ENABLE_ISA_X86_64 +#cmakedefine KOKKOS_ENABLE_ISA_KNC +#cmakedefine KOKKOS_ENABLE_ISA_POWERPCLE +#endif + +#cmakedefine KOKKOS_ARCH_ARMV80 1 +#cmakedefine KOKKOS_ARCH_ARMV81 1 +#cmakedefine KOKKOS_ARCH_ARMV8_THUNDERX 1 +#cmakedefine KOKKOS_ARCH_AVX 1 +#cmakedefine KOKKOS_ARCH_AVX2 1 +#cmakedefine KOKKOS_ARCH_AVX512MIC 1 +#cmakedefine KOKKOS_ARCH_AVX512XEON 1 +#cmakedefine KOKKOS_ARCH_KNC 1 +#cmakedefine KOKKOS_ARCH_POWER8 1 +#cmakedefine KOKKOS_ARCH_POWER9 1 +#cmakedefine KOKKOS_ARCH_KEPLER 1 +#cmakedefine KOKKOS_ARCH_KEPLER30 1 +#cmakedefine KOKKOS_ARCH_KEPLER32 1 +#cmakedefine KOKKOS_ARCH_KEPLER35 1 +#cmakedefine KOKKOS_ARCH_KEPLER37 1 +#cmakedefine KOKKOS_ARCH_MAXWELL 1 +#cmakedefine KOKKOS_ARCH_MAXWELL50 1 +#cmakedefine KOKKOS_ARCH_MAXWELL52 1 +#cmakedefine KOKKOS_ARCH_MAXWELL53 1 +#cmakedefine KOKKOS_ARCH_PASCAL 1 +#cmakedefine KOKKOS_ARCH_PASCAL60 1 +#cmakedefine KOKKOS_ARCH_PASCAL61 1 + // Don't forbid users from defining this macro on the command line, // but still make sure that CMake logic can control its definition. -#if ! 
defined(KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA) +#ifndef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA #cmakedefine KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1 -#endif // KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA +#endif +// TODO: These are currently not used in Kokkos. Should they be removed? +#cmakedefine KOKKOS_HAVE_MPI +#cmakedefine KOKKOS_HAVE_CUSPARSE + +// TODO: No longer options in Kokkos. Need to be removed. #cmakedefine KOKKOS_USING_DEPRECATED_VIEW +#cmakedefine KOKKOS_HAVE_CXX11 #endif // KOKKOS_FOR_SIERRA -#endif // KOKKOS_CORE_CONFIG_H diff --git a/lib/kokkos/core/perf_test/CMakeLists.txt b/lib/kokkos/core/perf_test/CMakeLists.txt index cae52f1409e43a8adf9046855cc77b24f2dadce7..9f19a2a73eaeb180b09c90db17aec35bce3c5804 100644 --- a/lib/kokkos/core/perf_test/CMakeLists.txt +++ b/lib/kokkos/core/perf_test/CMakeLists.txt @@ -1,11 +1,18 @@ -INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINRARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) +# warning: PerfTest_CustomReduction.cpp uses +# ../../algorithms/src/Kokkos_Random.hpp +# we'll just allow it to be included, but note +# that in TriBITS KokkosAlgorithms can be disabled... 
+INCLUDE_DIRECTORIES("${CMAKE_CURRENT_SOURCE_DIR}/../../algorithms/src") + SET(SOURCES PerfTestMain.cpp - PerfTestHost.cpp - PerfTestCuda.cpp + PerfTestGramSchmidt.cpp + PerfTestHexGrad.cpp + PerfTest_CustomReduction.cpp ) # Per #374, we always want to build this test, but we only want to run diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index 3a0ad2d4c16a4e16d73e91eec131ee092bf9f47e..f59e7bbe1c1c377f8d23aa2760323d48bbc6bafe 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -8,12 +8,14 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper + KOKKOS_CUDA_OPTIONS=enable_lambda else CXX = g++ endif -CXXFLAGS = -O3 +CXXFLAGS = -O3 +#CXXFLAGS += -DGENERIC_REDUCER LINK ?= $(CXX) LDFLAGS ?= -lpthread @@ -21,23 +23,49 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/core/perf_test -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS = -OBJ_PERF = PerfTestHost.o PerfTestCuda.o PerfTestMain.o gtest-all.o +# + +OBJ_PERF = PerfTestMain.o gtest-all.o +OBJ_PERF += PerfTestGramSchmidt.o +OBJ_PERF += PerfTestHexGrad.o +OBJ_PERF += PerfTest_CustomReduction.o TARGETS += KokkosCore_PerformanceTest TEST_TARGETS += test-performance +# + OBJ_ATOMICS = test_atomic.o TARGETS += KokkosCore_PerformanceTest_Atomics TEST_TARGETS += test-atomic +# + +OBJ_MEMPOOL = test_mempool.o +TARGETS += KokkosCore_PerformanceTest_Mempool +TEST_TARGETS += test-mempool + +# + +OBJ_TASKDAG = test_taskdag.o +TARGETS += KokkosCore_PerformanceTest_TaskDAG +TEST_TARGETS += test-taskdag + +# KokkosCore_PerformanceTest: $(OBJ_PERF) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest + $(LINK) $(EXTRA_PATH) $(OBJ_PERF) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o 
KokkosCore_PerformanceTest KokkosCore_PerformanceTest_Atomics: $(OBJ_ATOMICS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Atomics + $(LINK) $(EXTRA_PATH) $(OBJ_ATOMICS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_PerformanceTest_Atomics + +KokkosCore_PerformanceTest_Mempool: $(OBJ_MEMPOOL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_MEMPOOL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_Mempool + +KokkosCore_PerformanceTest_TaskDAG: $(OBJ_TASKDAG) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_TASKDAG) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_PerformanceTest_TaskDAG test-performance: KokkosCore_PerformanceTest ./KokkosCore_PerformanceTest @@ -45,12 +73,18 @@ test-performance: KokkosCore_PerformanceTest test-atomic: KokkosCore_PerformanceTest_Atomics ./KokkosCore_PerformanceTest_Atomics +test-mempool: KokkosCore_PerformanceTest_Mempool + ./KokkosCore_PerformanceTest_Mempool + +test-taskdag: KokkosCore_PerformanceTest_TaskDAG + ./KokkosCore_PerformanceTest_TaskDAG + build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -58,5 +92,6 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/core/perf_test/PerfTestCuda.cpp b/lib/kokkos/core/perf_test/PerfTestCuda.cpp deleted file mode 100644 index 65ce61fb53b9e5d8025f1f6f59e8ecf194ec45f0..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/perf_test/PerfTestCuda.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/* -//@HEADER -// 
************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <iostream> -#include <iomanip> -#include <algorithm> -#include <gtest/gtest.h> - -#include <Kokkos_Core.hpp> - -#if defined( KOKKOS_ENABLE_CUDA ) - -#include <impl/Kokkos_Timer.hpp> - -#include <PerfTestMDRange.hpp> - -#include <PerfTestHexGrad.hpp> -#include <PerfTestBlasKernels.hpp> -#include <PerfTestGramSchmidt.hpp> -#include <PerfTestDriver.hpp> - - -namespace Test { - -class cuda : public ::testing::Test { - protected: - static void SetUpTestCase() { - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); - } - static void TearDownTestCase() { - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); - } -}; - -//TEST_F( cuda, mdrange_lr ) { -// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutRight>( 5, 8, "Kokkos::Cuda" )) ); -//} - -//TEST_F( cuda, mdrange_ll ) { -// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutLeft>( 5, 8, "Kokkos::Cuda" )) ); -//} - -TEST_F( cuda, hexgrad ) -{ - EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) ); -} - -TEST_F( cuda, gramschmidt ) -{ - EXPECT_NO_THROW( run_test_gramschmidt< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) ); -} - -namespace { - -template <typename T> -struct TextureFetch -{ - typedef Kokkos::View< T *, Kokkos::CudaSpace> array_type; - typedef Kokkos::View< const T *, Kokkos::CudaSpace, Kokkos::MemoryRandomAccess> const_array_type; - typedef Kokkos::View< int *, Kokkos::CudaSpace> index_array_type; - typedef Kokkos::View< const int *, Kokkos::CudaSpace> const_index_array_type; - - struct FillArray - { - array_type m_array; - FillArray( const array_type & array ) - : m_array(array) - {} - - void apply() const - { - Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this); - } - - 
KOKKOS_INLINE_FUNCTION - void operator()(int i) const { m_array(i) = i; } - }; - - struct RandomIndexes - { - index_array_type m_indexes; - typename index_array_type::HostMirror m_host_indexes; - RandomIndexes( const index_array_type & indexes) - : m_indexes(indexes) - , m_host_indexes(Kokkos::create_mirror(m_indexes)) - {} - - void apply() const - { - Kokkos::parallel_for( Kokkos::RangePolicy<Kokkos::HostSpace::execution_space,int>(0,m_host_indexes.dimension_0()), *this); - //random shuffle - Kokkos::HostSpace::execution_space::fence(); - std::random_shuffle(m_host_indexes.ptr_on_device(), m_host_indexes.ptr_on_device() + m_host_indexes.dimension_0()); - Kokkos::deep_copy(m_indexes,m_host_indexes); - } - - KOKKOS_INLINE_FUNCTION - void operator()(int i) const { m_host_indexes(i) = i; } - }; - - struct RandomReduce - { - const_array_type m_array; - const_index_array_type m_indexes; - RandomReduce( const const_array_type & array, const const_index_array_type & indexes) - : m_array(array) - , m_indexes(indexes) - {} - - void apply(T & reduce) const - { - Kokkos::parallel_reduce( Kokkos::RangePolicy<Kokkos::Cuda,int>(0,m_array.dimension_0()), *this, reduce); - } - - KOKKOS_INLINE_FUNCTION - void operator()(int i, T & reduce) const - { reduce += m_array(m_indexes(i)); } - }; - - static void run(int size, double & reduce_time, T &reduce) - { - array_type array("array",size); - index_array_type indexes("indexes",size); - - { FillArray f(array); f.apply(); } - { RandomIndexes f(indexes); f.apply(); } - - Kokkos::Cuda::fence(); - - Kokkos::Timer timer; - for (int j=0; j<10; ++j) { - RandomReduce f(array,indexes); - f.apply(reduce); - } - Kokkos::Cuda::fence(); - reduce_time = timer.seconds(); - } -}; - -} // unnamed namespace - -TEST_F( cuda, texture_double ) -{ - printf("Random reduce of double through texture fetch\n"); - for (int i=1; i<=26; ++i) { - int size = 1<<i; - double time = 0; - double reduce = 0; - TextureFetch<double>::run(size,time,reduce); - printf(" time = 
%1.3e size = 2^%d\n", time, i); - } -} - -} // namespace Test - -#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ - diff --git a/lib/kokkos/core/perf_test/PerfTestDriver.hpp b/lib/kokkos/core/perf_test/PerfTestDriver.hpp index 4732c3275a7f92cf1b1fc8f4d457c059ceb0679e..190fdb91422966cf55ec7c99e7df3a0fb6cfd94c 100644 --- a/lib/kokkos/core/perf_test/PerfTestDriver.hpp +++ b/lib/kokkos/core/perf_test/PerfTestDriver.hpp @@ -398,91 +398,5 @@ void run_test_mdrange( int exp_beg , int exp_end, const char deviceTypeName[], i } -template< class DeviceType > -void run_test_hexgrad( int exp_beg , int exp_end, const char deviceTypeName[] ) -{ - std::string label_hexgrad ; - label_hexgrad.append( "\"HexGrad< double , " ); - // mfh 06 Jun 2013: This only appends "DeviceType" (literally) to - // the string, not the actual name of the device type. Thus, I've - // modified the function to take the name of the device type. - // - //label_hexgrad.append( KOKKOS_MACRO_TO_STRING( DeviceType ) ); - label_hexgrad.append( deviceTypeName ); - label_hexgrad.append( " >\"" ); - - for (int i = exp_beg ; i < exp_end ; ++i) { - double min_seconds = 0.0 ; - double max_seconds = 0.0 ; - double avg_seconds = 0.0 ; - - const int parallel_work_length = 1<<i; - - for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) { - const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ; - - if ( 0 == j ) { - min_seconds = seconds ; - max_seconds = seconds ; - } - else { - if ( seconds < min_seconds ) min_seconds = seconds ; - if ( seconds > max_seconds ) max_seconds = seconds ; - } - avg_seconds += seconds ; - } - avg_seconds /= NUMBER_OF_TRIALS ; - - std::cout << label_hexgrad - << " , " << parallel_work_length - << " , " << min_seconds - << " , " << ( min_seconds / parallel_work_length ) - << std::endl ; - } -} - -template< class DeviceType > -void run_test_gramschmidt( int exp_beg , int exp_end, const char deviceTypeName[] ) -{ - std::string label_gramschmidt ; - label_gramschmidt.append( 
"\"GramSchmidt< double , " ); - // mfh 06 Jun 2013: This only appends "DeviceType" (literally) to - // the string, not the actual name of the device type. Thus, I've - // modified the function to take the name of the device type. - // - //label_gramschmidt.append( KOKKOS_MACRO_TO_STRING( DeviceType ) ); - label_gramschmidt.append( deviceTypeName ); - label_gramschmidt.append( " >\"" ); - - for (int i = exp_beg ; i < exp_end ; ++i) { - double min_seconds = 0.0 ; - double max_seconds = 0.0 ; - double avg_seconds = 0.0 ; - - const int parallel_work_length = 1<<i; - - for ( int j = 0 ; j < NUMBER_OF_TRIALS ; ++j ) { - const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ; - - if ( 0 == j ) { - min_seconds = seconds ; - max_seconds = seconds ; - } - else { - if ( seconds < min_seconds ) min_seconds = seconds ; - if ( seconds > max_seconds ) max_seconds = seconds ; - } - avg_seconds += seconds ; - } - avg_seconds /= NUMBER_OF_TRIALS ; - - std::cout << label_gramschmidt - << " , " << parallel_work_length - << " , " << min_seconds - << " , " << ( min_seconds / parallel_work_length ) - << std::endl ; - } -} - } diff --git a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp similarity index 78% rename from lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp rename to lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp index 516696b141d22ab5ac0662ef2c6d78fae8c9b8ad..56d90ed89089f872e4431e38540031f001fb2031 100644 --- a/lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp +++ b/lib/kokkos/core/perf_test/PerfTestGramSchmidt.cpp @@ -41,6 +41,10 @@ //@HEADER */ +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <PerfTest_Category.hpp> + #include <cmath> #include <PerfTestBlasKernels.hpp> @@ -70,7 +74,7 @@ struct InvNorm2 : public Kokkos::DotSingle< VectorView > { KOKKOS_INLINE_FUNCTION void final( value_type & result ) const { - result = sqrt( result ); + result = std::sqrt( 
result ); Rjj() = result ; inv() = ( 0 < result ) ? 1.0 / result : 0 ; } @@ -157,7 +161,7 @@ struct ModifiedGramSchmidt for ( size_type j = 0 ; j < count ; ++j ) { // Reduction : tmp = dot( Q(:,j) , Q(:,j) ); - // PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ; + // PostProcess : tmp = std::sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ; const vector_type Qj = Kokkos::subview( Q_ , Kokkos::ALL() , j ); const value_view Rjj = Kokkos::subview( R_ , j , j ); @@ -222,5 +226,58 @@ struct ModifiedGramSchmidt } }; +template< class DeviceType > +void run_test_gramschmidt( int exp_beg , int exp_end, int num_trials, const char deviceTypeName[] ) +{ + std::string label_gramschmidt ; + label_gramschmidt.append( "\"GramSchmidt< double , " ); + label_gramschmidt.append( deviceTypeName ); + label_gramschmidt.append( " >\"" ); + + for (int i = exp_beg ; i < exp_end ; ++i) { + double min_seconds = 0.0 ; + double max_seconds = 0.0 ; + double avg_seconds = 0.0 ; + + const int parallel_work_length = 1<<i; + + for ( int j = 0 ; j < num_trials ; ++j ) { + const double seconds = ModifiedGramSchmidt< double , DeviceType >::test(parallel_work_length, 32 ) ; + + if ( 0 == j ) { + min_seconds = seconds ; + max_seconds = seconds ; + } + else { + if ( seconds < min_seconds ) min_seconds = seconds ; + if ( seconds > max_seconds ) max_seconds = seconds ; + } + avg_seconds += seconds ; + } + avg_seconds /= num_trials ; + + std::cout << label_gramschmidt + << " , " << parallel_work_length + << " , " << min_seconds + << " , " << ( min_seconds / parallel_work_length ) + << std::endl ; + } +} + +TEST_F( default_exec, gramschmidt ) { + int exp_beg = 10; + int exp_end = 20; + int num_trials = 5; + + if(command_line_num_args()>1) + exp_beg = atoi(command_line_arg(1)); + if(command_line_num_args()>2) + exp_end = atoi(command_line_arg(2)); + if(command_line_num_args()>3) + num_trials = atoi(command_line_arg(3)); + + EXPECT_NO_THROW(run_test_gramschmidt< Kokkos::DefaultExecutionSpace>( exp_beg, 
exp_end, num_trials, Kokkos::DefaultExecutionSpace::name() )); +} + } diff --git a/lib/kokkos/core/perf_test/PerfTestHexGrad.hpp b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp similarity index 83% rename from lib/kokkos/core/perf_test/PerfTestHexGrad.hpp rename to lib/kokkos/core/perf_test/PerfTestHexGrad.cpp index ed5371f29c4db0fc2af4613d301006b1e96a0f28..7d5067a22c97f1351922b053a54337a594119119 100644 --- a/lib/kokkos/core/perf_test/PerfTestHexGrad.hpp +++ b/lib/kokkos/core/perf_test/PerfTestHexGrad.cpp @@ -41,6 +41,10 @@ //@HEADER */ +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <PerfTest_Category.hpp> + namespace Test { template< class DeviceType , @@ -264,5 +268,58 @@ struct HexGrad } }; +template< class DeviceType > +void run_test_hexgrad( int exp_beg , int exp_end, int num_trials, const char deviceTypeName[] ) +{ + std::string label_hexgrad ; + label_hexgrad.append( "\"HexGrad< double , " ); + label_hexgrad.append( deviceTypeName ); + label_hexgrad.append( " >\"" ); + + for (int i = exp_beg ; i < exp_end ; ++i) { + double min_seconds = 0.0 ; + double max_seconds = 0.0 ; + double avg_seconds = 0.0 ; + + const int parallel_work_length = 1<<i; + + for ( int j = 0 ; j < num_trials ; ++j ) { + const double seconds = HexGrad< DeviceType >::test(parallel_work_length) ; + + if ( 0 == j ) { + min_seconds = seconds ; + max_seconds = seconds ; + } + else { + if ( seconds < min_seconds ) min_seconds = seconds ; + if ( seconds > max_seconds ) max_seconds = seconds ; + } + avg_seconds += seconds ; + } + avg_seconds /= num_trials ; + + std::cout << label_hexgrad + << " , " << parallel_work_length + << " , " << min_seconds + << " , " << ( min_seconds / parallel_work_length ) + << std::endl ; + } +} + +TEST_F( default_exec, hexgrad ) { + int exp_beg = 10; + int exp_end = 20; + int num_trials = 5; + + if(command_line_num_args()>1) + exp_beg = atoi(command_line_arg(1)); + if(command_line_num_args()>2) + exp_end = atoi(command_line_arg(2)); + 
if(command_line_num_args()>3) + num_trials = atoi(command_line_arg(3)); + + EXPECT_NO_THROW(run_test_hexgrad< Kokkos::DefaultExecutionSpace >( exp_beg, exp_end, num_trials, Kokkos::DefaultExecutionSpace::name() )); +} + } diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp deleted file mode 100644 index 831d581109984319a4c8a61674a42a297ace443a..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/perf_test/PerfTestHost.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <Kokkos_Core.hpp> - -#if defined( KOKKOS_ENABLE_OPENMP ) - -typedef Kokkos::OpenMP TestHostDevice ; -const char TestHostDeviceName[] = "Kokkos::OpenMP" ; - -#elif defined( KOKKOS_ENABLE_PTHREAD ) - -typedef Kokkos::Threads TestHostDevice ; -const char TestHostDeviceName[] = "Kokkos::Threads" ; - -#elif defined( KOKKOS_ENABLE_SERIAL ) - -typedef Kokkos::Serial TestHostDevice ; -const char TestHostDeviceName[] = "Kokkos::Serial" ; - -#else -# error "You must enable at least one of the following execution spaces in order to build this test: Kokkos::Threads, Kokkos::OpenMP, or Kokkos::Serial." 
-#endif - -#include <impl/Kokkos_Timer.hpp> - -#include <PerfTestMDRange.hpp> - -#include <PerfTestHexGrad.hpp> -#include <PerfTestBlasKernels.hpp> -#include <PerfTestGramSchmidt.hpp> -#include <PerfTestDriver.hpp> - -//------------------------------------------------------------------------ - -namespace Test { - -class host : public ::testing::Test { -protected: - static void SetUpTestCase() - { - if(Kokkos::hwloc::available()) { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - unsigned threads_count = 0 ; - - threads_count = std::max( 1u , numa_count ) - * std::max( 2u , cores_per_numa * threads_per_core ); - - TestHostDevice::initialize( threads_count ); - } else { - const unsigned thread_count = 4 ; - TestHostDevice::initialize( thread_count ); - } - } - - static void TearDownTestCase() - { - TestHostDevice::finalize(); - } -}; - -//TEST_F( host, mdrange_lr ) { -// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutRight> (5, 8, TestHostDeviceName) ) ); -//} - -//TEST_F( host, mdrange_ll ) { -// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutLeft> (5, 8, TestHostDeviceName) ) ); -//} - -TEST_F( host, hexgrad ) { - EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName )); -} - -TEST_F( host, gramschmidt ) { - EXPECT_NO_THROW(run_test_gramschmidt< TestHostDevice>( 10, 20, TestHostDeviceName )); -} - -} // namespace Test - - diff --git a/lib/kokkos/core/perf_test/PerfTestMain.cpp b/lib/kokkos/core/perf_test/PerfTestMain.cpp index ac916308292076fc27231968715518b3f5c02f80..d80cfab8b58b87825f2b114b6bf7aed909555ad7 100644 --- a/lib/kokkos/core/perf_test/PerfTestMain.cpp +++ b/lib/kokkos/core/perf_test/PerfTestMain.cpp @@ -42,8 +42,37 @@ */ #include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +namespace Test { +int 
command_line_num_args(int n = 0) { + static int n_args = 0; + if(n>0) + n_args = n; + return n_args; +} + +const char* command_line_arg(int k, char** input_args = NULL) { + static char** args; + if(input_args != NULL) + args = input_args; + if(command_line_num_args() > k) + return args[k]; + else + return NULL; +} + +} int main(int argc, char *argv[]) { ::testing::InitGoogleTest(&argc,argv); - return RUN_ALL_TESTS(); + Kokkos::initialize(argc,argv); + + (void) Test::command_line_num_args(argc); + (void) Test::command_line_arg(0,argv); + + int result = RUN_ALL_TESTS(); + + Kokkos::finalize(); + return result; } diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp b/lib/kokkos/core/perf_test/PerfTest_Category.hpp similarity index 82% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp rename to lib/kokkos/core/perf_test/PerfTest_Category.hpp index 56524111aec939d0ff2b80196b5352a44f6919dd..02cee93ce9c0566d03bd2eb6686e61b0173d5ae7 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp +++ b/lib/kokkos/core/perf_test/PerfTest_Category.hpp @@ -41,16 +41,28 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> namespace Test { -TEST_F( cuda, view_api_a ) -{ - typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> > view_texture_managed; - typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess | Kokkos::Unmanaged> > view_texture_unmanaged; +extern int command_line_num_args(int n = 0); +extern const char* command_line_arg(int k, char** input_args = NULL); + +class default_exec : public ::testing::Test { +protected: + static void SetUpTestCase() { + } - TestViewAPI< double, Kokkos::Cuda >(); -} + static void TearDownTestCase() { + } +}; } // namespace Test + +#define TEST_CATEGORY default_exec +#define TEST_EXECSPACE Kokkos::DefaultExecutionSpace + +#endif diff --git 
a/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp b/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dbce1ec53725ce8c1801e331a2fe726838f89583 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp @@ -0,0 +1,115 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <gtest/gtest.h> +#include <PerfTest_Category.hpp> +#include <Kokkos_Random.hpp> + +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA +namespace Test { +template<class Scalar> +void custom_reduction_test(int N, int R, int num_trials) { + Kokkos::Random_XorShift64_Pool<> rand_pool(183291); + Kokkos::View<Scalar*> a("A",N); + Kokkos::fill_random(a,rand_pool,1.0); + + Scalar max; + + // Warm up + Kokkos::parallel_reduce(Kokkos::TeamPolicy<>(N/1024,32), KOKKOS_LAMBDA( const Kokkos::TeamPolicy<>::member_type& team, Scalar& lmax) { + Scalar team_max = Scalar(0); + for(int rr = 0; rr<R; rr++) { + int i = team.league_rank(); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,32), [&] (const int& j, Scalar& thread_max) { + Scalar t_max = Scalar(0); + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,32), [&] (const int& k, Scalar& max_) { + const Scalar val = a((i*32 + j)*32 + k); + if(val>lmax) lmax = val; + if((k == 11) && (j==17) && (i==2)) lmax = 11.5; + },Kokkos::Experimental::Max<Scalar>(t_max)); + if(t_max>thread_max) thread_max = t_max; + },Kokkos::Experimental::Max<Scalar>(team_max)); + } + if(team_max>lmax) lmax = team_max; + },Kokkos::Experimental::Max<Scalar>(max)); + + // Timing + Kokkos::Timer timer; + for(int r = 0; 
r<num_trials; r++) { + Kokkos::parallel_reduce(Kokkos::TeamPolicy<>(N/1024,32), KOKKOS_LAMBDA( const Kokkos::TeamPolicy<>::member_type& team, Scalar& lmax) { + Scalar team_max = Scalar(0); + for(int rr = 0; rr<R; rr++) { + int i = team.league_rank(); + Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,32), [&] (const int& j, Scalar& thread_max) { + Scalar t_max = Scalar(0); + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,32), [&] (const int& k, Scalar& max_) { + const Scalar val = a((i*32 + j)*32 + k); + if(val>lmax) lmax = val; + if((k == 11) && (j==17) && (i==2)) lmax = 11.5; + },Kokkos::Experimental::Max<Scalar>(t_max)); + if(t_max>thread_max) thread_max = t_max; + },Kokkos::Experimental::Max<Scalar>(team_max)); + } + if(team_max>lmax) lmax = team_max; + },Kokkos::Experimental::Max<Scalar>(max)); + } + double time = timer.seconds(); + printf("%e %e %e\n",time,1.0*N*R*num_trials*sizeof(Scalar)/time/1024/1024/1024,max); +} + +TEST_F( default_exec, custom_reduction ) { + int N = 100000; + int R = 1000; + int num_trials = 1; + + if(command_line_num_args()>1) + N = atoi(command_line_arg(1)); + if(command_line_num_args()>2) + R = atoi(command_line_arg(2)); + if(command_line_num_args()>3) + num_trials = atoi(command_line_arg(3)); + custom_reduction_test<double>(N,R,num_trials); +} +} +#endif diff --git a/lib/kokkos/core/perf_test/run_mempool.sh b/lib/kokkos/core/perf_test/run_mempool.sh new file mode 100755 index 0000000000000000000000000000000000000000..e9b42c5a53fad43e2fc486fbf500660d1e932c97 --- /dev/null +++ b/lib/kokkos/core/perf_test/run_mempool.sh @@ -0,0 +1,25 @@ +#!/bin/bash -e +NT=$1 +PROG="./KokkosCore_PerformanceTest_Mempool" +COMMON_ARGS="--kokkos-threads=$NT --fill_stride=1 --fill_level=70 --chunk_span=5 --repeat_inner=100" + +postproc() { +cat log | head -n 1 | rev | cut -d ' ' -f 1 | rev >> xvals +cat log | tail -n 1 | rev | cut -d ' ' -f 1 | rev >> yvals +} + +for yset in 1 2 3 +do + rm -f xvals yvals + for x in 1 2 4 8 16 32 + do + echo 
"yset $yset x factor $x" + $PROG $COMMON_ARGS --alloc_size=`expr $x \* 1000000` --super_size=`expr $x \* 100000` > log + postproc + done + rm -f yvals$yset + mv yvals yvals$yset +done + +rm -f datapoints +paste -d',' xvals yvals1 yvals2 yvals3 > datapoints diff --git a/lib/kokkos/core/perf_test/run_mempool_fill.sh b/lib/kokkos/core/perf_test/run_mempool_fill.sh new file mode 100755 index 0000000000000000000000000000000000000000..cdd756b4873915a99d4531e260704640e7749fee --- /dev/null +++ b/lib/kokkos/core/perf_test/run_mempool_fill.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e +NT=$1 +PROG="./KokkosCore_PerformanceTest_Mempool" +COMMON_ARGS="--kokkos-threads=$NT --fill_stride=1 --alloc_size=10027008 --super_size=65536 --repeat_inner=100 --chunk_span=4 --repeat_outer=10" + +postproc() { +cat log | grep "fill ops per second" | rev | cut -d ' ' -f 2 | rev >> yvals_fill +cat log | grep "cycle ops per second" | rev | cut -d ' ' -f 2 | rev >> yvals_cycle +} + +rm -f xvals yvals_fill yvals_cycle +for x in 75 95 +do + echo "test fill level $x" + echo $x >> xvals + $PROG $COMMON_ARGS --fill_level=$x 2>&1 | tee log + postproc +done + +rm -f datapoints +paste xvals yvals_fill yvals_cycle > datapoints.txt diff --git a/lib/kokkos/core/perf_test/run_taskdag.sh b/lib/kokkos/core/perf_test/run_taskdag.sh new file mode 100755 index 0000000000000000000000000000000000000000..dcb016c9d54cc5a8111f07b47c6d769098681253 --- /dev/null +++ b/lib/kokkos/core/perf_test/run_taskdag.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e +NT=$1 +PROG="./KokkosCore_PerformanceTest_TaskDAG" +COMMON_ARGS="--kokkos-threads=$NT --alloc_size=10027008 --super_size=65536 --repeat_outer=10" + +postproc() { +cat log | grep "tasks per second" | rev | cut -d ' ' -f 2 | rev >> yvals +} + +rm -f xvals yvals +for x in 21 23 +do + echo "test input $x" + echo $x >> xvals + $PROG $COMMON_ARGS --input=$x 2>&1 | tee log + postproc +done + +rm -f datapoints.txt +paste xvals yvals > datapoints.txt + diff --git 
a/lib/kokkos/core/perf_test/test_mempool.cpp b/lib/kokkos/core/perf_test/test_mempool.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7759c28275c04a48263151b2edffa2bb5c2cb371 --- /dev/null +++ b/lib/kokkos/core/perf_test/test_mempool.cpp @@ -0,0 +1,357 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> +#include <cstring> +#include <cstdlib> +#include <limits> + +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Timer.hpp> + +using ExecSpace = Kokkos::DefaultExecutionSpace ; +using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space ; + +using MemoryPool = Kokkos::MemoryPool< ExecSpace > ; + +struct TestFunctor { + + typedef Kokkos::View< uintptr_t * , ExecSpace > ptrs_type ; + + enum : unsigned { chunk = 32 }; + + MemoryPool pool ; + ptrs_type ptrs ; + unsigned chunk_span ; + unsigned fill_stride ; + unsigned range_iter ; + unsigned repeat_inner ; + + TestFunctor( size_t total_alloc_size + , unsigned min_superblock_size + , unsigned number_alloc + , unsigned arg_stride_alloc + , unsigned arg_chunk_span + , unsigned arg_repeat ) + : pool() + , ptrs() + , chunk_span(0) + , fill_stride(0) + , repeat_inner(0) + { + MemorySpace m ; + + const unsigned min_block_size = chunk ; + const unsigned max_block_size = chunk * arg_chunk_span ; + pool = MemoryPool( m , total_alloc_size + , min_block_size + , max_block_size + , min_superblock_size ); + + ptrs = ptrs_type( Kokkos::view_alloc( m , "ptrs") , number_alloc ); + fill_stride = arg_stride_alloc ; + chunk_span = arg_chunk_span ; + range_iter = fill_stride * number_alloc ; + repeat_inner = arg_repeat ; 
+ } + + //---------------------------------------- + + typedef long value_type ; + + //---------------------------------------- + + struct TagFill {}; + + KOKKOS_INLINE_FUNCTION + void operator()( TagFill , int i , value_type & update ) const noexcept + { + if ( 0 == i % fill_stride ) { + + const int j = i / fill_stride ; + + const unsigned size_alloc = chunk * ( 1 + ( j % chunk_span ) ); + + ptrs(j) = (uintptr_t) pool.allocate(size_alloc); + + if ( ptrs(j) ) ++update ; + } + } + + bool test_fill() + { + typedef Kokkos::RangePolicy< ExecSpace , TagFill > policy ; + + long result = 0 ; + + Kokkos::parallel_reduce( policy(0,range_iter), *this , result ); + + if ( result == long(ptrs.extent(0)) ) return true; + pool.print_state( std::cerr ); + return false; + } + + //---------------------------------------- + + struct TagDel {}; + + KOKKOS_INLINE_FUNCTION + void operator()( TagDel , int i ) const noexcept + { + if ( 0 == i % fill_stride ) { + + const int j = i / fill_stride ; + + const unsigned size_alloc = chunk * ( 1 + ( j % chunk_span ) ); + + pool.deallocate( (void*) ptrs(j) , size_alloc ); + } + } + + void test_del() + { + typedef Kokkos::RangePolicy< ExecSpace , TagDel > policy ; + + Kokkos::parallel_for( policy(0,range_iter), *this ); + } + + //---------------------------------------- + + struct TagAllocDealloc {}; + + KOKKOS_INLINE_FUNCTION + void operator()( TagAllocDealloc , int i , long & update ) const noexcept + { + if ( 0 == i % fill_stride ) { + + const int j = i / fill_stride ; + + if ( 0 == j % 3 ) { + + for ( unsigned k = 0 ; k < repeat_inner ; ++k ) { + + const unsigned size_alloc = chunk * ( 1 + ( j % chunk_span ) ); + + pool.deallocate( (void*) ptrs(j) , size_alloc ); + + ptrs(j) = (uintptr_t) pool.allocate(size_alloc); + + if ( 0 == ptrs(j) ) update++ ; + } + } + } + } + + bool test_alloc_dealloc() + { + typedef Kokkos::RangePolicy< ExecSpace , TagAllocDealloc > policy ; + + long error_count = 0 ; + + Kokkos::parallel_reduce( 
policy(0,range_iter), *this , error_count ); + + return 0 == error_count ; + } +}; + + + +int main( int argc , char* argv[] ) +{ + static const char help_flag[] = "--help" ; + static const char alloc_size_flag[] = "--alloc_size=" ; + static const char super_size_flag[] = "--super_size=" ; + static const char chunk_span_flag[] = "--chunk_span=" ; + static const char fill_stride_flag[] = "--fill_stride=" ; + static const char fill_level_flag[] = "--fill_level=" ; + static const char repeat_outer_flag[] = "--repeat_outer=" ; + static const char repeat_inner_flag[] = "--repeat_inner=" ; + + long total_alloc_size = 1000000 ; + int min_superblock_size = 10000 ; + int chunk_span = 5 ; + int fill_stride = 1 ; + int fill_level = 70 ; + int repeat_outer = 1 ; + int repeat_inner = 1 ; + + int ask_help = 0 ; + + for(int i=1;i<argc;i++) + { + const char * const a = argv[i]; + + if ( ! strncmp(a,help_flag,strlen(help_flag) ) ) ask_help = 1 ; + + if ( ! strncmp(a,alloc_size_flag,strlen(alloc_size_flag) ) ) + total_alloc_size = atol( a + strlen(alloc_size_flag) ); + + if ( ! strncmp(a,super_size_flag,strlen(super_size_flag) ) ) + min_superblock_size = atoi( a + strlen(super_size_flag) ); + + if ( ! strncmp(a,fill_stride_flag,strlen(fill_stride_flag) ) ) + fill_stride = atoi( a + strlen(fill_stride_flag) ); + + if ( ! strncmp(a,fill_level_flag,strlen(fill_level_flag) ) ) + fill_level = atoi( a + strlen(fill_level_flag) ); + + if ( ! strncmp(a,chunk_span_flag,strlen(chunk_span_flag) ) ) + chunk_span = atoi( a + strlen(chunk_span_flag) ); + + if ( ! strncmp(a,repeat_outer_flag,strlen(repeat_outer_flag) ) ) + repeat_outer = atoi( a + strlen(repeat_outer_flag) ); + + if ( ! 
strncmp(a,repeat_inner_flag,strlen(repeat_inner_flag) ) ) + repeat_inner = atoi( a + strlen(repeat_inner_flag) ); + } + + int chunk_span_bytes = 0; + for (int i = 0; i < chunk_span; ++i) { + auto chunk_bytes = TestFunctor::chunk * ( 1 + i ); + if (chunk_bytes < 64) chunk_bytes = 64; + auto block_bytes_lg2 = Kokkos::Impl::integral_power_of_two_that_contains( chunk_bytes ); + auto block_bytes = (1 << block_bytes_lg2); + chunk_span_bytes += block_bytes; + } + auto actual_superblock_bytes_lg2 = Kokkos::Impl::integral_power_of_two_that_contains( min_superblock_size ); + auto actual_superblock_bytes = (1 << actual_superblock_bytes_lg2); + auto superblock_mask = actual_superblock_bytes - 1; + auto nsuperblocks = (total_alloc_size + superblock_mask) >> actual_superblock_bytes_lg2; + auto actual_total_bytes = nsuperblocks * actual_superblock_bytes; + auto bytes_wanted = (actual_total_bytes * fill_level) / 100; + auto chunk_spans = bytes_wanted / chunk_span_bytes; + auto number_alloc = int( chunk_spans * chunk_span ); + + if ( ask_help ) { + std::cout << "command line options:" + << " " << help_flag + << " " << alloc_size_flag << "##" + << " " << super_size_flag << "##" + << " " << fill_stride_flag << "##" + << " " << fill_level_flag << "##" + << " " << chunk_span_flag << "##" + << " " << repeat_outer_flag << "##" + << " " << repeat_inner_flag << "##" + << std::endl ; + return 0; + } + + Kokkos::initialize(argc,argv); + + double sum_fill_time = 0; + double sum_cycle_time = 0; + double sum_both_time = 0; + double min_fill_time = std::numeric_limits<double>::max(); + double min_cycle_time = std::numeric_limits<double>::max(); + double min_both_time = std::numeric_limits<double>::max(); + //one alloc in fill, alloc/dealloc pair in repeat_inner + for ( int i = 0 ; i < repeat_outer ; ++i ) { + + TestFunctor functor( total_alloc_size + , min_superblock_size + , number_alloc + , fill_stride + , chunk_span + , repeat_inner ); + + Kokkos::Impl::Timer timer ; + + if ( ! 
functor.test_fill() ) { + Kokkos::abort("fill "); + } + + auto t0 = timer.seconds(); + + if ( ! functor.test_alloc_dealloc() ) { + Kokkos::abort("alloc/dealloc "); + } + + auto t1 = timer.seconds(); + auto this_fill_time = t0; + auto this_cycle_time = t1 - t0; + auto this_both_time = t1; + sum_fill_time += this_fill_time; + sum_cycle_time += this_cycle_time; + sum_both_time += this_both_time; + min_fill_time = std::min(min_fill_time, this_fill_time); + min_cycle_time = std::min(min_cycle_time, this_cycle_time); + min_both_time = std::min(min_both_time, this_both_time); + } + + Kokkos::finalize(); + + printf( "\"mempool: alloc super stride level span inner outer number\" %ld %d %d %d %d %d %d %d\n" + , total_alloc_size + , min_superblock_size + , fill_stride + , fill_level + , chunk_span + , repeat_inner + , repeat_outer + , number_alloc ); + + auto avg_fill_time = sum_fill_time / repeat_outer; + auto avg_cycle_time = sum_cycle_time / repeat_outer; + auto avg_both_time = sum_both_time / repeat_outer; + + printf( "\"mempool: fill time (min, avg)\" %.8f %.8f\n" + , min_fill_time + , avg_fill_time ); + + printf( "\"mempool: cycle time (min, avg)\" %.8f %.8f\n" + , min_cycle_time + , avg_cycle_time ); + + printf( "\"mempool: test time (min, avg)\" %.8f %.8f\n" + , min_both_time + , avg_both_time ); + + printf( "\"mempool: fill ops per second (max, avg)\" %g %g\n" + , number_alloc / min_fill_time + , number_alloc / avg_fill_time ); + + printf( "\"mempool: cycle ops per second (max, avg)\" %g %g\n" + , (2 * number_alloc * repeat_inner) / min_cycle_time + , (2 * number_alloc * repeat_inner) / avg_cycle_time ); +} + diff --git a/lib/kokkos/core/perf_test/test_taskdag.cpp b/lib/kokkos/core/perf_test/test_taskdag.cpp new file mode 100644 index 0000000000000000000000000000000000000000..221182c50b548b8ae2cde702a95a0010765a2a9b --- /dev/null +++ b/lib/kokkos/core/perf_test/test_taskdag.cpp @@ -0,0 +1,284 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if ! 
defined( KOKKOS_ENABLE_TASKDAG ) || \ + defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) + +int main() +{ + return 0 ; +} + +#else + +#include <cstdio> +#include <cstring> +#include <cstdlib> +#include <limits> + +#include <impl/Kokkos_Timer.hpp> + +using ExecSpace = Kokkos::DefaultExecutionSpace ; + +inline +long eval_fib( long n ) +{ + constexpr long mask = 0x03; + + long fib[4] = { 0, 1, 0, 0 }; + + for ( long i = 2; i <= n; ++i ) { + fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; + } + + return fib[ n & mask ]; +} + +inline +long fib_alloc_count( long n ) +{ + constexpr long mask = 0x03; + + long count[4] = { 1, 1, 0, 0 }; + + for ( long i = 2; i <= n; ++i ) { + count[ i & mask ] = 2 // this task plus the 'when_all' task + + count[ ( i - 1 ) & mask ] + + count[ ( i - 2 ) & mask ]; + } + + return count[ n & mask ]; +} + +template< class Space > +struct TestFib { + + using Scheduler = Kokkos::TaskScheduler< Space > ; + using MemorySpace = typename Scheduler::memory_space ; + using MemberType = typename Scheduler::member_type ; + using FutureType = Kokkos::Future< long , Space > ; + + typedef long value_type ; + + Scheduler sched ; + FutureType dep[2] ; + const value_type n ; + + KOKKOS_INLINE_FUNCTION + TestFib( const Scheduler & arg_sched , const value_type arg_n ) + : sched( arg_sched ), dep{} , n( arg_n ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const MemberType & , value_type & result ) noexcept + { + if ( n < 2 ) { + result = n ; + } + else if ( ! dep[0].is_null() && ! dep[1].is_null() ) { + result = dep[0].get() + dep[1].get(); + } + else { + // Spawn new children and respawn myself to sum their results. + // Spawn lower value at higher priority as it has a shorter + // path to completion. 
+ + dep[1] = Kokkos::task_spawn + ( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) + , TestFib( sched, n - 2 ) ); + + dep[0] = Kokkos::task_spawn + ( Kokkos::TaskSingle( sched ) + , TestFib( sched, n - 1 ) ); + + Kokkos::Future< ExecSpace > fib_all = Kokkos::when_all( dep, 2 ); + + if ( ! dep[0].is_null() && ! dep[1].is_null() && ! fib_all.is_null() ) { + // High priority to retire this branch. + Kokkos::respawn( this, fib_all, Kokkos::TaskPriority::High ); + } + else { + Kokkos::abort("Failed nested task spawn (allocation)"); + } + } + } +}; + + + +int main( int argc , char* argv[] ) +{ + static const char help[] = "--help" ; + static const char alloc_size[] = "--alloc_size=" ; + static const char super_size[] = "--super_size=" ; + static const char repeat_outer[] = "--repeat_outer=" ; + static const char input_value[] = "--input=" ; + + long total_alloc_size = 1000000 ; + int min_superblock_size = 10000 ; + int test_repeat_outer = 1 ; + int fib_input = 4 ; + + int ask_help = 0 ; + + for(int i=1;i<argc;i++) + { + const char * const a = argv[i]; + + if ( ! strncmp(a,help,strlen(help) ) ) ask_help = 1 ; + + if ( ! strncmp(a,alloc_size,strlen(alloc_size) ) ) + total_alloc_size = atol( a + strlen(alloc_size) ); + + if ( ! strncmp(a,super_size,strlen(super_size) ) ) + min_superblock_size = atoi( a + strlen(super_size) ); + + if ( ! strncmp(a,repeat_outer,strlen(repeat_outer) ) ) + test_repeat_outer = atoi( a + strlen(repeat_outer) ); + + if ( ! 
strncmp(a,input_value,strlen(input_value) ) ) + fib_input = atoi( a + strlen(input_value) ); + } + + const long fib_output = eval_fib( fib_input ); + const long number_alloc = fib_alloc_count( fib_input ); + + const unsigned min_block_size = 32 ; + const unsigned max_block_size = 128 ; + + long task_count_max = 0 ; + long task_count_accum = 0 ; + long test_result = 0 ; + + if ( ask_help ) { + std::cout << "command line options:" + << " " << help + << " " << alloc_size << "##" + << " " << super_size << "##" + << " " << input_value << "##" + << " " << repeat_outer << "##" + << std::endl ; + return -1; + } + + typedef TestFib< ExecSpace > Functor ; + + Kokkos::initialize(argc,argv); + + Functor::Scheduler sched( Functor::MemorySpace() + , total_alloc_size + , min_block_size + , max_block_size + , min_superblock_size + ); + + Functor::FutureType f = + Kokkos::host_spawn( Kokkos::TaskSingle( sched ) + , Functor( sched , fib_input ) + ); + + Kokkos::wait( sched ); + + test_result = f.get(); + + task_count_max = sched.allocated_task_count_max(); + task_count_accum = sched.allocated_task_count_accum(); + + if ( number_alloc != task_count_accum ) { + std::cout << " number_alloc( " << number_alloc << " )" + << " != task_count_accum( " << task_count_accum << " )" + << std::endl ; + } + + if ( fib_output != test_result ) { + std::cout << " answer( " << fib_output << " )" + << " != result( " << test_result << " )" + << std::endl ; + } + + if ( fib_output != test_result || number_alloc != task_count_accum ) { + printf(" TEST FAILED\n"); + return -1; + } + + double min_time = std::numeric_limits<double>::max(); + double time_sum = 0; + + for ( int i = 0 ; i < test_repeat_outer ; ++i ) { + Kokkos::Impl::Timer timer ; + + Functor::FutureType ftmp = + Kokkos::host_spawn( Kokkos::TaskSingle( sched ) + , Functor( sched , fib_input ) + ); + + Kokkos::wait( sched ); + auto this_time = timer.seconds(); + min_time = std::min(min_time, this_time); + time_sum += this_time; + } + + auto 
avg_time = time_sum / test_repeat_outer; + + Kokkos::finalize(); + + printf( "\"taskdag: alloc super repeat input output task-accum task-max\" %ld %d %d %d %ld %ld %ld\n" + , total_alloc_size + , min_superblock_size + , test_repeat_outer + , fib_input + , fib_output + , task_count_accum + , task_count_max ); + + printf( "\"taskdag: time (min, avg)\" %g %g\n", min_time, avg_time); + printf( "\"taskdag: tasks per second (max, avg)\" %g %g\n" + , number_alloc / min_time + , number_alloc / avg_time ); + + return 0 ; +} + +#endif + diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp index e0eadb25a005f09e1c9d37400bd76a611cc4eb3b..341404b9c37a6d7d2cf00fc1889613da5c63a202 100644 --- a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp +++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp @@ -44,15 +44,13 @@ #ifndef KOKKOS_CUDA_EXP_ITERATE_TILE_HPP #define KOKKOS_CUDA_EXP_ITERATE_TILE_HPP -#include <iostream> -#include <algorithm> -#include <stdio.h> - #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) +#include <iostream> +#include <algorithm> +#include <cstdio> + #include <utility> //#include<Cuda/Kokkos_CudaExec.hpp> @@ -1298,3 +1296,4 @@ protected: #endif #endif + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp index a273db998ba808726f4d9b5bc17bfc10347952ed..13abcfd93c241f7601a58c3003b54dd5b281936b 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,11 +45,10 @@ #define KOKKOS_CUDAEXEC_HPP #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #ifdef KOKKOS_ENABLE_CUDA #include <string> +#include <cstdint> #include <Kokkos_Parallel.hpp> #include <impl/Kokkos_Error.hpp> #include <Cuda/Kokkos_Cuda_abort.hpp> @@ -99,6 +98,8 @@ CudaSpace::size_type cuda_internal_maximum_warp_count(); CudaSpace::size_type cuda_internal_maximum_grid_count(); CudaSpace::size_type cuda_internal_maximum_shared_words(); +CudaSpace::size_type cuda_internal_maximum_concurrent_block_count(); + CudaSpace::size_type * cuda_internal_scratch_flags( const CudaSpace::size_type size ); CudaSpace::size_type * cuda_internal_scratch_space( const CudaSpace::size_type size ); CudaSpace::size_type * cuda_internal_scratch_unified( const CudaSpace::size_type size ); @@ -146,7 +147,7 @@ Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ; namespace Kokkos { namespace Impl { - void* cuda_resize_scratch_space(size_t bytes, bool force_shrink = false); + void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false); } } @@ -319,3 +320,4 @@ struct CudaParallelLaunch< DriverType , false > { #endif /* defined( __CUDACC__ ) */ #endif /* defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDAEXEC_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 303b3fa4f699f0e56c7d44682197bd050b2ac7ca..406b4f1e228065cffe087e61765ec38c5278ff23 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ 
b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -41,16 +41,15 @@ //@HEADER */ -#include <stdlib.h> +#include <Kokkos_Macros.hpp> +#ifdef KOKKOS_ENABLE_CUDA + +#include <cstdlib> #include <iostream> #include <sstream> #include <stdexcept> #include <algorithm> #include <atomic> -#include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ -#ifdef KOKKOS_ENABLE_CUDA #include <Kokkos_Core.hpp> #include <Kokkos_Cuda.hpp> @@ -890,9 +889,9 @@ void init_lock_arrays_cuda_space() { } } -void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) { +void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) { static void* ptr = NULL; - static size_t current_size = 0; + static std::int64_t current_size = 0; if(current_size == 0) { current_size = bytes; ptr = Kokkos::kokkos_malloc<Kokkos::CudaSpace>("CudaSpace::ScratchMemory",current_size); @@ -911,5 +910,7 @@ void* cuda_resize_scratch_space(size_t bytes, bool force_shrink) { } } +#else +void KOKKOS_CORE_SRC_CUDA_CUDASPACE_PREVENT_LINK_ERROR() {} #endif // KOKKOS_ENABLE_CUDA diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp index 49c228f86ae9ef5d9905a67d5e14a288104f06f7..391a881e2dcb320b2cb8ba25e09002e2eb9232a9 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,8 +45,6 @@ #define KOKKOS_CUDA_ALLOCATION_TRACKING_HPP #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #ifdef KOKKOS_ENABLE_CUDA #include <impl/Kokkos_Traits.hpp> @@ -75,7 +73,7 @@ shared_allocation_record( Kokkos::CudaSpace const & arg_space new( functor ) DestructFunctor( arg_destruct ); record->m_destruct_functor = & shared_allocation_destroy< DestructFunctor > ; - + return record ; } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp index e58e1f58de304f19f9e950301bd6bb644fa37dd1..bc54954418dbdb5212b7701af65c60fb66f605f9 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,8 +45,6 @@ #define KOKKOS_CUDA_ERROR_HPP #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #ifdef KOKKOS_ENABLE_CUDA namespace Kokkos { namespace Impl { @@ -67,3 +65,4 @@ inline void cuda_internal_safe_call( cudaError e , const char * name, const char #endif //KOKKOS_ENABLE_CUDA #endif //KOKKOS_CUDA_ERROR_HPP + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp index 44d908d1023197c5a8d0232a3d13ff49d06ef8d9..daf55cbd97b9931364f14a4f40b91a3e119bff00 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp @@ -44,11 +44,11 @@ /*--------------------------------------------------------------------------*/ /* Kokkos interfaces */ -#include <Kokkos_Core.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ +#include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_CUDA +#include <Kokkos_Core.hpp> + #include <Cuda/Kokkos_Cuda_Error.hpp> #include <Cuda/Kokkos_Cuda_Internal.hpp> #include <impl/Kokkos_Error.hpp> @@ -56,7 +56,7 @@ /*--------------------------------------------------------------------------*/ /* Standard 'C' libraries */ -#include <stdlib.h> +#include <cstdlib> /* Standard 'C++' libraries */ #include <vector> @@ -404,9 +404,23 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) // Query what compute capability architecture a kernel executes: m_cudaArch = cuda_kernel_arch(); - if ( m_cudaArch != cudaProp.major * 100 + cudaProp.minor * 10 ) { + int compiled_major = m_cudaArch / 100; + int compiled_minor = ( m_cudaArch % 100 ) / 10; + + if ( compiled_major < 5 && cudaProp.major >= 5 ) { + std::stringstream ss; + ss << "Kokkos::Cuda::initialize ERROR: running kernels compiled for compute capability " + << compiled_major << "." 
<< compiled_minor + << " (< 5.0) on device with compute capability " + << cudaProp.major << "." << cudaProp.minor + << " (>=5.0), this would give incorrect results!" + << std::endl ; + std::string msg = ss.str(); + Kokkos::abort( msg.c_str() ); + } + if ( compiled_major != cudaProp.major || compiled_minor != cudaProp.minor ) { std::cerr << "Kokkos::Cuda::initialize WARNING: running kernels compiled for compute capability " - << ( m_cudaArch / 100 ) << "." << ( ( m_cudaArch % 100 ) / 10 ) + << compiled_major << "." << compiled_minor << " on device with compute capability " << cudaProp.major << "." << cudaProp.minor << " , this will likely reduce potential performance." @@ -661,6 +675,15 @@ void CudaInternal::finalize() Cuda::size_type cuda_internal_multiprocessor_count() { return CudaInternal::singleton().m_multiProcCount ; } +CudaSpace::size_type cuda_internal_maximum_concurrent_block_count() +{ + // Compute capability 5.0 through 6.2 + enum : int { max_resident_blocks_per_multiprocessor = 32 }; + + return CudaInternal::singleton().m_multiProcCount + * max_resident_blocks_per_multiprocessor ; +}; + Cuda::size_type cuda_internal_maximum_warp_count() { return CudaInternal::singleton().m_maxWarpCount ; } @@ -772,8 +795,10 @@ void Cuda::fence() Kokkos::Impl::cuda_device_synchronize(); } -} // namespace Kokkos +const char* Cuda::name() { return "Cuda"; } +} // namespace Kokkos +#else +void KOKKOS_CORE_SRC_CUDA_IMPL_PREVENT_LINK_ERROR() {} #endif // KOKKOS_ENABLE_CUDA -//---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp index c75b9f1ddf6b8a410a3ccc15cf76c589beb22762..148d9f44eeed8f22efe624658500c834ad066833 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ 
-// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,19 +36,18 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #ifndef KOKKOS_CUDA_INTERNAL_HPP #define KOKKOS_CUDA_INTERNAL_HPP -#include<iostream> -#include <Kokkos_Macros.hpp> -/* only compile this file if CUDA is enabled for Kokkos */ +#include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_CUDA +#include<iostream> #include <Cuda/Kokkos_Cuda_Error.hpp> namespace Kokkos { namespace Impl { diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index 56e6a3c1e34123d8fc58dbfffea0574acea31047..0c8c700e8f1e2a2a33789ec56ef5d5ac80f4496c 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -44,15 +44,14 @@ #ifndef KOKKOS_CUDA_PARALLEL_HPP #define KOKKOS_CUDA_PARALLEL_HPP -#include <iostream> -#include <algorithm> -#include <stdio.h> - #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) +#include <iostream> +#include <algorithm> +#include <cstdio> +#include <cstdint> + #include <utility> #include <Kokkos_Parallel.hpp> @@ -72,166 +71,6 @@ namespace Kokkos { namespace Impl { -template< typename Type > -struct CudaJoinFunctor { - typedef Type value_type ; - - KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , - volatile const value_type & input ) - { update += input ; } -}; - -class 
CudaTeamMember { -private: - - typedef Kokkos::Cuda execution_space ; - typedef execution_space::scratch_memory_space scratch_memory_space ; - - void * m_team_reduce ; - scratch_memory_space m_team_shared ; - int m_league_rank ; - int m_league_size ; - -public: - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space & team_shmem() const - { return m_team_shared.set_team_thread_mode(0,1,0) ; } - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space & team_scratch(const int& level) const - { return m_team_shared.set_team_thread_mode(level,1,0) ; } - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space & thread_scratch(const int& level) const - { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { - #ifdef __CUDA_ARCH__ - return threadIdx.y ; - #else - return 1; - #endif - } - KOKKOS_INLINE_FUNCTION int team_size() const { - #ifdef __CUDA_ARCH__ - return blockDim.y ; - #else - return 1; - #endif - } - - KOKKOS_INLINE_FUNCTION void team_barrier() const { - #ifdef __CUDA_ARCH__ - __syncthreads(); - #endif - } - - template<class ValueType> - KOKKOS_INLINE_FUNCTION void team_broadcast(ValueType& value, const int& thread_id) const { - #ifdef __CUDA_ARCH__ - __shared__ ValueType sh_val; - if(threadIdx.x == 0 && threadIdx.y == thread_id) { - sh_val = value; - } - team_barrier(); - value = sh_val; - team_barrier(); - #endif - } - - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION - typename JoinOp::value_type team_reduce( const ValueType & value - , const JoinOp & op_in ) const { - #ifdef __CUDA_ARCH__ - typedef JoinLambdaAdapter<ValueType,JoinOp> JoinOpFunctor ; - const JoinOpFunctor op(op_in); - ValueType * const base_data = (ValueType *) m_team_reduce ; - - __syncthreads(); // Don't 
write in to shared data until all threads have entered this function - - if ( 0 == threadIdx.y ) { base_data[0] = 0 ; } - - base_data[ threadIdx.y ] = value ; - - Impl::cuda_intra_block_reduce_scan<false,JoinOpFunctor,void>( op , base_data ); - - return base_data[ blockDim.y - 1 ]; - #else - return typename JoinOp::value_type(); - #endif - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const { - #ifdef __CUDA_ARCH__ - Type * const base_data = (Type *) m_team_reduce ; - - __syncthreads(); // Don't write in to shared data until all threads have entered this function - - if ( 0 == threadIdx.y ) { base_data[0] = 0 ; } - - base_data[ threadIdx.y + 1 ] = value ; - - Impl::cuda_intra_block_reduce_scan<true,Impl::CudaJoinFunctor<Type>,void>( Impl::CudaJoinFunctor<Type>() , base_data + 1 ); - - if ( global_accum ) { - if ( blockDim.y == threadIdx.y + 1 ) { - base_data[ blockDim.y ] = atomic_fetch_add( global_accum , base_data[ blockDim.y ] ); - } - __syncthreads(); // Wait for atomic - base_data[ threadIdx.y ] += base_data[ blockDim.y ] ; - } - - return base_data[ threadIdx.y ]; - #else - return Type(); - #endif - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
- * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const { - return this->template team_scan<Type>( value , 0 ); - } - - //---------------------------------------- - // Private for the driver - - KOKKOS_INLINE_FUNCTION - CudaTeamMember( void * shared - , const int shared_begin - , const int shared_size - , void* scratch_level_1_ptr - , const int scratch_level_1_size - , const int arg_league_rank - , const int arg_league_size ) - : m_team_reduce( shared ) - , m_team_shared( ((char *)shared) + shared_begin , shared_size, scratch_level_1_ptr, scratch_level_1_size) - , m_league_rank( arg_league_rank ) - , m_league_size( arg_league_size ) - {} - -}; - -} // namespace Impl - -namespace Impl { template< class ... Properties > class TeamPolicyInternal< Kokkos::Cuda , Properties ... >: public PolicyTraits<Properties ... > { @@ -315,10 +154,10 @@ public: if(team_size_<0) team_size_ = m_team_size; return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level]; } - inline size_t team_scratch_size(int level) const { + inline int team_scratch_size(int level) const { return m_team_scratch_size[level]; } - inline size_t thread_scratch_size(int level) const { + inline int thread_scratch_size(int level) const { return m_thread_scratch_size[level]; } @@ -458,6 +297,7 @@ public: typedef Kokkos::Impl::CudaTeamMember member_type ; }; + } // namspace Impl } // namespace Kokkos @@ -559,14 +399,14 @@ private: // [ team shared space ] // - const FunctorType m_functor ; - const size_type m_league_size ; - const size_type m_team_size ; - const size_type m_vector_size ; - const size_type m_shmem_begin ; - const size_type m_shmem_size ; - void* m_scratch_ptr[2] ; - const int m_scratch_size[2] ; + const FunctorType m_functor ; + const size_type m_league_size ; + const size_type m_team_size ; + const size_type 
m_vector_size ; + const int m_shmem_begin ; + const int m_shmem_size ; + void* m_scratch_ptr[2] ; + const int m_scratch_size[2] ; template< class TagType > __device__ inline @@ -619,6 +459,11 @@ public: , league_rank , m_league_size ) ); } + if ( m_scratch_size[1]>0 ) { + __syncthreads(); + if (threadIdx.x==0 && threadIdx.y==0 ) + kokkos_impl_cuda_lock_arrays.atomic[threadid]=0; + } } inline @@ -885,7 +730,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().ptr_on_device() ) + , m_result_ptr( reducer.view().ptr_on_device() ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -894,6 +739,8 @@ public: //---------------------------------------------------------------------------- +#if 1 + template< class FunctorType , class ReducerType, class ... Properties > class ParallelReduce< FunctorType , Kokkos::TeamPolicy< Properties ... > @@ -990,6 +837,11 @@ public: } run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0), threadid ); + if ( m_scratch_size[1]>0 ) { + __syncthreads(); + if (threadIdx.x==0 && threadIdx.y==0 ) + kokkos_impl_cuda_lock_arrays.atomic[threadid]=0; + } } __device__ inline @@ -1032,6 +884,7 @@ public: for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; } } + } __device__ inline @@ -1148,7 +1001,7 @@ public: m_team_begin = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem<false,FunctorType,WorkTag>( arg_functor , m_team_size ); m_shmem_begin = sizeof(double) * ( m_team_size + 2 ); m_shmem_size = arg_policy.scratch_size(0,m_team_size) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , m_team_size ); - m_scratch_ptr[1] = cuda_resize_scratch_space(m_scratch_size[1]*(Cuda::concurrency()/(m_team_size*m_vector_size))); + m_scratch_ptr[1] = 
cuda_resize_scratch_space(static_cast<std::int64_t>(m_scratch_size[1])*(static_cast<std::int64_t>(Cuda::concurrency()/(m_team_size*m_vector_size)))); m_scratch_size[0] = m_shmem_size; m_scratch_size[1] = arg_policy.scratch_size(1,m_team_size); @@ -1184,7 +1037,7 @@ public: , const ReducerType & reducer) : m_functor( arg_functor ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().ptr_on_device() ) + , m_result_ptr( reducer.view().ptr_on_device() ) , m_scratch_space( 0 ) , m_scratch_flags( 0 ) , m_unified_space( 0 ) @@ -1237,6 +1090,364 @@ public: } }; +//---------------------------------------------------------------------------- +#else +//---------------------------------------------------------------------------- + +template< class FunctorType , class ReducerType, class ... Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::Cuda + > +{ +private: + + enum : int { align_scratch_value = 0x0100 /* 256 */ }; + enum : int { align_scratch_mask = align_scratch_value - 1 }; + + KOKKOS_INLINE_FUNCTION static constexpr + int align_scratch( const int n ) + { + return ( n & align_scratch_mask ) + ? 
n + align_scratch_value - ( n & align_scratch_mask ) : n ; + } + + //---------------------------------------- + // Reducer does not wrap a functor + template< class R = ReducerType , class F = void > + struct reducer_type : public R { + + template< class S > + using rebind = reducer_type< typename R::rebind<S> , void > ; + + KOKKOS_INLINE_FUNCTION + reducer_type( FunctorType const * + , ReducerType const * arg_reducer + , typename R::value_type * arg_value ) + : R( *arg_reducer , arg_value ) {} + }; + + // Reducer does wrap a functor + template< class R > + struct reducer_type< R , FunctorType > : public R { + + template< class S > + using rebind = reducer_type< typename R::rebind<S> , FunctorType > ; + + KOKKOS_INLINE_FUNCTION + reducer_type( FunctorType const * arg_functor + , ReducerType const * + , typename R::value_type * arg_value ) + : R( arg_functor , arg_value ) {} + }; + + //---------------------------------------- + + typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... > Policy ; + typedef CudaTeamMember Member ; + typedef typename Policy::work_tag WorkTag ; + typedef typename reducer_type<>::pointer_type pointer_type ; + typedef typename reducer_type<>::reference_type reference_type ; + typedef typename reducer_type<>::value_type value_type ; + + typedef Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , Policy + , FunctorType + > Analysis ; + +public: + + typedef FunctorType functor_type ; + typedef Cuda::size_type size_type ; + +private: + + const FunctorType m_functor ; + const reducer_type<> m_reducer ; + size_type * m_scratch_space ; + size_type * m_unified_space ; + size_type m_team_begin ; + size_type m_shmem_begin ; + size_type m_shmem_size ; + void* m_scratch_ptr[2] ; + int m_scratch_size[2] ; + const size_type m_league_size ; + const size_type m_team_size ; + const size_type m_vector_size ; + + template< class TagType > + __device__ inline + typename std::enable_if< std::is_same< TagType , void >::value 
>::type + exec_team( const Member & member , reference_type update ) const + { m_functor( member , update ); } + + template< class TagType > + __device__ inline + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_team( const Member & member , reference_type update ) const + { m_functor( TagType() , member , update ); } + + +public: + + __device__ inline + void operator() () const + { + void * const shmem = kokkos_impl_cuda_shared_memory<char>(); + + const bool reduce_to_host = + std::is_same< typename reducer_type<>::memory_space + , Kokkos::HostSpace >::value && + m_reducer.data(); + + value_type value ; + + typename reducer_type<>::rebind< CudaSpace > + reduce( & m_functor , & m_reducer , & value ); + + reduce.init( reduce.data() ); + + // Iterate this block through the league + + for ( int league_rank = blockIdx.x + ; league_rank < m_league_size + ; league_rank += gridDim.x ) { + + // Initialization of team member data: + + const Member member + ( shmem + , m_shmem_team_begin + , m_shmem_team_size + , reinterpret_cast<char*>(m_scratch_space) + m_global_team_begin + , m_global_team_size + , league_rank + , m_league_size ); + + ParallelReduce::template + exec_team< WorkTag >( member , reduce.reference() ); + } + + if ( Member::global_reduce( reduce + , m_scratch_space + , reinterpret_cast<char*>(m_scratch_space) + + aligned_flag_size + , shmem + , m_shmem_size ) ) { + + // Single thread with data in value + + reduce.final( reduce.data() ); + + if ( reduce_to_host ) { + reducer.copy( m_unified_space , reduce.data() ); + } + } + } + + + inline + void execute() + { + const bool reduce_to_host = + std::is_same< typename reducer_type<>::memory_space + , Kokkos::HostSpace >::value && + m_reducer.data(); + + const bool reduce_to_gpu = + std::is_same< typename reducer_type<>::memory_space + , Kokkos::CudaSpace >::value && + m_reducer.data(); + + if ( m_league_size && m_team_size ) { + + const int value_size = Analysis::value_size( m_functor 
); + + m_scratch_space = cuda_internal_scratch_space( m_scratch_size ); + m_unified_space = cuda_internal_scratch_unified( value_size ); + + const dim3 block( m_vector_size , m_team_size , m_team_per_block ); + const dim3 grid( m_league_size , 1 , 1 ); + const int shmem = m_shmem_team_begin + m_shmem_team_size ; + + // copy to device and execute + CudaParallelLaunch<ParallelReduce>( *this, grid, block, shmem ); + + Cuda::fence(); + + if ( reduce_to_host ) { + m_reducer.copy( m_reducer.data() , pointer_type(m_unified_space) ); + } + } + else if ( reduce_to_host ) { + m_reducer.init( m_reducer.data() ); + } + else if ( reduce_to_gpu ) { + value_type tmp ; + m_reduce.init( & tmp ); + cudaMemcpy( m_reduce.data() , & tmp , cudaMemcpyHostToDevice ); + } + } + + + /**\brief Set up parameters and allocations for kernel launch. + * + * block = { vector_size , team_size , team_per_block } + * grid = { number_of_teams , 1 , 1 } + * + * shmem = shared memory for: + * [ team_reduce_buffer + * , team_scratch_buffer_level_0 ] + * reused by: + * [ global_reduce_buffer ] + * + * global_scratch for: + * [ global_reduce_flag_buffer + * , global_reduce_value_buffer + * , team_scratch_buffer_level_1 * max_concurrent_team ] + */ + + ParallelReduce( FunctorType && arg_functor + , Policy && arg_policy + , ReducerType const & arg_reducer + ) + : m_functor( arg_functor ) + // the input reducer may wrap the input functor so must + // generate a reducer bound to the copied functor. 
+ , m_reducer( & m_functor , & arg_reducer , arg_reducer.data() ) + , m_scratch_space( 0 ) + , m_unified_space( 0 ) + , m_team_begin( 0 ) + , m_shmem_begin( 0 ) + , m_shmem_size( 0 ) + , m_scratch_ptr{NULL,NULL} + , m_league_size( arg_policy.league_size() ) + , m_team_per_block( 0 ) + , m_team_size( arg_policy.team_size() ) + , m_vector_size( arg_policy.vector_length() ) + { + if ( 0 == m_league_size ) return ; + + const int value_size = Analysis::value_size( m_functor ); + + //---------------------------------------- + // Vector length must be <= WarpSize and power of two + + const bool ok_vector = m_vector_size < CudaTraits::WarpSize && + Kokkos::Impl::is_integral_power_of_two( m_vector_size ); + + //---------------------------------------- + + if ( 0 == m_team_size ) { + // Team size is AUTO, use a whole block per team. + // Calculate block size using the occupance calculator. + // Occupancy calculator assumes whole block. + + m_team_size = + Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce > + ( arg_functor + , arg_policy.vector_length() + , arg_policy.team_scratch_size(0) + , arg_policy.thread_scratch_size(0) / arg_policy.vector_length() ); + + m_team_per_block = 1 ; + } + + //---------------------------------------- + // How many CUDA threads per team. + // If more than a warp or multiple teams cannot exactly fill a warp + // then only one team per block. 
+ + const int team_threads = m_team_size * m_vector_size ; + + if ( ( CudaTraits::WarpSize < team_threads ) || + ( CudaTraits::WarpSize % team_threads ) ) { + m_team_per_block = 1 ; + } + + //---------------------------------------- + // How much team scratch shared memory determined from + // either the functor or the policy: + + if ( CudaTraits::WarpSize < team_threads ) { + // Need inter-warp team reduction (collectives) shared memory + // Speculate an upper bound for the value size + + m_shmem_team_begin = + align_scratch( CudaTraits::warp_count(team_threads) * sizeof(double) ); + } + + m_shmem_team_size = arg_policy.scratch_size(0,m_team_size); + + if ( 0 == m_shmem_team_size ) { + m_shmem_team_size = Analysis::team_shmem_size( m_functor , m_team_size ); + } + + m_shmem_team_size = align_scratch( m_shmem_team_size ); + + // Can fit a team in a block: + + const bool ok_shmem_team = + ( m_shmem_team_begin + m_shmem_team_size ) + < CudaTraits::SharedMemoryCapacity ; + + //---------------------------------------- + + if ( 0 == m_team_per_block ) { + // Potentially more than one team per block. + // Determine number of teams per block based upon + // how much team scratch can fit and exactly filling each warp. + + const int team_per_warp = team_threads / CudaTraits::WarpSize ; + + const int max_team_per_block = + Kokkos::Impl::CudaTraits::SharedMemoryCapacity + / shmem_team_scratch_size ; + + for ( m_team_per_block = team_per_warp ; + m_team_per_block + team_per_warp < max_team_per_block ; + m_team_per_block += team_per_warp ); + } + + //---------------------------------------- + // How much global reduce scratch shared memory. + + int shmem_global_reduce_size = 8 * value_size ; + + //---------------------------------------- + // Global scratch memory requirements. 
+ + const int aligned_flag_size = align_scratch( sizeof(int) ); + + const int max_concurrent_block = + cuda_internal_maximum_concurrent_block_count(); + + // Reduce space has claim flag followed by vaue buffer + const int global_reduce_value_size = + max_concurrent_block * + ( aligned_flag_size + align_scratch( value_size ) ); + + // Scratch space has claim flag followed by scratch buffer + const int global_team_scratch_size = + max_concurrent_block * m_team_per_block * + ( aligned_flag_size + + align_scratch( arg_policy.scratch_size(1,m_team_size) / m_vector_size ) + ); + + const int global_size = aligned_flag_size + + global_reduce_value_size + + global_team_scratch_size ; + + m_global_reduce_begin = aligned_flag_size ; + m_global_team_begin = m_global_reduce_begin + global_reduce_value_size ; + m_global_size = m_global_team_begin + global_team_scratch_size ; + } +}; + +#endif + } // namespace Impl } // namespace Kokkos @@ -1467,390 +1678,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { -namespace Impl { - template<typename iType> - struct TeamThreadRangeBoundariesStruct<iType,CudaTeamMember> { - typedef iType index_type; - const iType start; - const iType end; - const iType increment; - const CudaTeamMember& thread; - -#ifdef __CUDA_ARCH__ - __device__ inline - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): - start( threadIdx.y ), - end( count ), - increment( blockDim.y ), - thread(thread_) - {} - __device__ inline - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_): - start( begin_+threadIdx.y ), - end( end_ ), - increment( blockDim.y ), - thread(thread_) - {} -#else - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count): - start( 0 ), - end( count ), - increment( 1 ), - 
thread(thread_) - {} - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_): - start( begin_ ), - end( end_ ), - increment( 1 ), - thread(thread_) - {} -#endif - }; - - template<typename iType> - struct ThreadVectorRangeBoundariesStruct<iType,CudaTeamMember> { - typedef iType index_type; - const iType start; - const iType end; - const iType increment; - -#ifdef __CUDA_ARCH__ - __device__ inline - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const iType& count): - start( threadIdx.x ), - end( count ), - increment( blockDim.x ) - {} - __device__ inline - ThreadVectorRangeBoundariesStruct (const iType& count): - start( threadIdx.x ), - end( count ), - increment( blockDim.x ) - {} -#else - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const iType& count): - start( 0 ), - end( count ), - increment( 1 ) - {} - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const iType& count): - start( 0 ), - end( count ), - increment( 1 ) - {} -#endif - }; - -} // namespace Impl - -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) { - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); -} - -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::CudaTeamMember > -TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); -} - -template<typename iType> -KOKKOS_INLINE_FUNCTION 
-Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > -ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct<Impl::CudaTeamMember> PerTeam(const Impl::CudaTeamMember& thread) { - return Impl::ThreadSingleStruct<Impl::CudaTeamMember>(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct<Impl::CudaTeamMember> PerThread(const Impl::CudaTeamMember& thread) { - return Impl::VectorSingleStruct<Impl::CudaTeamMember>(thread); -} - -} // namespace Kokkos - -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, const Lambda& lambda) { - #ifdef __CUDA_ARCH__ - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); - #endif -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of - * val is performed and put into result. 
This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, - const Lambda & lambda, ValueType& result) { - -#ifdef __CUDA_ARCH__ - result = ValueType(); - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } - - Impl::cuda_intra_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) - { dst+=src; }); - Impl::cuda_inter_warp_reduction(result,[&] (ValueType& dst, const ValueType& src) - { dst+=src; }); -#endif -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - -#ifdef __CUDA_ARCH__ - ValueType result = init_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } - - Impl::cuda_intra_warp_reduction(result, join ); - Impl::cuda_inter_warp_reduction(result, join ); - - init_result = result; -#endif -} - -} //namespace Kokkos - -namespace Kokkos { -/** \brief Intra-thread vector parallel_for. 
Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& - loop_boundaries, const Lambda& lambda) { -#ifdef __CUDA_ARCH__ - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -#endif -} - -/** \brief Intra-thread vector parallel_reduce. - * - * Calls lambda(iType i, ValueType & val) for each i=[0..N). - * - * The range [0..N) is mapped to all vector lanes of - * the calling thread and a reduction of val is performed using += - * and output into result. - * - * The identity value for the += operator is assumed to be the default - * constructed value. - */ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > - const & loop_boundaries - , Lambda const & lambda - , ValueType & result ) -{ -#ifdef __CUDA_ARCH__ - result = ValueType(); - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } - - Impl::cuda_intra_warp_vector_reduce( - Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & result ) ); - -#endif -} - -/** \brief Intra-thread vector parallel_reduce. - * - * Calls lambda(iType i, ValueType & val) for each i=[0..N). - * - * The range [0..N) is mapped to all vector lanes of - * the calling thread and a reduction of val is performed - * using JoinType::operator()(ValueType& val, const ValueType& update) - * and output into result. - * - * The input value of result must be the identity value for the - * reduction operation; e.g., ( 0 , += ) or ( 1 , *= ). 
- */ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > - const & loop_boundaries - , Lambda const & lambda - , JoinType const & join - , ValueType & result ) -{ -#ifdef __CUDA_ARCH__ - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,result); - } - - Impl::cuda_intra_warp_vector_reduce( - Impl::Reducer< ValueType , JoinType >( join , & result ) ); - -#endif -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) - * for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. - * Depending on the target execution space the operator might be called twice: once with final=false - * and once with final=true. When final==true val contains the prefix sum value. The contribution of this - * "i" needs to be added to val no matter whether final==true or not. In a serial execution - * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set - * to the final sum value over all vector lanes. 
- * This functionality requires C++11 support.*/ -template< typename iType, class FunctorType > -KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& - loop_boundaries, const FunctorType & lambda) { - -#ifdef __CUDA_ARCH__ - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; - typedef typename ValueTraits::value_type value_type ; - - value_type scan_val = value_type(); - const int VectorLength = blockDim.x; - - iType loop_bound = ((loop_boundaries.end+VectorLength-1)/VectorLength) * VectorLength; - for(int _i = threadIdx.x; _i < loop_bound; _i += VectorLength) { - value_type val = value_type(); - if(_i<loop_boundaries.end) - lambda(_i , val , false); - - value_type tmp = val; - value_type result_i; - - if(threadIdx.x%VectorLength == 0) - result_i = tmp; - if (VectorLength > 1) { - const value_type tmp2 = shfl_up(tmp, 1,VectorLength); - if(threadIdx.x > 0) - tmp+=tmp2; - } - if(threadIdx.x%VectorLength == 1) - result_i = tmp; - if (VectorLength > 3) { - const value_type tmp2 = shfl_up(tmp, 2,VectorLength); - if(threadIdx.x > 1) - tmp+=tmp2; - } - if ((threadIdx.x%VectorLength >= 2) && - (threadIdx.x%VectorLength < 4)) - result_i = tmp; - if (VectorLength > 7) { - const value_type tmp2 = shfl_up(tmp, 4,VectorLength); - if(threadIdx.x > 3) - tmp+=tmp2; - } - if ((threadIdx.x%VectorLength >= 4) && - (threadIdx.x%VectorLength < 8)) - result_i = tmp; - if (VectorLength > 15) { - const value_type tmp2 = shfl_up(tmp, 8,VectorLength); - if(threadIdx.x > 7) - tmp+=tmp2; - } - if ((threadIdx.x%VectorLength >= 8) && - (threadIdx.x%VectorLength < 16)) - result_i = tmp; - if (VectorLength > 31) { - const value_type tmp2 = shfl_up(tmp, 16,VectorLength); - if(threadIdx.x > 15) - tmp+=tmp2; - } - if (threadIdx.x%VectorLength >= 16) - result_i = tmp; - - val = scan_val + result_i - val; - scan_val += shfl(tmp,VectorLength-1,VectorLength); - if(_i<loop_boundaries.end) - lambda(_i , val , 
true); - } -#endif -} - -} - -namespace Kokkos { - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { -#ifdef __CUDA_ARCH__ - if(threadIdx.x == 0) lambda(); -#endif -} - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { -#ifdef __CUDA_ARCH__ - if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); -#endif -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda, ValueType& val) { -#ifdef __CUDA_ARCH__ - if(threadIdx.x == 0) lambda(val); - val = shfl(val,0,blockDim.x); -#endif -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { -#ifdef __CUDA_ARCH__ - if(threadIdx.x == 0 && threadIdx.y == 0) { - lambda(val); - } - single_struct.team_member.team_broadcast(val,0); -#endif -} - -} - namespace Kokkos { namespace Impl { @@ -1886,62 +1713,6 @@ namespace Impl { }; - template< class FunctorType, class Enable = void> - struct ReduceFunctorHasInit { - enum {value = false}; - }; - - template< class FunctorType> - struct ReduceFunctorHasInit<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::init ) >::type > { - enum {value = true}; - }; - - template< class FunctorType, class Enable = void> - struct ReduceFunctorHasJoin { - enum {value = false}; - }; - - template< class FunctorType> - struct ReduceFunctorHasJoin<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::join ) >::type > { - enum {value = true}; - }; - - template< class FunctorType, class Enable = void> - struct ReduceFunctorHasFinal { - enum {value = false}; - }; - - template< class FunctorType> - struct ReduceFunctorHasFinal<FunctorType, 
typename Impl::enable_if< 0 < sizeof( & FunctorType::final ) >::type > { - enum {value = true}; - }; - - template< class FunctorType, class Enable = void> - struct ReduceFunctorHasShmemSize { - enum {value = false}; - }; - - template< class FunctorType> - struct ReduceFunctorHasShmemSize<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type > { - enum {value = true}; - }; - - template< class FunctorType, bool Enable = - ( FunctorDeclaresValueType<FunctorType,void>::value) || - ( ReduceFunctorHasInit<FunctorType>::value ) || - ( ReduceFunctorHasJoin<FunctorType>::value ) || - ( ReduceFunctorHasFinal<FunctorType>::value ) || - ( ReduceFunctorHasShmemSize<FunctorType>::value ) - > - struct IsNonTrivialReduceFunctor { - enum {value = false}; - }; - - template< class FunctorType> - struct IsNonTrivialReduceFunctor<FunctorType, true> { - enum {value = true}; - }; - template<class FunctorType, class ResultType, class Tag, bool Enable = IsNonTrivialReduceFunctor<FunctorType>::value > struct FunctorReferenceType { typedef ResultType& reference_type; @@ -1965,6 +1736,7 @@ namespace Impl { } } // namespace Kokkos -#endif /* defined( __CUDACC__ ) */ +#endif /* defined( __CUDACC__ ) */ #endif /* #ifndef KOKKOS_CUDA_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index 79b3867ba24a87e787faac051c21abf6a99795de..432c7895ccf88e6fbf14172c0491e83959c0d3a0 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,8 +45,6 @@ #define KOKKOS_CUDA_REDUCESCAN_HPP #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) #include <utility> @@ -63,6 +61,7 @@ namespace Kokkos { namespace Impl { //---------------------------------------------------------------------------- +// Shuffle operations require input to be a register (stack) variable template< typename T > __device__ inline @@ -140,77 +139,6 @@ void cuda_shfl_up( T & out , T const & in , int delta , } } -//---------------------------------------------------------------------------- -/** \brief Reduce within a warp over blockDim.x, the "vector" dimension. - * - * This will be called within a nested, intra-team parallel operation. - * Use shuffle operations to avoid conflicts with shared memory usage. - * - * Requires: - * blockDim.x is power of 2 - * blockDim.x <= 32 (one warp) - * - * Cannot use "butterfly" pattern because floating point - * addition is non-associative. Therefore, must broadcast - * the final result. 
- */ -template< class Reducer > -__device__ inline -void cuda_intra_warp_vector_reduce( Reducer const & reducer ) -{ - static_assert( - std::is_reference< typename Reducer::reference_type >::value , "" ); - - if ( 1 < blockDim.x ) { - - typename Reducer::value_type tmp ; - - for ( int i = blockDim.x ; ( i >>= 1 ) ; ) { - - cuda_shfl_down( tmp , reducer.reference() , i , blockDim.x ); - - if ( threadIdx.x < i ) { reducer.join( reducer.data() , & tmp ); } - } - - // Broadcast from root "lane" to all other "lanes" - - cuda_shfl( reducer.reference() , reducer.reference() , 0 , blockDim.x ); - } -} - -/** \brief Inclusive scan over blockDim.x, the "vector" dimension. - * - * This will be called within a nested, intra-team parallel operation. - * Use shuffle operations to avoid conflicts with shared memory usage. - * - * Algorithm is concurrent bottom-up reductions in triangular pattern - * where each CUDA thread is the root of a reduction tree from the - * zeroth CUDA thread to itself. - * - * Requires: - * blockDim.x is power of 2 - * blockDim.x <= 32 (one warp) - */ -template< typename ValueType > -__device__ inline -void cuda_intra_warp_vector_inclusive_scan( ValueType & local ) -{ - ValueType tmp ; - - // Bottom up: - // [t] += [t-1] if t >= 1 - // [t] += [t-2] if t >= 2 - // [t] += [t-4] if t >= 4 - // ... 
- - for ( int i = 1 ; i < blockDim.x ; i <<= 1 ) { - - cuda_shfl_up( tmp , local , i , blockDim.x ); - - if ( i <= threadIdx.x ) { local += tmp ; } - } -} - //---------------------------------------------------------------------------- /* * Algorithmic constraints: @@ -247,12 +175,12 @@ inline void cuda_inter_warp_reduction( ValueType& value, #define STEP_WIDTH 4 // Depending on the ValueType _shared__ memory must be aligned up to 8byte boundaries - // The reason not to use ValueType directly is that for types with constructors it + // The reason not to use ValueType directly is that for types with constructors it // could lead to race conditions __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; ValueType* result = (ValueType*) & sh_result; - const unsigned step = 32 / blockDim.x; - unsigned shift = STEP_WIDTH; + const int step = 32 / blockDim.x; + int shift = STEP_WIDTH; const int id = threadIdx.y%step==0?threadIdx.y/step:65000; if(id < STEP_WIDTH ) { result[id] = value; @@ -297,7 +225,7 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT //Do the intra-block reduction with shfl operations and static shared memory cuda_intra_block_reduction(value,join,max_active_thread); - const unsigned id = threadIdx.y*blockDim.x + threadIdx.x; + const int id = threadIdx.y*blockDim.x + threadIdx.x; //One thread in the block writes block result to global scratch_memory if(id == 0 ) { @@ -329,35 +257,35 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT //Reduce all global values with splitting work over threads in one warp const int step_size = blockDim.x*blockDim.y < 32 ? 
blockDim.x*blockDim.y : 32; - for(int i=id; i<gridDim.x; i+=step_size) { + for(int i=id; i<(int)gridDim.x; i+=step_size) { value_type tmp = global[i]; join(value, tmp); } //Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32) - if (blockDim.x*blockDim.y > 1) { + if (int(blockDim.x*blockDim.y) > 1) { value_type tmp = Kokkos::shfl_down(value, 1,32); - if( id + 1 < gridDim.x ) + if( id + 1 < int(gridDim.x) ) join(value, tmp); } - if (blockDim.x*blockDim.y > 2) { + if (int(blockDim.x*blockDim.y) > 2) { value_type tmp = Kokkos::shfl_down(value, 2,32); - if( id + 2 < gridDim.x ) + if( id + 2 < int(gridDim.x) ) join(value, tmp); } - if (blockDim.x*blockDim.y > 4) { + if (int(blockDim.x*blockDim.y) > 4) { value_type tmp = Kokkos::shfl_down(value, 4,32); - if( id + 4 < gridDim.x ) + if( id + 4 < int(gridDim.x) ) join(value, tmp); } - if (blockDim.x*blockDim.y > 8) { + if (int(blockDim.x*blockDim.y) > 8) { value_type tmp = Kokkos::shfl_down(value, 8,32); - if( id + 8 < gridDim.x ) + if( id + 8 < int(gridDim.x) ) join(value, tmp); } - if (blockDim.x*blockDim.y > 16) { + if (int(blockDim.x*blockDim.y) > 16) { value_type tmp = Kokkos::shfl_down(value, 16,32); - if( id + 16 < gridDim.x ) + if( id + 16 < int(gridDim.x) ) join(value, tmp); } } @@ -370,6 +298,166 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT #endif } +template< class ReducerType > +__device__ inline +typename std::enable_if< Kokkos::is_reducer<ReducerType>::value >::type +cuda_intra_warp_reduction( const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + + typedef typename ReducerType::value_type ValueType; + + unsigned int shift = 1; + + ValueType result = reducer.reference(); + //Reduce over values from threads with different threadIdx.y + while(blockDim.x * shift < 32 ) { + const ValueType tmp = shfl_down(result, blockDim.x*shift,32u); + //Only join if upper thread is active (this allows non 
power of two for blockDim.y + if(threadIdx.y + shift < max_active_thread) + reducer.join(result , tmp); + shift*=2; + } + + result = shfl(result,0,32); + reducer.reference() = result; +} + +template< class ReducerType > +__device__ inline +typename std::enable_if< Kokkos::is_reducer<ReducerType>::value >::type +cuda_inter_warp_reduction( const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + + typedef typename ReducerType::value_type ValueType; + + #define STEP_WIDTH 4 + // Depending on the ValueType _shared__ memory must be aligned up to 8byte boundaries + // The reason not to use ValueType directly is that for types with constructors it + // could lead to race conditions + __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; + ValueType* result = (ValueType*) & sh_result; + ValueType value = reducer.reference(); + const int step = 32 / blockDim.x; + int shift = STEP_WIDTH; + const int id = threadIdx.y%step==0?threadIdx.y/step:65000; + if(id < STEP_WIDTH ) { + result[id] = value; + } + __syncthreads(); + while (shift<=max_active_thread/step) { + if(shift<=id && shift+STEP_WIDTH>id && threadIdx.x==0) { + reducer.join(result[id%STEP_WIDTH],value); + } + __syncthreads(); + shift+=STEP_WIDTH; + } + + + value = result[0]; + for(int i = 1; (i*step<max_active_thread) && i<STEP_WIDTH; i++) + reducer.join(value,result[i]); + + reducer.reference() = value; +} + +template< class ReducerType > +__device__ inline +typename std::enable_if< Kokkos::is_reducer<ReducerType>::value >::type +cuda_intra_block_reduction( const ReducerType& reducer, + const int max_active_thread = blockDim.y) { + cuda_intra_warp_reduction(reducer,max_active_thread); + cuda_inter_warp_reduction(reducer,max_active_thread); +} + +template< class ReducerType> +__device__ inline +typename std::enable_if< Kokkos::is_reducer<ReducerType>::value , bool >::type +cuda_inter_block_reduction( const ReducerType& reducer, + Cuda::size_type * const m_scratch_space, + Cuda::size_type * 
const m_scratch_flags, + const int max_active_thread = blockDim.y) { +#ifdef __CUDA_ARCH__ + typedef typename ReducerType::value_type* pointer_type; + typedef typename ReducerType::value_type value_type; + + //Do the intra-block reduction with shfl operations and static shared memory + cuda_intra_block_reduction(reducer,max_active_thread); + + value_type value = reducer.reference(); + + const int id = threadIdx.y*blockDim.x + threadIdx.x; + + //One thread in the block writes block result to global scratch_memory + if(id == 0 ) { + pointer_type global = ((pointer_type) m_scratch_space) + blockIdx.x; + *global = value; + } + + //One warp of last block performs inter block reduction through loading the block values from global scratch_memory + bool last_block = false; + + __syncthreads(); + if ( id < 32 ) { + Cuda::size_type count; + + //Figure out whether this is the last block + if(id == 0) + count = Kokkos::atomic_fetch_add(m_scratch_flags,1); + count = Kokkos::shfl(count,0,32); + + //Last block does the inter block reduction + if( count == gridDim.x - 1) { + //set flag back to zero + if(id == 0) + *m_scratch_flags = 0; + last_block = true; + reducer.init(value); + + pointer_type const volatile global = (pointer_type) m_scratch_space ; + + //Reduce all global values with splitting work over threads in one warp + const int step_size = blockDim.x*blockDim.y < 32 ? 
blockDim.x*blockDim.y : 32; + for(int i=id; i<(int)gridDim.x; i+=step_size) { + value_type tmp = global[i]; + reducer.join(value, tmp); + } + + //Perform shfl reductions within the warp only join if contribution is valid (allows gridDim.x non power of two and <32) + if (int(blockDim.x*blockDim.y) > 1) { + value_type tmp = Kokkos::shfl_down(value, 1,32); + if( id + 1 < int(gridDim.x) ) + reducer.join(value, tmp); + } + if (int(blockDim.x*blockDim.y) > 2) { + value_type tmp = Kokkos::shfl_down(value, 2,32); + if( id + 2 < int(gridDim.x) ) + reducer.join(value, tmp); + } + if (int(blockDim.x*blockDim.y) > 4) { + value_type tmp = Kokkos::shfl_down(value, 4,32); + if( id + 4 < int(gridDim.x) ) + reducer.join(value, tmp); + } + if (int(blockDim.x*blockDim.y) > 8) { + value_type tmp = Kokkos::shfl_down(value, 8,32); + if( id + 8 < int(gridDim.x) ) + reducer.join(value, tmp); + } + if (int(blockDim.x*blockDim.y) > 16) { + value_type tmp = Kokkos::shfl_down(value, 16,32); + if( id + 16 < int(gridDim.x) ) + reducer.join(value, tmp); + } + } + } + + //The last block has in its thread=0 the global reduction value through "value" + return last_block; +#else + return true; +#endif +} + //---------------------------------------------------------------------------- // See section B.17 of Cuda C Programming Guide Version 3.2 // for discussion of @@ -529,11 +617,11 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType & functor , size_type * const shared = shared_data + word_count.value * BlockSizeMask ; size_type * const global = global_data + word_count.value * block_id ; -#if (__CUDA_ARCH__ < 500) - for ( size_type i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i] ; } -#else - for ( size_type i = 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; } -#endif +//#if (__CUDA_ARCH__ < 500) + for ( int i = int(threadIdx.y) ; i < int(word_count.value) ; i += int(blockDim.y) ) { global[i] = shared[i] ; } +//#else +// for ( size_type i 
= 0 ; i < word_count.value ; i += 1 ) { global[i] = shared[i] ; } +//#endif } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index cf3e55d50cf416cbb6a268c85602e7c7dd8fa4e2..3c6f0a5ddaeb78f9252cbd3da7226f14774bf970 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <Kokkos_Core.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include <Kokkos_Core.hpp> + #include <impl/Kokkos_TaskQueue_impl.hpp> //---------------------------------------------------------------------------- @@ -120,7 +121,7 @@ printf("TaskQueue<Cuda>::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x } if ( 0 == warp_lane ) { - queue->complete( task.ptr ); + queue->complete( task.ptr ); } } } while(1); @@ -157,7 +158,7 @@ printf("cuda_task_queue_execute before\n"); // If not large enough then set the stack size, in bytes: // // CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , stack_size ) ); - + cuda_task_queue_execute<<< grid , block , shared , stream >>>( queue ); CUDA_SAFE_CALL( cudaGetLastError() ); @@ -173,7 +174,7 @@ printf("cuda_task_queue_execute after\n"); }} /* namespace Kokkos::Impl */ 
//---------------------------------------------------------------------------- - +#else +void KOKKOS_CORE_SRC_CUDA_KOKKOS_CUDA_TASK_PREVENT_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( KOKKOS_ENABLE_TASKDAG ) */ - diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index a13e37837d8005867f1087b827a4d7e59ebd3209..5d08219ea5bf9767a49de4a602e7625b49eeb069 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -44,6 +44,7 @@ #ifndef KOKKOS_IMPL_CUDA_TASK_HPP #define KOKKOS_IMPL_CUDA_TASK_HPP +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- @@ -325,7 +326,7 @@ ValueType shfl_warp_broadcast return Kokkos::shfl(val, src_lane, width); } -// all-reduce across corresponding vector lanes between team members within warp +/*// all-reduce across corresponding vector lanes between team members within warp // assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size @@ -351,7 +352,7 @@ void parallel_reduce loop_boundaries.thread.team_size(), blockDim.x); initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, threadIdx.x, Impl::CudaTraits::WarpSize ); -} +}*/ // all-reduce across corresponding vector lanes between team members within warp // if no join() provided, use sum @@ -382,13 +383,36 @@ void parallel_reduce initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, threadIdx.x, Impl::CudaTraits::WarpSize ); } +template< typename iType, class Lambda, typename ReducerType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, + const Lambda & lambda, + const ReducerType& reducer) { + + typedef typename ReducerType::value_type ValueType; + //TODO what is the point of 
creating this temporary? + ValueType result = ValueType(); + reducer.init(result); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + strided_shfl_warp_reduction( + [&] (ValueType& val1, const ValueType& val2) { reducer.join(val1,val2); }, + result, + loop_boundaries.thread.team_size(), + blockDim.x); + reducer.reference() = shfl_warp_broadcast<ValueType>( result, threadIdx.x, Impl::CudaTraits::WarpSize ); +} // all-reduce within team members within warp // assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType, class JoinType > +/*template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION void parallel_reduce (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, @@ -404,7 +428,7 @@ void parallel_reduce multi_shfl_warp_reduction<ValueType, JoinType>(join, initialized_result, blockDim.x); initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, 0, blockDim.x ); -} +}*/ // all-reduce within team members within warp // if no join() provided, use sum @@ -436,6 +460,28 @@ void parallel_reduce initialized_result = shfl_warp_broadcast<ValueType>( initialized_result, 0, blockDim.x ); } +template< typename iType, class Lambda, typename ReducerType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, + const Lambda & lambda, + const ReducerType& reducer) { + + typedef typename ReducerType::value_type ValueType; + + ValueType result = ValueType(); + reducer.init(result); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,result); + } + + multi_shfl_warp_reduction( + 
[&] (ValueType& val1, const ValueType& val2) { reducer.join(val1, val2); }, + result, + blockDim.x); + reducer.reference() = shfl_warp_broadcast<ValueType>( result, 0, blockDim.x ); +} // scan across corresponding vector lanes between team members within warp // assume vec_length*team_size == warp_size // blockDim.x == vec_length == stride diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp new file mode 100644 index 0000000000000000000000000000000000000000..084daa098b07662c475d4e28a7f3e3b763a227c4 --- /dev/null +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp @@ -0,0 +1,982 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_TEAM_HPP +#define KOKKOS_CUDA_TEAM_HPP + +#include <iostream> +#include <algorithm> +#include <stdio.h> + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) + +#include <utility> +#include <Kokkos_Parallel.hpp> + +#include <Cuda/Kokkos_CudaExec.hpp> +#include <Cuda/Kokkos_Cuda_ReduceScan.hpp> +#include <Cuda/Kokkos_Cuda_Internal.hpp> +#include <Kokkos_Vectorization.hpp> + +#if defined(KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#include <typeinfo> +#endif + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename Type > +struct CudaJoinFunctor { + typedef Type value_type ; + + KOKKOS_INLINE_FUNCTION + static void join( volatile value_type & update , + volatile const value_type & input ) + { update += input ; } +}; + +/**\brief Team member_type passed to TeamPolicy or TeamTask closures. 
+ * + * Cuda thread blocks for team closures are dimensioned as: + * blockDim.x == number of "vector lanes" per "thread" + * blockDim.y == number of "threads" per team + * blockDim.z == number of teams in a block + * where + * A set of teams exactly fill a warp OR a team is the whole block + * ( 0 == WarpSize % ( blockDim.x * blockDim.y ) ) + * OR + * ( 1 == blockDim.z ) + * + * Thus when 1 < blockDim.z the team is warp-synchronous + * and __syncthreads should not be called in team collectives. + * + * When multiple teams are mapped onto a single block then the + * total available shared memory must be partitioned among teams. + */ +class CudaTeamMember { +private: + + typedef Kokkos::Cuda execution_space ; + typedef execution_space::scratch_memory_space scratch_memory_space ; + + void * m_team_reduce ; + scratch_memory_space m_team_shared ; + int m_team_reduce_size ; + int m_league_rank ; + int m_league_size ; + +public: + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space & team_shmem() const + { return m_team_shared.set_team_thread_mode(0,1,0) ; } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space & + team_scratch(const int& level) const + { return m_team_shared.set_team_thread_mode(level,1,0) ; } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space & + thread_scratch(const int& level) const + { return m_team_shared.set_team_thread_mode(level,team_size(),team_rank()) ; } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } + KOKKOS_INLINE_FUNCTION int team_rank() const + { + #ifdef __CUDA_ARCH__ + return threadIdx.y ; + #else + return 0; + #endif + } + + KOKKOS_INLINE_FUNCTION int team_size() const + { + #ifdef __CUDA_ARCH__ + return blockDim.y ; + #else + return 1; + #endif + } + + KOKKOS_INLINE_FUNCTION void team_barrier() const + { + #ifdef __CUDA_ARCH__ + if ( 1 == blockDim.z ) __syncthreads(); // 
team == block + else __threadfence_block(); // team <= warp + #endif + } + + //-------------------------------------------------------------------------- + + template<class ValueType> + KOKKOS_INLINE_FUNCTION + void team_broadcast( ValueType & val, const int& thread_id) const + { + #ifdef __CUDA_ARCH__ + if ( 1 == blockDim.z ) { // team == block + __syncthreads(); + // Wait for shared data write until all threads arrive here + if ( threadIdx.x == 0 && threadIdx.y == thread_id ) { + *((ValueType*) m_team_reduce) = val ; + } + __syncthreads(); // Wait for shared data read until root thread writes + val = *((ValueType*) m_team_reduce); + } + else { // team <= warp + ValueType tmp( val ); // input might not be a register variable + cuda_shfl( val, tmp, blockDim.x * thread_id, blockDim.x * blockDim.y ); + } + #endif + } + + //-------------------------------------------------------------------------- + /**\brief Reduction across a team + * + * Mapping of teams onto blocks: + * blockDim.x is "vector lanes" + * blockDim.y is team "threads" + * blockDim.z is number of teams per block + * + * Requires: + * blockDim.x is power two + * blockDim.x <= CudaTraits::WarpSize + * ( 0 == CudaTraits::WarpSize % ( blockDim.x * blockDim.y ) + * OR + * ( 1 == blockDim.z ) + */ + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer ) const noexcept + { + #ifdef __CUDA_ARCH__ + + typedef typename ReducerType::value_type value_type ; + + value_type tmp( reducer.reference() ); + + // reduce within the warp using shuffle + + const int wx = + ( threadIdx.x + blockDim.x * threadIdx.y ) & CudaTraits::WarpIndexMask ; + + for ( int i = CudaTraits::WarpSize ; blockDim.x <= ( i >>= 1 ) ; ) { + + cuda_shfl_down( reducer.reference() , tmp , i , CudaTraits::WarpSize ); + + // Root of each vector lane reduces: + if ( 0 == threadIdx.x && wx < i ) { + reducer.join( tmp , reducer.reference() 
); + } + } + + if ( 1 < blockDim.z ) { // team <= warp + // broadcast result from root vector lange of root thread + + cuda_shfl( reducer.reference() , tmp + , blockDim.x * threadIdx.y , CudaTraits::WarpSize ); + + } + else { // team == block + // Reduce across warps using shared memory + // Broadcast result within block + + // Number of warps, blockDim.y may not be power of two: + const int nw = ( blockDim.x * blockDim.y + CudaTraits::WarpIndexMask ) >> CudaTraits::WarpIndexShift ; + + // Warp index: + const int wy = ( blockDim.x * threadIdx.y ) >> CudaTraits::WarpIndexShift ; + + // Number of shared memory entries for the reduction: + int nsh = m_team_reduce_size / sizeof(value_type); + + // Using at most one entry per warp: + if ( nw < nsh ) nsh = nw ; + + __syncthreads(); // Wait before shared data write + + if ( 0 == wx && wy < nsh ) { + ((value_type*) m_team_reduce)[wy] = tmp ; + } + + // When more warps than shared entries: + for ( int i = nsh ; i < nw ; i += nsh ) { + + __syncthreads(); + + if ( 0 == wx && i <= wy ) { + const int k = wy - i ; + if ( k < nsh ) { + reducer.join( *((value_type*) m_team_reduce + k) , tmp ); + } + } + } + + __syncthreads(); + + // One warp performs the inter-warp reduction: + + if ( 0 == wy ) { + + // Start at power of two covering nsh + + for ( int i = 1 << ( 32 - __clz(nsh-1) ) ; ( i >>= 1 ) ; ) { + const int k = wx + i ; + if ( wx < i && k < nsh ) { + reducer.join( ((value_type*)m_team_reduce)[wx] + , ((value_type*)m_team_reduce)[k] ); + __threadfence_block(); + } + } + } + + __syncthreads(); // Wait for reduction + + // Broadcast result to all threads + reducer.reference() = *((value_type*)m_team_reduce); + } + + #endif /* #ifdef __CUDA_ARCH__ */ + } + + //-------------------------------------------------------------------------- + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. 
+ * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION + Type team_scan( const Type & value , Type * const global_accum ) const + { + #ifdef __CUDA_ARCH__ + Type * const base_data = (Type *) m_team_reduce ; + + __syncthreads(); // Don't write in to shared data until all threads have entered this function + + if ( 0 == threadIdx.y ) { base_data[0] = 0 ; } + + base_data[ threadIdx.y + 1 ] = value ; + + Impl::cuda_intra_block_reduce_scan<true,Impl::CudaJoinFunctor<Type>,void>( Impl::CudaJoinFunctor<Type>() , base_data + 1 ); + + if ( global_accum ) { + if ( blockDim.y == threadIdx.y + 1 ) { + base_data[ blockDim.y ] = atomic_fetch_add( global_accum , base_data[ blockDim.y ] ); + } + __syncthreads(); // Wait for atomic + base_data[ threadIdx.y ] += base_data[ blockDim.y ] ; + } + + return base_data[ threadIdx.y ]; + #else + return Type(); + #endif + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
+ * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const { + return this->template team_scan<Type>( value , 0 ); + } + + //---------------------------------------- + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< is_reducer< ReducerType >::value >::type + vector_reduce( ReducerType const & reducer ) + { + + #ifdef __CUDA_ARCH__ + if(blockDim.x == 1) return; + + // Intra vector lane shuffle reduction: + typename ReducerType::value_type tmp ( reducer.reference() ); + + for ( int i = blockDim.x ; ( i >>= 1 ) ; ) { + cuda_shfl_down( reducer.reference() , tmp , i , blockDim.x ); + if ( threadIdx.x < i ) { reducer.join( tmp , reducer.reference() ); } + } + + // Broadcast from root lane to all other lanes. + // Cannot use "butterfly" algorithm to avoid the broadcast + // because floating point summation is not associative + // and thus different threads could have different results. 
+ + cuda_shfl( reducer.reference() , tmp , 0 , blockDim.x ); + #endif + } + + //-------------------------------------------------------------------------- + /**\brief Global reduction across all blocks + * + * Return !0 if reducer contains the final value + */ + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< is_reducer< ReducerType >::value , int >::type + global_reduce( ReducerType const & reducer + , int * const global_scratch_flags + , void * const global_scratch_space + , void * const shmem + , int const shmem_size + ) + { + #ifdef __CUDA_ARCH__ + + typedef typename ReducerType::value_type value_type ; + typedef value_type volatile * pointer_type ; + + // Number of shared memory entries for the reduction: + const int nsh = shmem_size / sizeof(value_type); + + // Number of CUDA threads in the block, rank within the block + const int nid = blockDim.x * blockDim.y * blockDim.z ; + const int tid = threadIdx.x + blockDim.x * ( + threadIdx.y + blockDim.y * threadIdx.z ); + + // Reduces within block using all available shared memory + // Contributes if it is the root "vector lane" + + // wn == number of warps in the block + // wx == which lane within the warp + // wy == which warp within the block + + const int wn = ( nid + CudaTraits::WarpIndexMask ) >> CudaTraits::WarpIndexShift ; + const int wx = tid & CudaTraits::WarpIndexMask ; + const int wy = tid >> CudaTraits::WarpIndexShift ; + + //------------------------ + { // Intra warp shuffle reduction from contributing CUDA threads + + value_type tmp( reducer.reference() ); + + for ( int i = CudaTraits::WarpSize ; blockDim.x <= ( i >>= 1 ) ; ) { + + cuda_shfl_down( reducer.reference(), tmp, i, CudaTraits::WarpSize ); + + // Root of each vector lane reduces "thread" contribution + if ( 0 == threadIdx.x && wx < i ) { + reducer.join( & tmp , reducer.data() ); + } + } + + // Reduce across warps using shared memory. + // Number of warps may not be power of two. 
+ + __syncthreads(); // Wait before shared data write + + // Number of shared memory entries for the reduction + // is at most one per warp + const int nentry = wn < nsh ? wn : nsh ; + + if ( 0 == wx && wy < nentry ) { + // Root thread of warp 'wy' has warp's value to contribute + ((value_type*) shmem)[wy] = tmp ; + } + + __syncthreads(); // Wait for write to be visible to block + + // When more warps than shared entries + // then warps must take turns joining their contribution + // to the designated shared memory entry. + for ( int i = nentry ; i < wn ; i += nentry ) { + + const int k = wy - i ; + + if ( 0 == wx && i <= wy && k < nentry ) { + // Root thread of warp 'wy' has warp's value to contribute + reducer.join( ((value_type*) shmem) + k , & tmp ); + } + + __syncthreads(); // Wait for write to be visible to block + } + + // One warp performs the inter-warp reduction: + + if ( 0 == wy ) { + + // Start fan-in at power of two covering nentry + + for ( int i = ( 1 << ( 32 - __clz(nentry-1) ) ) ; ( i >>= 1 ) ; ) { + const int k = wx + i ; + if ( wx < i && k < nentry ) { + reducer.join( ((pointer_type)shmem) + wx + , ((pointer_type)shmem) + k ); + __threadfence_block(); // Wait for write to be visible to warp + } + } + } + } + //------------------------ + { // Write block's value to global_scratch_memory + + int last_block = 0 ; + + if ( 0 == wx ) { + reducer.copy( ((pointer_type)global_scratch_space) + + blockIdx.x * reducer.length() + , reducer.data() ); + + __threadfence(); // Wait until global write is visible. + + last_block = gridDim.x == + 1 + Kokkos::atomic_fetch_add(global_scratch_flags,1); + + // If last block then reset count + if ( last_block ) *global_scratch_flags = 0 ; + } + + last_block = __syncthreads_or( last_block ); + + if ( ! last_block ) return 0 ; + + } + //------------------------ + // Last block reads global_scratch_memory into shared memory. + + const int nentry = nid < gridDim.x ? + ( nid < nsh ? nid : nsh ) : + ( gridDim.x < nsh ? 
gridDim.x : nsh ) ; + + // nentry = min( nid , nsh , gridDim.x ) + + // whole block reads global memory into shared memory: + + if ( tid < nentry ) { + + const int offset = tid * reducer.length(); + + reducer.copy( ((pointer_type)shmem) + offset + , ((pointer_type)global_scratch_space) + offset ); + + for ( int i = nentry + tid ; i < gridDim.x ; i += nentry ) { + reducer.join( ((pointer_type)shmem) + offset + , ((pointer_type)global_scratch_space) + + i * reducer.length() ); + } + } + + __syncthreads(); // Wait for writes to be visible to block + + if ( 0 == wy ) { + + // Iterate to reduce shared memory to single warp fan-in size + + const int nreduce = CudaTraits::WarpSize < nentry + ? CudaTraits::WarpSize : nentry ; + + // nreduce = min( CudaTraits::WarpSize , nsh , gridDim.x ) + + if ( wx < nreduce && nreduce < nentry ) { + for ( int i = nreduce + wx ; i < nentry ; i += nreduce ) { + reducer.join( ((pointer_type)shmem) + wx + , ((pointer_type)shmem) + i ); + } + __threadfence_block(); // Wait for writes to be visible to warp + } + + // Start fan-in at power of two covering nentry + + for ( int i = ( 1 << ( 32 - __clz(nreduce-1) ) ) ; ( i >>= 1 ) ; ) { + const int k = wx + i ; + if ( wx < i && k < nreduce ) { + reducer.join( ((pointer_type)shmem) + wx + , ((pointer_type)shmem) + k ); + __threadfence_block(); // Wait for writes to be visible to warp + } + } + + if ( 0 == wx ) { + reducer.copy( reducer.data() , (pointer_type)shmem ); + return 1 ; + } + } + return 0 ; + + #else + return 0 ; + #endif + } + + //---------------------------------------- + // Private for the driver + + KOKKOS_INLINE_FUNCTION + CudaTeamMember( void * shared + , const int shared_begin + , const int shared_size + , void* scratch_level_1_ptr + , const int scratch_level_1_size + , const int arg_league_rank + , const int arg_league_size ) + : m_team_reduce( shared ) + , m_team_shared( ((char *)shared) + shared_begin , shared_size, scratch_level_1_ptr, scratch_level_1_size) + , 
m_team_reduce_size( shared_begin ) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) + {} + +}; + +} // namspace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<typename iType> +struct TeamThreadRangeBoundariesStruct<iType,CudaTeamMember> { + typedef iType index_type; + const CudaTeamMember& member; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& count) + : member(thread_) + , start( 0 ) + , end( count ) {} + + KOKKOS_INLINE_FUNCTION + TeamThreadRangeBoundariesStruct (const CudaTeamMember& thread_, const iType& begin_, const iType& end_) + : member(thread_) + , start( begin_ ) + , end( end_ ) {} +}; + + + +template<typename iType> +struct ThreadVectorRangeBoundariesStruct<iType,CudaTeamMember> { + typedef iType index_type; + const iType start; + const iType end; + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct (const CudaTeamMember, const iType& count) + : start( 0 ), end( count ) {} + + KOKKOS_INLINE_FUNCTION + ThreadVectorRangeBoundariesStruct (const iType& count) + : start( 0 ), end( count ) {} +}; + +} // namespace Impl + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember > +TeamThreadRange( const Impl::CudaTeamMember & thread, const iType & count ) { + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, count ); +} + +template< typename iType1, typename iType2 > +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, + Impl::CudaTeamMember > +TeamThreadRange( const Impl::CudaTeamMember & thread, const iType1 & begin, const iType2 & end ) { + typedef typename std::common_type< 
iType1, iType2 >::type iType; + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::CudaTeamMember >( thread, iType(begin), iType(end) ); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > +ThreadVectorRange(const Impl::CudaTeamMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >(thread,count); +} + +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::CudaTeamMember> PerTeam(const Impl::CudaTeamMember& thread) { + return Impl::ThreadSingleStruct<Impl::CudaTeamMember>(thread); +} + +KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::CudaTeamMember> PerThread(const Impl::CudaTeamMember& thread) { + return Impl::VectorSingleStruct<Impl::CudaTeamMember>(thread); +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_for. + * + * Executes closure(iType i) for each i=[0..N). + * + * The range [0..N) is mapped to all threads of the the calling thread team. + */ +template<typename iType, class Closure > +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember>& + loop_boundaries + , const Closure & closure + ) +{ + #ifdef __CUDA_ARCH__ + for( iType i = loop_boundaries.start + threadIdx.y + ; i < loop_boundaries.end + ; i += blockDim.y ) + closure(i); + #endif +} + +//---------------------------------------------------------------------------- + +/** \brief Inter-thread parallel_reduce with a reducer. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. 
+ */ +template< typename iType, class Closure, class ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> & + loop_boundaries + , const Closure & closure + , const ReducerType & reducer + ) +{ +#ifdef __CUDA_ARCH__ + + reducer.init( reducer.reference() ); + + for( iType i = loop_boundaries.start + threadIdx.y + ; i < loop_boundaries.end + ; i += blockDim.y ) { + closure(i,reducer.reference()); + } + + loop_boundaries.member.team_reduce( reducer ); + +#endif +} + + +/** \brief Inter-thread parallel_reduce assuming summation. + * + * Executes closure(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. + */ +template< typename iType, class Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::CudaTeamMember> & + loop_boundaries + , const Closure & closure + , ValueType & result + ) +{ +#ifdef __CUDA_ARCH__ + + Kokkos::Experimental::Sum<ValueType> reducer(result); + + reducer.init( reducer.reference() ); + + for( iType i = loop_boundaries.start + threadIdx.y + ; i < loop_boundaries.end + ; i += blockDim.y ) { + closure(i,result); + } + + loop_boundaries.member.team_reduce( reducer ); + +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_for. + * + * Executes closure(iType i) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes of the the calling thread. 
+ */ +template<typename iType, class Closure > +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember>& + loop_boundaries + , const Closure & closure + ) +{ +#ifdef __CUDA_ARCH__ + for ( iType i = loop_boundaries.start + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.x ) { + closure(i); + } +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ +template< typename iType, class Closure, class ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< is_reducer< ReducerType >::value >::type +parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember> + const & loop_boundaries + , Closure const & closure + , ReducerType const & reducer ) +{ +#ifdef __CUDA_ARCH__ + + reducer.init( reducer.reference() ); + + for ( iType i = loop_boundaries.start + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.x ) { + closure(i,reducer.reference()); + } + + Impl::CudaTeamMember::vector_reduce( reducer ); + +#endif +} + +/** \brief Intra-thread vector parallel_reduce. + * + * Calls closure(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ +template< typename iType, class Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! 
is_reducer< ValueType >::value >::type +parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember> + const & loop_boundaries + , Closure const & closure + , ValueType & result ) +{ +#ifdef __CUDA_ARCH__ + result = ValueType(); + + for ( iType i = loop_boundaries.start + threadIdx.x + ; i < loop_boundaries.end + ; i += blockDim.x ) { + closure(i,result); + } + + Impl::CudaTeamMember::vector_reduce( + Kokkos::Experimental::Sum<ValueType>(result ) ); + +#endif +} + +//---------------------------------------------------------------------------- + +/** \brief Intra-thread vector parallel exclusive prefix sum. + * + * Executes closure(iType i, ValueType & val, bool final) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes in the + * thread and a scan operation is performed. + * The last call to closure has final == true. + */ +template< typename iType, class Closure > +KOKKOS_INLINE_FUNCTION +void parallel_scan + ( const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& + loop_boundaries + , const Closure & closure + ) +{ + +#ifdef __CUDA_ARCH__ + + // Extract value_type from closure + + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + // Loop through boundaries by vector-length chunks + // must scan at each iteration + + value_type accum = 0 ; + + // All thread "lanes" must loop the same number of times. + // Determine an loop end for all thread "lanes." + // Requires: + // blockDim.x is power of two and thus + // ( end % blockDim.x ) == ( end & ( blockDim.x - 1 ) ) + // 1 <= blockDim.x <= CudaTraits::WarpSize + + const int mask = blockDim.x - 1 ; + const int rem = loop_boundaries.end & mask ; // == end % blockDim.x + const int end = loop_boundaries.end + ( rem ? 
blockDim.x - rem : 0 ); + + for ( int i = threadIdx.x ; i < end ; i += blockDim.x ) { + + value_type val = 0 ; + + // First acquire per-lane contributions: + if ( i < loop_boundaries.end ) closure( i , val , false ); + + value_type sval = val ; + + // Bottom up inclusive scan in triangular pattern + // where each CUDA thread is the root of a reduction tree + // from the zeroth "lane" to itself. + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... + + for ( int j = 1 ; j < blockDim.x ; j <<= 1 ) { + value_type tmp = 0 ; + Impl::cuda_shfl_up( tmp , sval , j , blockDim.x ); + if ( j <= threadIdx.x ) { sval += tmp ; } + } + + // Include accumulation and remove value for exclusive scan: + val = accum + sval - val ; + + // Provide exclusive scan value: + if ( i < loop_boundaries.end ) closure( i , val , true ); + + // Accumulate the last value in the inclusive scan: + Impl::cuda_shfl( sval , sval , mask , blockDim.x ); + + accum += sval ; + } + +#endif +} + +} + +namespace Kokkos { + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0) lambda(); +#endif +} + +template<class FunctorType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0 && threadIdx.y == 0) lambda(); +#endif +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::VectorSingleStruct<Impl::CudaTeamMember>& , const FunctorType& lambda, ValueType& val) { +#ifdef __CUDA_ARCH__ + if(threadIdx.x == 0) lambda(val); + val = shfl(val,0,blockDim.x); +#endif +} + +template<class FunctorType, class ValueType> +KOKKOS_INLINE_FUNCTION +void single(const Impl::ThreadSingleStruct<Impl::CudaTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { +#ifdef __CUDA_ARCH__ + 
if(threadIdx.x == 0 && threadIdx.y == 0) { + lambda(val); + } + single_struct.team_member.team_broadcast(val,0); +#endif +} + +} // namespace Kokkos + +#endif /* defined( __CUDACC__ ) */ + +#endif /* #ifndef KOKKOS_CUDA_TEAM_HPP */ + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp index 33adc5b7dde681e3d914717cf455794da06d0da7..99d8fcc99907d89afc413440356652fdfdf5d599 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,8 +44,6 @@ #define KOKKOS_CUDA_VECTORIZATION_HPP #include <Kokkos_Macros.hpp> - -/* only compile this file if CUDA is enabled for Kokkos */ #ifdef KOKKOS_ENABLE_CUDA #include <Kokkos_Cuda.hpp> @@ -296,3 +294,4 @@ namespace Impl { #endif // KOKKOS_ENABLE_CUDA #endif + diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp index cbbe15374e7b6000995eb132446464cf62cc1294..f5e2d87fb67982b0cfb7fcd9b7e6be254b9f790d 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP #define KOKKOS_EXPERIMENTAL_CUDA_VIEW_HPP -/* only compile this file if CUDA is enabled for Kokkos */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_CUDA ) //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp index c2489137346d3ee3f5821209a8aa7ecbd8f60aa7..df4e3d37f62e6fb36b20334087d1c8dfbf673a7f 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -46,7 +46,7 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#include "Kokkos_Macros.hpp" +#include <Kokkos_Macros.hpp> #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) #include <cuda.h> @@ -82,6 +82,8 @@ void cuda_abort( const char * const message ) } // namespace Impl } // namespace Kokkos +#else +void KOKKOS_CORE_SRC_CUDA_ABORT_PREVENT_LINK_ERROR() {} #endif /* #if defined(__CUDACC__) && defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_ABORT_HPP */ diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index a450ca36ae1bb0049c2abd142e20733edcaf2f7c..4f68d9c2c091a9355fae1a29fd7fc4567fc2eb2b 100644 --- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -236,13 +236,13 @@ struct MDRangePolicy MDRangePolicy( lower_tmp, upper_tmp, tile_tmp ); #else - if(m_lower.size()!=rank || m_upper.size() != rank) + if(static_cast<int>(m_lower.size()) != rank || static_cast<int>(m_upper.size()) != rank) Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size"); for ( auto i = 0; i < rank; ++i ) { m_lower[i] = static_cast<array_index_type>(lower.begin()[i]); m_upper[i] = static_cast<array_index_type>(upper.begin()[i]); - if(tile.size()==rank) + if(static_cast<int>(tile.size())==rank) m_tile[i] = static_cast<array_index_type>(tile.begin()[i]); else m_tile[i] = 0; diff --git a/lib/kokkos/core/src/Kokkos_Complex.hpp b/lib/kokkos/core/src/Kokkos_Complex.hpp index cdfa4429f08f241d86bd32c3020f1b20c9a5a90b..1fe964a6d2cf5ae5fa69478c82e83429a762ddf4 100644 --- a/lib/kokkos/core/src/Kokkos_Complex.hpp +++ b/lib/kokkos/core/src/Kokkos_Complex.hpp @@ -44,6 +44,7 @@ #define KOKKOS_COMPLEX_HPP 
#include <Kokkos_Atomic.hpp> +#include <Kokkos_NumericTraits.hpp> #include <complex> #include <iostream> @@ -324,9 +325,30 @@ public: im_ /= src; return *this; } + + KOKKOS_INLINE_FUNCTION + bool operator == (const complex<RealType>& src) { + return (re_ == src.re_) && (im_ == src.im_); + } + + KOKKOS_INLINE_FUNCTION + bool operator == (const RealType src) { + return (re_ == src) && (im_ == RealType(0)); + } + + KOKKOS_INLINE_FUNCTION + bool operator != (const complex<RealType>& src) { + return (re_ != src.re_) || (im_ != src.im_); + } + + KOKKOS_INLINE_FUNCTION + bool operator != (const RealType src) { + return (re_ != src) || (im_ != RealType(0)); + } + }; -//! Binary + operator for complex. +//! Binary + operator for complex complex. template<class RealType> KOKKOS_INLINE_FUNCTION complex<RealType> @@ -334,6 +356,22 @@ operator + (const complex<RealType>& x, const complex<RealType>& y) { return complex<RealType> (x.real () + y.real (), x.imag () + y.imag ()); } +//! Binary + operator for complex scalar. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator + (const complex<RealType>& x, const RealType& y) { + return complex<RealType> (x.real () + y , x.imag ()); +} + +//! Binary + operator for scalar complex. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator + (const RealType& x, const complex<RealType>& y) { + return complex<RealType> (x + y.real (), y.imag ()); +} + //! Unary + operator for complex. template<class RealType> KOKKOS_INLINE_FUNCTION @@ -350,6 +388,22 @@ operator - (const complex<RealType>& x, const complex<RealType>& y) { return complex<RealType> (x.real () - y.real (), x.imag () - y.imag ()); } +//! Binary - operator for complex scalar. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator - (const complex<RealType>& x, const RealType& y) { + return complex<RealType> (x.real () - y , x.imag ()); +} + +//! Binary - operator for scalar complex. 
+template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator - (const RealType& x, const complex<RealType>& y) { + return complex<RealType> (x - y.real (), - y.imag ()); +} + //! Unary - operator for complex. template<class RealType> KOKKOS_INLINE_FUNCTION @@ -395,6 +449,16 @@ operator * (const RealType& x, const complex<RealType>& y) { return complex<RealType> (x * y.real (), x * y.imag ()); } +/// \brief Binary * operator for RealType times complex. +/// +/// This function exists because the compiler doesn't know that +/// RealType and complex<RealType> commute with respect to operator*. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> +operator * (const complex<RealType>& y, const RealType& x) { + return complex<RealType> (x * y.real (), x * y.imag ()); +} //! Imaginary part of a complex number. template<class RealType> @@ -415,7 +479,25 @@ template<class RealType> KOKKOS_INLINE_FUNCTION RealType abs (const complex<RealType>& x) { // FIXME (mfh 31 Oct 2014) Scale to avoid unwarranted overflow. - return ::sqrt (real (x) * real (x) + imag (x) * imag (x)); + return std::sqrt (real (x) * real (x) + imag (x) * imag (x)); +} + +//! Power of a complex number +template<class RealType> +KOKKOS_INLINE_FUNCTION +Kokkos::complex<RealType> pow (const complex<RealType>& x, const RealType& e) { + RealType r = abs(x); + RealType phi = std::atan(x.imag()/x.real()); + return std::pow(r,e) * Kokkos::complex<RealType>(std::cos(phi*e),std::sin(phi*e)); +} + +//! Square root of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +Kokkos::complex<RealType> sqrt (const complex<RealType>& x) { + RealType r = abs(x); + RealType phi = std::atan(x.imag()/x.real()); + return std::sqrt(r) * Kokkos::complex<RealType>(std::cos(phi*0.5),std::sin(phi*0.5)); } //! Conjugate of a complex number. @@ -425,6 +507,19 @@ complex<RealType> conj (const complex<RealType>& x) { return complex<RealType> (real (x), -imag (x)); } +//! 
Exponential of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> exp (const complex<RealType>& x) { + return std::exp(x.real()) * complex<RealType> (std::cos (x.imag()), std::sin(x.imag())); +} + +//! Exponential of a complex number. +template<class RealType> +KOKKOS_INLINE_FUNCTION +complex<RealType> pow (const complex<RealType>& x) { + return std::exp(x.real()) * complex<RealType> (std::cos (x.imag()), std::sin(x.imag())); +} //! Binary operator / for complex and real numbers template<class RealType1, class RealType2> @@ -461,6 +556,14 @@ operator / (const complex<RealType>& x, const complex<RealType>& y) { } } +//! Binary operator / for complex and real numbers +template<class RealType1, class RealType2> +KOKKOS_INLINE_FUNCTION +complex<RealType1> +operator / (const RealType1& x, const complex<RealType2>& y) { + return complex<RealType1> (x)/y; +} + //! Equality operator for two complex numbers. template<class RealType> KOKKOS_INLINE_FUNCTION @@ -468,9 +571,13 @@ bool operator == (const complex<RealType>& x, const complex<RealType>& y) { return real (x) == real (y) && imag (x) == imag (y); } -//! Equality operator for std::complex and Kokkos::complex. +/// \brief Equality operator for std::complex and Kokkos::complex. +/// +/// This cannot be a device function, since std::real is not. +/// Otherwise, CUDA builds will give compiler warnings ("warning: +/// calling a constexpr __host__ function("real") from a __host__ +/// __device__ function("operator==") is not allowed"). 
template<class RealType> -KOKKOS_INLINE_FUNCTION bool operator == (const std::complex<RealType>& x, const complex<RealType>& y) { return std::real (x) == real (y) && std::imag (x) == imag (y); } @@ -533,6 +640,15 @@ std::ostream& operator >> (std::ostream& os, complex<RealType>& x) { } +template<class T> +struct reduction_identity<Kokkos::complex<T> > { + typedef reduction_identity<T> t_red_ident; + KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::complex<T> sum() + {return Kokkos::complex<T>(t_red_ident::sum(),t_red_ident::sum());} + KOKKOS_FORCEINLINE_FUNCTION constexpr static Kokkos::complex<T> prod() + {return Kokkos::complex<T>(t_red_ident::prod(),t_red_ident::sum());} +}; + } // namespace Kokkos #endif // KOKKOS_COMPLEX_HPP diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index cfcdabf95e3e085cf388f14e99fb6b4db3d8c654..9a2b53e1570664fd1a98b20c187b78790fb5c656 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -200,7 +200,14 @@ public: , Kokkos::DefaultHostExecutionSpace , execution_space >::type host_execution_space ; #else - typedef execution_space host_execution_space ; + #if defined( KOKKOS_ENABLE_OPENMPTARGET ) + typedef typename std::conditional + < std::is_same< execution_space , Kokkos::Experimental::OpenMPTarget >::value + , Kokkos::DefaultHostExecutionSpace , execution_space + >::type host_execution_space ; + #else + typedef execution_space host_execution_space ; + #endif #endif typedef typename std::conditional diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 16c1bce902d47f38a1cd455df8f8900d3e73c0a5..19de791c0ff118a434235cc1c44923ad103c3f70 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -57,11 +57,16 @@ #include <Kokkos_OpenMP.hpp> #endif +//#if defined( KOKKOS_ENABLE_OPENMPTARGET ) +#include <Kokkos_OpenMPTarget.hpp> +#include <Kokkos_OpenMPTargetSpace.hpp> 
+//#endif + #if defined( KOKKOS_ENABLE_QTHREADS ) #include <Kokkos_Qthreads.hpp> #endif -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) #include <Kokkos_Threads.hpp> #endif @@ -69,8 +74,8 @@ #include <Kokkos_Cuda.hpp> #endif -#include <Kokkos_MemoryPool.hpp> #include <Kokkos_Pair.hpp> +#include <Kokkos_MemoryPool.hpp> #include <Kokkos_Array.hpp> #include <Kokkos_View.hpp> #include <Kokkos_Vectorization.hpp> @@ -167,3 +172,4 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size ) //---------------------------------------------------------------------------- #endif + diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index 4029bf599c6b564a8bc6bb2b6d20f9472fe19be5..09081d238710ac0673e544433e13cc58902e06b4 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -96,7 +96,7 @@ class Serial; ///< Execution space main process on CPU. class Qthreads; ///< Execution space with Qthreads back-end. #endif -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) class Threads; ///< Execution space with pthreads back-end. #endif @@ -104,6 +104,14 @@ class Threads; ///< Execution space with pthreads back-end. class OpenMP; ///< OpenMP execution space. #endif +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) +namespace Experimental { +class OpenMPTarget; ///< OpenMPTarget execution space. 
+class OpenMPTargetSpace; +} +#endif + + #if defined( KOKKOS_ENABLE_CUDA ) class CudaSpace; ///< Memory space on Cuda GPU class CudaUVMSpace; ///< Memory space on Cuda GPU with UVM @@ -121,12 +129,14 @@ struct Device; /// Define Kokkos::DefaultExecutionSpace as per configuration option /// or chosen from the enabled execution spaces in the following order: -/// Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial +/// Kokkos::Cuda, Kokkos::Experimental::OpenMPTarget, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Serial namespace Kokkos { #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) typedef Cuda DefaultExecutionSpace; +#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) + typedef Experimental::OpenMPTarget DefaultExecutionSpace ; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) typedef OpenMP DefaultExecutionSpace; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) @@ -136,7 +146,7 @@ namespace Kokkos { #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) typedef Serial DefaultExecutionSpace; #else -# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::Experimental::OpenMPTarget, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." 
#endif #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) @@ -149,7 +159,7 @@ namespace Kokkos { typedef Serial DefaultHostExecutionSpace; #elif defined( KOKKOS_ENABLE_OPENMP ) typedef OpenMP DefaultHostExecutionSpace; -#elif defined( KOKKOS_ENABLE_PTHREAD ) +#elif defined( KOKKOS_ENABLE_THREADS ) typedef Threads DefaultHostExecutionSpace; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Qthreads DefaultHostExecutionSpace; @@ -254,6 +264,21 @@ template< class FunctorType, class ExecPolicy, class ExecutionSapce = } // namespace Impl +namespace Experimental { +template<class ScalarType , class Space = HostSpace> struct Sum; +template<class ScalarType , class Space = HostSpace> struct Prod; +template<class ScalarType , class Space = HostSpace> struct Min; +template<class ScalarType , class Space = HostSpace> struct Max; +template<class ScalarType , class Space = HostSpace> struct MinMax; +template<class ScalarType , class Index, class Space = HostSpace> struct MinLoc; +template<class ScalarType , class Index, class Space = HostSpace> struct MaxLoc; +template<class ScalarType , class Index, class Space = HostSpace> struct MinMaxLoc; +template<class ScalarType , class Space = HostSpace> struct BAnd; +template<class ScalarType , class Space = HostSpace> struct BOr; +template<class ScalarType , class Space = HostSpace> struct LAnd; +template<class ScalarType , class Space = HostSpace> struct LOr; +} } // namespace Kokkos #endif /* #ifndef KOKKOS_CORE_FWD_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index 433cac5e518cfbb40a413e1b5984994d54bfacbd..f0f0f874580b9bbe028d5fdb13183ea5211aea78 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -44,12 +44,11 @@ #ifndef KOKKOS_CUDA_HPP #define KOKKOS_CUDA_HPP -#include <Kokkos_Core_fwd.hpp> - -// If CUDA execution space is enabled then use this header file. 
- +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_CUDA ) +#include <Kokkos_Core_fwd.hpp> + #include <iosfwd> #include <vector> @@ -214,6 +213,8 @@ public: //@} //-------------------------------------------------------------------------- + static const char* name(); + private: cudaStream_t m_stream ; @@ -291,6 +292,7 @@ struct VerifyExecutionCanAccessMemorySpace #include <Cuda/Kokkos_CudaExec.hpp> #include <Cuda/Kokkos_Cuda_View.hpp> +#include <Cuda/Kokkos_Cuda_Team.hpp> #include <Cuda/Kokkos_Cuda_Parallel.hpp> #include <Cuda/Kokkos_Cuda_Task.hpp> @@ -300,5 +302,3 @@ struct VerifyExecutionCanAccessMemorySpace #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ #endif /* #ifndef KOKKOS_CUDA_HPP */ - - diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp index 2bbf631b7386a96d73a40c269676a6673db7995b..307ab193b187f51c2e6fa3886c221c106f19b6fa 100644 --- a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,10 +44,11 @@ #ifndef KOKKOS_CUDASPACE_HPP #define KOKKOS_CUDASPACE_HPP -#include <Kokkos_Core_fwd.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_CUDA ) +#include <Kokkos_Core_fwd.hpp> + #include <iosfwd> #include <typeinfo> #include <string> @@ -717,7 +718,7 @@ private: static ::cudaTextureObject_t attach_texture_object( const unsigned sizeof_alias , void * const alloc_ptr - , const size_t alloc_size ); + , const size_t alloc_size ); static RecordBase s_root_record ; diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index db4d67ae7d9656a998c1d3ff867dc6c1601562b7..375a2d37440f0545c596edbb074a8d5035de8ce8 100644 --- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -565,5 +565,3 @@ ThreadVectorRange( const TeamMemberType&, const iType& count ); #endif /* #define KOKKOS_EXECPOLICY_HPP */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index fc39ce0e5bc04c4a9f2c6ee91580dbc43a45d8ef..e224cd4e84a03df491ed0440c0ffcd5d5f1b3491 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -44,12 +44,11 @@ #ifndef KOKKOS_HBWSPACE_HPP #define KOKKOS_HBWSPACE_HPP -#include <Kokkos_HostSpace.hpp> - -/*--------------------------------------------------------------------------*/ - +#include <Kokkos_Macros.hpp> #ifdef KOKKOS_ENABLE_HBWSPACE +#include <Kokkos_HostSpace.hpp> + namespace Kokkos { namespace Experimental { @@ -114,7 +113,7 @@ public: // typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) typedef Kokkos::OpenMP execution_space; -#elif defined( 
KOKKOS_ENABLE_PTHREAD ) +#elif defined( KOKKOS_ENABLE_THREADS ) typedef Kokkos::Threads execution_space; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; @@ -348,5 +347,5 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace, Kokk } // namespace Kokkos #endif - #endif // #define KOKKOS_HBWSPACE_HPP + diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index 82006665ce0a6a4ba37ae88ad8e7456d4c75101a..d00cce8f608ee261dda128206cb2a182633b492f 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -117,7 +117,7 @@ public: // typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) typedef Kokkos::OpenMP execution_space; -#elif defined( KOKKOS_ENABLE_PTHREAD ) +#elif defined( KOKKOS_ENABLE_THREADS ) typedef Kokkos::Threads execution_space; //#elif defined( KOKKOS_ENABLE_QTHREADS ) // typedef Kokkos::Qthreads execution_space; @@ -265,7 +265,7 @@ public: return (SharedAllocationRecord *) 0; #endif } - + /**\brief Allocate tracked memory in the space */ static @@ -316,3 +316,4 @@ struct DeepCopy< HostSpace, HostSpace, ExecutionSpace > { } // namespace Kokkos #endif // #define KOKKOS_HOSTSPACE_HPP + diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp index 8ffbc8bb03d7cc3ed9693c3c5feb727edbdc4b4c..f300a6d9f694488980d16895293333ce99ca8f62 100644 --- a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,7 +47,7 @@ #ifndef KOKKOS_LAYOUT_HPP #define KOKKOS_LAYOUT_HPP -#include <stddef.h> +#include <cstddef> #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_Tags.hpp> @@ -62,7 +62,7 @@ enum { ARRAY_LAYOUT_MAX_RANK = 8 }; /// /// This is an example of a \c MemoryLayout template parameter of /// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. +/// multi-index (i0, i1, ..., ik) to a memory location. /// /// "Layout left" indicates a mapping where the leftmost index i0 /// refers to contiguous access, and strides increase for dimensions @@ -95,7 +95,7 @@ struct LayoutLeft { /// /// This is an example of a \c MemoryLayout template parameter of /// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. +/// multi-index (i0, i1, ..., ik) to a memory location. /// /// "Right layout" indicates a mapping where the rightmost index ik /// refers to contiguous access, and strides increase for dimensions @@ -130,7 +130,7 @@ struct LayoutStride { typedef LayoutStride array_layout ; size_t dimension[ ARRAY_LAYOUT_MAX_RANK ] ; - size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; + size_t stride[ ARRAY_LAYOUT_MAX_RANK ] ; LayoutStride( LayoutStride const & ) = default ; LayoutStride( LayoutStride && ) = default ; @@ -192,7 +192,7 @@ struct LayoutStride { /// /// This is an example of a \c MemoryLayout template parameter of /// View. The memory layout describes how View maps from a -/// multi-index (i0, i1, ..., ik) to a memory location. 
+/// multi-index (i0, i1, ..., ik) to a memory location. /// /// "Tiled layout" indicates a mapping to contiguously stored /// <tt>ArgN0</tt> by <tt>ArgN1</tt> tiles for the rightmost two diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index c138b08c94a5a9f93e7faeb067283a221486cb4a..1439dbd3f85f6b02dfd0273420c572e93bc0b3e5 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -48,9 +48,10 @@ /** Pick up configure / build options via #define macros: * * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces - * KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space + * KOKKOS_ENABLE_THREADS Kokkos::Threads execution space * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space + * KOKKOS_ENABLE_OPENMPTARGET Kokkos::Experimental::OpenMPTarget execution space * KOKKOS_ENABLE_HWLOC HWLOC library is available. * KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK Insert array bounds checks, is expensive! * KOKKOS_ENABLE_MPI Negotiate MPI/execution space interactions. @@ -414,6 +415,7 @@ // There is zero or one default execution space specified. #if 1 < ( ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) ? 1 : 0 ) + \ @@ -424,15 +426,18 @@ // If default is not specified then chose from enabled execution spaces. 
// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) //#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) #elif defined( KOKKOS_ENABLE_CUDA ) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA +#elif defined( KOKKOS_ENABLE_OPENMPTARGET ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET #elif defined( KOKKOS_ENABLE_OPENMP ) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP -#elif defined( KOKKOS_ENABLE_PTHREAD ) +#elif defined( KOKKOS_ENABLE_THREADS ) #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS //#elif defined( KOKKOS_ENABLE_QTHREADS ) // #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS @@ -459,10 +464,17 @@ #endif //---------------------------------------------------------------------------- -// Enable Profiling by default +// If compiling with CUDA then must be using CUDA 8 or better +// and use relocatable device code to enable the task policy. +// nvcc relocatable device code option: --relocatable-device-code=true -#ifndef KOKKOS_ENABLE_PROFILING - #define KOKKOS_ENABLE_PROFILING 1 +#if ( defined( KOKKOS_ENABLE_CUDA ) ) + #if ( 8000 <= CUDA_VERSION ) && defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ) + #define KOKKOS_ENABLE_TASKDAG + #endif +#else + #define KOKKOS_ENABLE_TASKDAG #endif #endif // #ifndef KOKKOS_MACROS_HPP + diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index eadad10b4991db1e98410f8eafcd77ad9bc87db0..dbf1ad8057fe87ecab80f3d29eed98952be19520 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v.
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,1513 +47,664 @@ #include <Kokkos_Core_fwd.hpp> #include <Kokkos_Parallel.hpp> #include <Kokkos_Atomic.hpp> -#include <impl/Kokkos_BitOps.hpp> +#include <impl/Kokkos_ConcurrentBitset.hpp> #include <impl/Kokkos_Error.hpp> #include <impl/Kokkos_SharedAlloc.hpp> -#include <limits> -#include <algorithm> -#include <chrono> - -// How should errors be handled? In general, production code should return a -// value indicating failure so the user can decide how the error is handled. -// While experimental, code can abort instead. If KOKKOS_ENABLE_MEMPOOL_PRINTERR is -// defined, the code will abort with an error message. Otherwise, the code will -// return with a value indicating failure when possible, or do nothing instead. 
-//#define KOKKOS_ENABLE_MEMPOOL_PRINTERR - -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INFO -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO -//#define KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO - -//---------------------------------------------------------------------------- - namespace Kokkos { -namespace Experimental { -namespace MempoolImpl { - -template < typename T, typename ExecutionSpace > -struct initialize_array { - typedef ExecutionSpace execution_space; - typedef typename ExecutionSpace::size_type size_type; - - T * m_data; - T m_value; - - initialize_array( T * d, size_t size, T v ) : m_data( d ), m_value( v ) - { - Kokkos::parallel_for( size, *this ); - - execution_space::fence(); - } - - KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const { m_data[i] = m_value; } -}; +template< typename DeviceType > +class MemoryPool { +private: -template <typename Bitset> -struct bitset_count -{ - typedef typename Bitset::execution_space execution_space; - typedef typename execution_space::size_type size_type; - typedef typename Bitset::size_type value_type; - typedef typename Bitset::word_type word_type; + typedef typename Kokkos::Impl::concurrent_bitset CB ; + + enum : uint32_t { bits_per_int_lg2 = CB::bits_per_int_lg2 }; + enum : uint32_t { state_shift = CB::state_shift }; + enum : uint32_t { state_used_mask = CB::state_used_mask }; + enum : uint32_t { state_header_mask = CB::state_header_mask }; + enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 }; + enum : uint32_t { max_bit_count = CB::max_bit_count }; + + /* Defaults for min block, max block, and superblock sizes */ + enum : uint32_t { MIN_BLOCK_SIZE_LG2 = 6 /* 64 bytes */ }; + enum : uint32_t { MAX_BLOCK_SIZE_LG2 = 12 /* 4k bytes */ }; + enum : uint32_t { 
SUPERBLOCK_SIZE_LG2 = 16 /* 64k bytes */ }; + + enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 }; + + /* Each superblock has a concurrent bitset state + * which is an array of uint32_t integers. + * [ { block_count_lg2 : state_shift bits + * , used_block_count : ( 32 - state_shift ) bits + * } + * , { block allocation bit set }* ] + * + * As superblocks are assigned (allocated) to a block size + * and released (deallocated) back to empty the superblock state + * is concurrently updated. + */ + + typedef typename DeviceType::memory_space base_memory_space ; + + enum { accessible = + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace + , base_memory_space >::accessible }; + + typedef Kokkos::Impl::SharedAllocationTracker Tracker ; + typedef Kokkos::Impl::SharedAllocationRecord + < base_memory_space > Record ; + + Tracker m_tracker ; + uint32_t * m_sb_state_array ; + uint32_t m_sb_state_size ; + uint32_t m_sb_size_lg2 ; + uint32_t m_max_block_size_lg2 ; + uint32_t m_min_block_size_lg2 ; + int32_t m_sb_count ; + int32_t m_hint_offset ; // Offset to K * #block_size array of hints + int32_t m_data_offset ; // Offset to 0th superblock data + int32_t m_unused_padding ; - word_type * m_words; - value_type & m_result; +public: - bitset_count( word_type * w, value_type num_words, value_type & r ) - : m_words( w ), m_result( r ) - { - parallel_reduce( num_words, *this, m_result ); - } + //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { v = 0; } + size_t capacity() const noexcept + { return size_t(m_sb_count) << m_sb_size_lg2 ; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, volatile value_type const & src ) const - { dst += src; } + size_t min_block_size() const noexcept + { return ( 1LU << m_min_block_size_lg2 ); } KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & count ) const - { - count += Kokkos::Impl::bit_count( m_words[i] ); - } -}; - -template < 
typename Device > -class Bitset { -public: - typedef typename Device::execution_space execution_space; - typedef typename Device::memory_space memory_space; - typedef unsigned word_type; - typedef unsigned size_type; - - typedef Kokkos::Impl::DeepCopy< memory_space, Kokkos::HostSpace > raw_deep_copy; - - // Define some constants. - enum { - // Size of bitset word. Should be 32. - WORD_SIZE = sizeof(word_type) * CHAR_BIT, - LG_WORD_SIZE = Kokkos::Impl::integral_power_of_two( WORD_SIZE ), - WORD_MASK = WORD_SIZE - 1 + size_t max_block_size() const noexcept + { return ( 1LU << m_max_block_size_lg2 ); } + + struct usage_statistics { + size_t capacity_bytes ; ///< Capacity in bytes + size_t superblock_bytes ; ///< Superblock size in bytes + size_t max_block_bytes ; ///< Maximum block size in bytes + size_t min_block_bytes ; ///< Minimum block size in bytes + size_t capacity_superblocks ; ///< Number of superblocks + size_t consumed_superblocks ; ///< Superblocks assigned to allocations + size_t consumed_blocks ; ///< Number of allocations + size_t consumed_bytes ; ///< Bytes allocated + size_t reserved_blocks ; ///< Unallocated blocks in assigned superblocks + size_t reserved_bytes ; ///< Unallocated bytes in assigned superblocks }; -private: - word_type * m_words; - size_type m_size; - size_type m_num_words; - word_type m_last_word_mask; - -public: - ~Bitset() = default; - Bitset() = default; - Bitset( Bitset && ) = default; - Bitset( const Bitset & ) = default; - Bitset & operator = ( Bitset && ) = default; - Bitset & operator = ( const Bitset & ) = default; - - void init( void * w, size_type s ) - { - // Assumption: The size of the memory pointed to by w is a multiple of - // sizeof(word_type). - - m_words = reinterpret_cast<word_type*>( w ); - m_size = s; - m_num_words = ( s + WORD_SIZE - 1 ) >> LG_WORD_SIZE; - m_last_word_mask = m_size & WORD_MASK ? 
( word_type(1) << ( m_size & WORD_MASK ) ) - 1 : 0; - - reset(); - } - - size_type size() const { return m_size; } - - size_type count() const - { - size_type val = 0; - bitset_count< Bitset > bc( m_words, m_num_words, val ); - return val; - } - - void set() - { - // Set all the bits. - initialize_array< word_type, execution_space > ia( m_words, m_num_words, ~word_type(0) ); - - if ( m_last_word_mask ) { - // Clear the unused bits in the last block. - raw_deep_copy( m_words + ( m_num_words - 1 ), &m_last_word_mask, sizeof(word_type) ); - } - } - - void reset() - { - initialize_array< word_type, execution_space > ia( m_words, m_num_words, word_type(0) ); - } - - KOKKOS_FORCEINLINE_FUNCTION - bool test( size_type i ) const - { - size_type word_pos = i >> LG_WORD_SIZE; - word_type word = volatile_load( &m_words[ word_pos ] ); - word_type mask = word_type(1) << ( i & WORD_MASK ); - - return word & mask; - } - - KOKKOS_FORCEINLINE_FUNCTION - bool set( size_type i ) const - { - size_type word_pos = i >> LG_WORD_SIZE; - word_type mask = word_type(1) << ( i & WORD_MASK ); - - return !( atomic_fetch_or( &m_words[ word_pos ], mask ) & mask ); - } - - KOKKOS_FORCEINLINE_FUNCTION - bool reset( size_type i ) const - { - size_type word_pos = i >> LG_WORD_SIZE; - word_type mask = word_type(1) << ( i & WORD_MASK ); - - return atomic_fetch_and( &m_words[ word_pos ], ~mask ) & mask; - } - - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - fetch_word_set( size_type i ) const - { - size_type word_pos = i >> LG_WORD_SIZE; - word_type mask = word_type(1) << ( i & WORD_MASK ); - - Kokkos::pair<bool, word_type> result; - result.second = atomic_fetch_or( &m_words[ word_pos ], mask ); - result.first = !( result.second & mask ); - - return result; - } - - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - fetch_word_reset( size_type i ) const - { - size_type word_pos = i >> LG_WORD_SIZE; - word_type mask = word_type(1) << ( i & WORD_MASK ); + void 
get_usage_statistics( usage_statistics & stats ) const + { + Kokkos::HostSpace host ; - Kokkos::pair<bool, word_type> result; - result.second = atomic_fetch_and( &m_words[ word_pos ], ~mask ); - result.first = result.second & mask; + const size_t alloc_size = m_hint_offset * sizeof(uint32_t); - return result; - } + uint32_t * const sb_state_array = + accessible ? m_sb_state_array : (uint32_t *) host.allocate(alloc_size); - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - set_any_in_word( size_type & pos ) const - { - size_type word_pos = pos >> LG_WORD_SIZE; - word_type word = volatile_load( &m_words[ word_pos ] ); - - // Loop until there are no more unset bits in the word. - while ( ~word ) { - // Find the first unset bit in the word. - size_type bit = Kokkos::Impl::bit_scan_forward( ~word ); - - // Try to set the bit. - word_type mask = word_type(1) << bit; - word = atomic_fetch_or( &m_words[ word_pos ], mask ); - - if ( !( word & mask ) ) { - // Successfully set the bit. - pos = ( word_pos << LG_WORD_SIZE ) + bit; - - return Kokkos::pair<bool, word_type>( true, word ); + if ( ! accessible ) { + Kokkos::Impl::DeepCopy< Kokkos::HostSpace , base_memory_space > + ( sb_state_array , m_sb_state_array , alloc_size ); } - } - - // Didn't find a free bit in this word. - return Kokkos::pair<bool, word_type>( false, word_type(0) ); - } - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - set_any_in_word( size_type & pos, word_type word_mask ) const - { - size_type word_pos = pos >> LG_WORD_SIZE; - word_type word = volatile_load( &m_words[ word_pos ] ); - word = ( ~word ) & word_mask; - - // Loop until there are no more unset bits in the word. - while ( word ) { - // Find the first unset bit in the word. - size_type bit = Kokkos::Impl::bit_scan_forward( word ); - - // Try to set the bit. - word_type mask = word_type(1) << bit; - word = atomic_fetch_or( &m_words[ word_pos ], mask ); - - if ( !( word & mask ) ) { - // Successfully set the bit. 
- pos = ( word_pos << LG_WORD_SIZE ) + bit; - - return Kokkos::pair<bool, word_type>( true, word ); + stats.superblock_bytes = ( 1LU << m_sb_size_lg2 ); + stats.max_block_bytes = ( 1LU << m_max_block_size_lg2 ); + stats.min_block_bytes = ( 1LU << m_min_block_size_lg2 ); + stats.capacity_bytes = stats.superblock_bytes * m_sb_count ; + stats.capacity_superblocks = m_sb_count ; + stats.consumed_superblocks = 0 ; + stats.consumed_blocks = 0 ; + stats.consumed_bytes = 0 ; + stats.reserved_blocks = 0 ; + stats.reserved_bytes = 0 ; + + const uint32_t * sb_state_ptr = sb_state_array ; + + for ( int32_t i = 0 ; i < m_sb_count + ; ++i , sb_state_ptr += m_sb_state_size ) { + + const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift ; + + if ( block_count_lg2 ) { + const uint32_t block_count = 1u << block_count_lg2 ; + const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2 ; + const uint32_t block_size = 1u << block_size_lg2 ; + const uint32_t block_used = (*sb_state_ptr) & state_used_mask ; + + stats.consumed_superblocks++ ; + stats.consumed_blocks += block_used ; + stats.consumed_bytes += block_used * block_size ; + stats.reserved_blocks += block_count - block_used ; + stats.reserved_bytes += (block_count - block_used ) * block_size ; + } } - word = ( ~word ) & word_mask; + if ( ! accessible ) { + host.deallocate( sb_state_array, alloc_size ); + } } - // Didn't find a free bit in this word. - return Kokkos::pair<bool, word_type>( false, word_type(0) ); - } + void print_state( std::ostream & s ) const + { + Kokkos::HostSpace host ; - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - reset_any_in_word( size_type & pos ) const - { - size_type word_pos = pos >> LG_WORD_SIZE; - word_type word = volatile_load( &m_words[ word_pos ] ); - - // Loop until there are no more set bits in the word. - while ( word ) { - // Find the first unset bit in the word. - size_type bit = Kokkos::Impl::bit_scan_forward( word ); - - // Try to reset the bit. 
- word_type mask = word_type(1) << bit; - word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); - - if ( word & mask ) { - // Successfully reset the bit. - pos = ( word_pos << LG_WORD_SIZE ) + bit; - - return Kokkos::pair<bool, word_type>( true, word ); - } - } + const size_t alloc_size = m_hint_offset * sizeof(uint32_t); - // Didn't find a free bit in this word. - return Kokkos::pair<bool, word_type>( false, word_type(0) ); - } + uint32_t * const sb_state_array = + accessible ? m_sb_state_array : (uint32_t *) host.allocate(alloc_size); - KOKKOS_FORCEINLINE_FUNCTION - Kokkos::pair< bool, word_type > - reset_any_in_word( size_type & pos, word_type word_mask ) const - { - size_type word_pos = pos >> LG_WORD_SIZE; - word_type word = volatile_load( &m_words[ word_pos ] ); - word = word & word_mask; - - // Loop until there are no more set bits in the word. - while ( word ) { - // Find the first unset bit in the word. - size_type bit = Kokkos::Impl::bit_scan_forward( word ); - - // Try to reset the bit. - word_type mask = word_type(1) << bit; - word = atomic_fetch_and( &m_words[ word_pos ], ~mask ); - - if ( word & mask ) { - // Successfully reset the bit. - pos = ( word_pos << LG_WORD_SIZE ) + bit; - - return Kokkos::pair<bool, word_type>( true, word ); + if ( ! accessible ) { + Kokkos::Impl::DeepCopy< Kokkos::HostSpace , base_memory_space > + ( sb_state_array , m_sb_state_array , alloc_size ); } - word = word & word_mask; - } + const uint32_t * sb_state_ptr = sb_state_array ; - // Didn't find a free bit in this word. 
- return Kokkos::pair<bool, word_type>( false, word_type(0) ); - } -}; + s << "pool_size(" << ( size_t(m_sb_count) << m_sb_size_lg2 ) << ")" + << " superblock_size(" << ( 1 << m_sb_size_lg2 ) << ")" << std::endl ; -template < typename UInt32View, typename BSHeaderView, typename SBHeaderView, - typename MempoolBitset > -struct create_histogram { - typedef typename UInt32View::execution_space execution_space; - typedef typename execution_space::size_type size_type; - typedef Kokkos::pair< double, uint32_t > value_type; - - size_t m_start; - UInt32View m_page_histogram; - BSHeaderView m_blocksize_info; - SBHeaderView m_sb_header; - MempoolBitset m_sb_blocks; - size_t m_lg_max_sb_blocks; - uint32_t m_lg_min_block_size; - uint32_t m_blocks_per_page; - value_type & m_result; - - create_histogram( size_t start, size_t end, UInt32View ph, BSHeaderView bsi, - SBHeaderView sbh, MempoolBitset sbb, size_t lmsb, - uint32_t lmbs, uint32_t bpp, value_type & r ) - : m_start( start ), m_page_histogram( ph ), m_blocksize_info( bsi ), - m_sb_header( sbh ), m_sb_blocks( sbb ), m_lg_max_sb_blocks( lmsb ), - m_lg_min_block_size( lmbs ), m_blocks_per_page( bpp ), m_result( r ) - { - Kokkos::parallel_reduce( end - start, *this, m_result ); - - execution_space::fence(); - } + for ( int32_t i = 0 ; i < m_sb_count + ; ++i , sb_state_ptr += m_sb_state_size ) { - KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { - v.first = 0.0; - v.second = 0; - } + if ( *sb_state_ptr ) { - KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, volatile value_type const & src ) const - { - dst.first += src.first; - dst.second += src.second; - } + const uint32_t block_count_lg2 = (*sb_state_ptr) >> state_shift ; + const uint32_t block_size_lg2 = m_sb_size_lg2 - block_count_lg2 ; + const uint32_t block_count = 1 << block_count_lg2 ; + const uint32_t block_used = (*sb_state_ptr) & state_used_mask ; - KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & r ) const - { - 
size_type i2 = i + m_start; + s << "Superblock[ " << i << " / " << m_sb_count << " ] {" + << " block_size(" << ( 1 << block_size_lg2 ) << ")" + << " block_count( " << block_used + << " / " << block_count << " )" + << std::endl ; + } + } - uint32_t lg_block_size = m_sb_header(i2).m_lg_block_size; + if ( ! accessible ) { + host.deallocate( sb_state_array, alloc_size ); + } + } - // A superblock only has a block size of 0 when it is empty. - if ( lg_block_size != 0 ) { - uint32_t block_size_id = lg_block_size - m_lg_min_block_size; - uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb; - uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + //-------------------------------------------------------------------------- + + MemoryPool() = default ; + MemoryPool( MemoryPool && ) = default ; + MemoryPool( const MemoryPool & ) = default ; + MemoryPool & operator = ( MemoryPool && ) = default ; + MemoryPool & operator = ( const MemoryPool & ) = default ; + + /**\brief Allocate a memory pool from 'memspace'. + * + * The memory pool will have at least 'min_total_alloc_size' bytes + * of memory to allocate divided among superblocks of at least + * 'min_superblock_size' bytes. A single allocation must fit + * within a single superblock, so 'min_superblock_size' must be + * at least as large as the maximum single allocation. + * Both 'min_total_alloc_size' and 'min_superblock_size' + * are rounded up to the smallest power-of-two value that + * contains the corresponding sizes. + * Individual allocations will always consume a block of memory that + * is also a power-of-two. These roundings are made to enable + * significant runtime performance improvements. 
+ */ + MemoryPool( const base_memory_space & memspace + , const size_t min_total_alloc_size + , const uint32_t min_block_alloc_size // = 1 << MIN_BLOCK_SIZE_LG2 + , const uint32_t max_block_alloc_size // = 1 << MAX_BLOCK_SIZE_LG2 + , const uint32_t min_superblock_size // = 1 << SUPERBLOCK_SIZE_LG2 + ) + : m_tracker() + , m_sb_state_array(0) + , m_sb_state_size(0) + , m_sb_size_lg2(0) + , m_max_block_size_lg2(0) + , m_min_block_size_lg2(0) + , m_sb_count(0) + , m_hint_offset(0) + , m_data_offset(0) + , m_unused_padding(0) + { + const uint32_t int_align_lg2 = 3 ; /* align as int[8] */ + const uint32_t int_align_mask = ( 1u << int_align_lg2 ) - 1 ; - uint32_t total_allocated_blocks = 0; + // Block and superblock size is power of two: - for ( uint32_t j = 0; j < pages_per_sb; ++j ) { - unsigned start_pos = ( i2 << m_lg_max_sb_blocks ) + j * m_blocks_per_page; - unsigned end_pos = start_pos + m_blocks_per_page; - uint32_t page_allocated_blocks = 0; + m_min_block_size_lg2 = + Kokkos::Impl::integral_power_of_two_that_contains(min_block_alloc_size); - for ( unsigned k = start_pos; k < end_pos; ++k ) { - page_allocated_blocks += m_sb_blocks.test( k ); - } + m_max_block_size_lg2 = + Kokkos::Impl::integral_power_of_two_that_contains(max_block_alloc_size); + + m_sb_size_lg2 = + Kokkos::Impl::integral_power_of_two_that_contains(min_superblock_size); - total_allocated_blocks += page_allocated_blocks; + // Constraints: + // m_min_block_size_lg2 <= m_max_block_size_lg2 <= m_sb_size_lg2 + // m_sb_size_lg2 <= m_min_block_size_lg2 + max_bit_count_lg2 - atomic_increment( &m_page_histogram(page_allocated_blocks) ); + if ( m_min_block_size_lg2 + max_bit_count_lg2 < m_sb_size_lg2 ) { + m_min_block_size_lg2 = m_sb_size_lg2 - max_bit_count_lg2 ; + } + if ( m_min_block_size_lg2 + max_bit_count_lg2 < m_max_block_size_lg2 ) { + m_min_block_size_lg2 = m_max_block_size_lg2 - max_bit_count_lg2 ; + } + if ( m_max_block_size_lg2 < m_min_block_size_lg2 ) { + m_max_block_size_lg2 = m_min_block_size_lg2
; + } + if ( m_sb_size_lg2 < m_max_block_size_lg2 ) { + m_sb_size_lg2 = m_max_block_size_lg2 ; } - r.first += double(total_allocated_blocks) / blocks_per_sb; - r.second += blocks_per_sb; - } - } -}; + // At least 32 minimum size blocks in a superblock -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO -template < typename UInt32View, typename SBHeaderView, typename MempoolBitset > -struct count_allocated_blocks { - typedef typename UInt32View::execution_space execution_space; - typedef typename execution_space::size_type size_type; + if ( m_sb_size_lg2 < m_min_block_size_lg2 + 5 ) { + m_sb_size_lg2 = m_min_block_size_lg2 + 5 ; + } - UInt32View m_num_allocated_blocks; - SBHeaderView m_sb_header; - MempoolBitset m_sb_blocks; - size_t m_sb_size; - size_t m_lg_max_sb_blocks; + // number of superblocks is multiple of superblock size that + // can hold min_total_alloc_size. - count_allocated_blocks( size_t num_sb, UInt32View nab, SBHeaderView sbh, - MempoolBitset sbb, size_t sbs, size_t lmsb ) - : m_num_allocated_blocks( nab ), m_sb_header( sbh ), - m_sb_blocks( sbb ), m_sb_size( sbs ), m_lg_max_sb_blocks( lmsb ) - { - Kokkos::parallel_for( num_sb, *this ); + const uint32_t sb_size_mask = ( 1u << m_sb_size_lg2 ) - 1 ; - execution_space::fence(); - } + m_sb_count = ( min_total_alloc_size + sb_size_mask ) >> m_sb_size_lg2 ; - KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const - { - uint32_t lg_block_size = m_sb_header(i).m_lg_block_size; - - // A superblock only has a block size of 0 when it is empty. - if ( lg_block_size != 0 ) { - // Count the allocated blocks in the superblock. - uint32_t blocks_per_sb = lg_block_size > 0 ? 
m_sb_size >> lg_block_size : 0; - unsigned start_pos = i << m_lg_max_sb_blocks; - unsigned end_pos = start_pos + blocks_per_sb; - uint32_t count = 0; - - for ( unsigned j = start_pos; j < end_pos; ++j ) { - count += m_sb_blocks.test( j ); - } + // Any superblock can be assigned to the smallest size block + // Size the block bitset to maximum number of blocks - m_num_allocated_blocks(i) = count; - } - } -}; -#endif + const uint32_t max_block_count_lg2 = + m_sb_size_lg2 - m_min_block_size_lg2 ; -} - -/// \class MemoryPool -/// \brief Bitset based memory manager for pools of same-sized chunks of memory. -/// \tparam Device Kokkos device that gives the execution and memory space the -/// allocator will be used in. -/// -/// MemoryPool is a memory space that can be on host or device. It provides a -/// pool memory allocator for fast allocation of same-sized chunks of memory. -/// The memory is only accessible on the host / device this allocator is -/// associated with. -/// -/// This allocator is based on ideas from the following GPU allocators: -/// Halloc (https://github.com/canonizer/halloc). -/// ScatterAlloc (https://github.com/ComputationalRadiationPhysics/scatteralloc) -template < typename Device > -class MemoryPool { -private: - // The allocator uses superblocks. A superblock is divided into pages, and a - // page is divided into blocks. A block is the chunk of memory that is given - // out by the allocator. A page always has a number of blocks equal to the - // size of the word used by the bitset. Thus, the pagesize can vary between - // superblocks as it is based on the block size of the superblock. The - // allocator supports all powers of 2 from MIN_BLOCK_SIZE to the size of a - // superblock as block sizes. - - // Superblocks are divided into 4 categories: - // 1. empty - is completely empty; there are no active allocations - // 2. partfull - partially full; there are some active allocations - // 3. 
full - full enough with active allocations that new allocations - // will likely fail - // 4. active - is currently the active superblock for a block size - // - // An inactive superblock is one that is empty, partfull, or full. - // - // New allocations occur only from an active superblock. If a superblock is - // made inactive after an allocation request is made to it but before the - // allocation request is fulfilled, the allocation will still be attempted - // from that superblock. Deallocations can occur to partfull, full, or - // active superblocks. Superblocks move between categories as allocations - // and deallocations happen. Superblocks all start empty. - // - // Here are the possible moves between categories: - // empty -> active During allocation, there is no active superblock - // or the active superblock is full. - // active -> full During allocation, the full threshold of the - // superblock is reached when increasing the fill - // level. - // full -> partfull During deallocation, the full threshold of the - // superblock is crossed when decreasing the fill - // level. - // partfull -> empty Deallocation of the last allocated block of an - // inactive superblock. - // partfull -> active During allocation, the active superblock is full. - // - // When a new active superblock is needed, partfull superblocks of the same - // block size are chosen over empty superblocks. - // - // The empty and partfull superblocks are tracked using bitsets that represent - // the superblocks in those repsective categories. Empty superblocks use a - // single bitset, while partfull superblocks use a bitset per block size - // (contained sequentially in a single bitset). Active superblocks are - // tracked by the active superblocks array. Full superblocks aren't tracked - // at all. 
- - typedef typename Device::execution_space execution_space; - typedef typename Device::memory_space backend_memory_space; - typedef Device device_type; - typedef MempoolImpl::Bitset< device_type > MempoolBitset; - - // Define some constants. - enum { - MIN_BLOCK_SIZE = 64, - LG_MIN_BLOCK_SIZE = Kokkos::Impl::integral_power_of_two( MIN_BLOCK_SIZE ), - MAX_BLOCK_SIZES = 31 - LG_MIN_BLOCK_SIZE + 1, - - // Size of bitset word. - BLOCKS_PER_PAGE = MempoolBitset::WORD_SIZE, - LG_BLOCKS_PER_PAGE = MempoolBitset::LG_WORD_SIZE, - - INVALID_SUPERBLOCK = ~uint32_t(0), - SUPERBLOCK_LOCK = ~uint32_t(0) - 1, - - MAX_TRIES = 32 // Cap on the number of pages searched - // before an allocation returns empty. - }; + m_sb_state_size = + ( CB::buffer_bound_lg2( max_block_count_lg2 ) + int_align_mask ) & ~int_align_mask ; -public: - // Stores information about each superblock. - struct SuperblockHeader { - uint32_t m_full_pages; - uint32_t m_empty_pages; - uint32_t m_lg_block_size; - uint32_t m_is_active; - - KOKKOS_FUNCTION - SuperblockHeader() : - m_full_pages(0), m_empty_pages(0), m_lg_block_size(0), m_is_active(false) {} - }; + // Array of all superblock states - // Stores information about each block size. - struct BlockSizeHeader { - uint32_t m_blocks_per_sb; - uint32_t m_pages_per_sb; - uint32_t m_sb_full_level; - uint32_t m_page_full_level; + const size_t all_sb_state_size = + ( m_sb_count * m_sb_state_size + int_align_mask ) & ~int_align_mask ; - KOKKOS_FUNCTION - BlockSizeHeader() : - m_blocks_per_sb(0), m_pages_per_sb(0), m_sb_full_level(0), m_page_full_level(0) {} - }; + // Number of block sizes -private: - typedef Kokkos::Impl::SharedAllocationTracker Tracker; - typedef View< uint32_t *, device_type > UInt32View; - typedef View< SuperblockHeader *, device_type > SBHeaderView; - - // The letters 'sb' used in any variable name mean superblock. - - size_t m_lg_sb_size; // Log2 of superblock size. - size_t m_sb_size; // Superblock size. 
- size_t m_lg_max_sb_blocks; // Log2 of the number of blocks of the - // minimum block size in a superblock. - size_t m_num_sb; // Number of superblocks. - size_t m_ceil_num_sb; // Number of superblocks rounded up to the smallest - // multiple of the bitset word size. Used by - // bitsets representing superblock categories to - // ensure different block sizes never share a word - // in the bitset. - size_t m_num_block_size; // Number of block sizes supported. - size_t m_data_size; // Amount of memory available to the allocator. - size_t m_sb_blocks_size; // Amount of memory for free / empty blocks bitset. - size_t m_empty_sb_size; // Amount of memory for empty superblocks bitset. - size_t m_partfull_sb_size; // Amount of memory for partfull superblocks bitset. - size_t m_total_size; // Total amount of memory allocated. - char * m_data; // Beginning device memory location used for - // superblocks. - UInt32View m_active; // Active superblocks IDs. - SBHeaderView m_sb_header; // Header info for superblocks. - MempoolBitset m_sb_blocks; // Bitsets representing free / allocated status - // of blocks in superblocks. - MempoolBitset m_empty_sb; // Bitset representing empty superblocks. - MempoolBitset m_partfull_sb; // Bitsets representing partially full superblocks. - Tracker m_track; // Tracker for superblock memory. - BlockSizeHeader m_blocksize_info[MAX_BLOCK_SIZES]; // Header info for block sizes. - - // There were several methods tried for storing the block size header info: in a View, - // in a View of const data, and in a RandomAccess View. All of these were slower than - // storing it in a static array that is a member variable to the class. In the latter - // case, the block size info gets copied into the constant memory on the GPU along with - // the class when it is copied there for exeucting a parallel loop. Instead of storing - // the values, computing the values every time they were needed was also tried. 
This - // method was slightly slower than storing them in the static array. + const int32_t number_block_sizes = + 1 + m_max_block_size_lg2 - m_min_block_size_lg2 ; -public: - //! Tag this class as a kokkos memory space - typedef MemoryPool memory_space; - - ~MemoryPool() = default; - MemoryPool() = default; - MemoryPool( MemoryPool && ) = default; - MemoryPool( const MemoryPool & ) = default; - MemoryPool & operator = ( MemoryPool && ) = default; - MemoryPool & operator = ( const MemoryPool & ) = default; - - /// \brief Initializes the memory pool. - /// \param memspace The memory space from which the memory pool will allocate memory. - /// \param total_size The requested memory amount controlled by the allocator. The - /// actual amount is rounded up to the smallest multiple of the - /// superblock size >= the requested size. - /// \param log2_superblock_size Log2 of the size of superblocks used by the allocator. - /// In most use cases, the default value should work. - inline - MemoryPool( const backend_memory_space & memspace, - size_t total_size, size_t log2_superblock_size = 20 ) - : m_lg_sb_size( log2_superblock_size ), - m_sb_size( size_t(1) << m_lg_sb_size ), - m_lg_max_sb_blocks( m_lg_sb_size - LG_MIN_BLOCK_SIZE ), - m_num_sb( ( total_size + m_sb_size - 1 ) >> m_lg_sb_size ), - m_ceil_num_sb( ( ( m_num_sb + BLOCKS_PER_PAGE - 1 ) >> LG_BLOCKS_PER_PAGE ) << - LG_BLOCKS_PER_PAGE ), - m_num_block_size( m_lg_sb_size - LG_MIN_BLOCK_SIZE + 1 ), - m_data_size( m_num_sb * m_sb_size ), - m_sb_blocks_size( ( m_num_sb << m_lg_max_sb_blocks ) / CHAR_BIT ), - m_empty_sb_size( m_ceil_num_sb / CHAR_BIT ), - m_partfull_sb_size( m_ceil_num_sb * m_num_block_size / CHAR_BIT ), - m_total_size( m_data_size + m_sb_blocks_size + m_empty_sb_size + m_partfull_sb_size ), - m_data(0), - m_active( "Active superblocks" ), - m_sb_header( "Superblock headers" ), - m_track() - { - // Assumption. The minimum block size must be a power of 2. 
- static_assert( Kokkos::Impl::is_integral_power_of_two( MIN_BLOCK_SIZE ), "" ); - - // Assumption. Require a superblock be large enough so it takes at least 1 - // whole bitset word to represent it using the minimum blocksize. - if ( m_sb_size < MIN_BLOCK_SIZE * BLOCKS_PER_PAGE ) { - printf( "\n** MemoryPool::MemoryPool() Superblock size must be >= %u **\n", - MIN_BLOCK_SIZE * BLOCKS_PER_PAGE ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - Kokkos::abort( "" ); - } + // Array length for possible block sizes + // Hint array is one uint32_t per block size - // Assumption. A superblock's size can be at most 2^31. Verify this. - if ( m_lg_sb_size > 31 ) { - printf( "\n** MemoryPool::MemoryPool() Superblock size must be < %u **\n", - ( uint32_t(1) << 31 ) ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - Kokkos::abort( "" ); - } + const int32_t block_size_array_size = + ( number_block_sizes + int_align_mask ) & ~int_align_mask ; - // Assumption. The Bitset only uses unsigned for size types which limits - // the amount of memory the allocator can manage. Verify the memory size - // is below this limit. - if ( m_data_size > size_t(MIN_BLOCK_SIZE) * std::numeric_limits<unsigned>::max() ) { - printf( "\n** MemoryPool::MemoryPool() Allocator can only manage %lu bytes of memory; requested %lu **\n", - size_t(MIN_BLOCK_SIZE) * std::numeric_limits<unsigned>::max(), total_size ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - Kokkos::abort( "" ); - } + m_hint_offset = all_sb_state_size ; + m_data_offset = m_hint_offset + + block_size_array_size * HINT_PER_BLOCK_SIZE ; - // Allocate memory for Views. This is done here instead of at construction - // so that the runtime checks can be performed before allocating memory. - resize( m_active, m_num_block_size ); - resize( m_sb_header, m_num_sb ); + // Allocation: - // Allocate superblock memory. 
- typedef Kokkos::Impl::SharedAllocationRecord< backend_memory_space, void > SharedRecord; - SharedRecord * rec = - SharedRecord::allocate( memspace, "mempool", m_total_size ); + const size_t header_size = m_data_offset * sizeof(uint32_t); + const size_t alloc_size = header_size + + ( size_t(m_sb_count) << m_sb_size_lg2 ); - m_track.assign_allocated_record_to_uninitialized( rec ); - m_data = reinterpret_cast<char *>( rec->data() ); + Record * rec = Record::allocate( memspace , "MemoryPool" , alloc_size ); - // Set and initialize the free / empty block bitset memory. - m_sb_blocks.init( m_data + m_data_size, m_num_sb << m_lg_max_sb_blocks ); + m_tracker.assign_allocated_record_to_uninitialized( rec ); - // Set and initialize the empty superblock block bitset memory. - m_empty_sb.init( m_data + m_data_size + m_sb_blocks_size, m_num_sb ); + m_sb_state_array = (uint32_t *) rec->data(); - // Start with all superblocks in the empty category. - m_empty_sb.set(); + Kokkos::HostSpace host ; - // Set and initialize the partfull superblock block bitset memory. - m_partfull_sb.init( m_data + m_data_size + m_sb_blocks_size + m_empty_sb_size, - m_ceil_num_sb * m_num_block_size ); + uint32_t * const sb_state_array = + accessible ? m_sb_state_array + : (uint32_t *) host.allocate(header_size); - // Initialize all active superblocks to be invalid. - typename UInt32View::HostMirror host_active = create_mirror_view( m_active ); - for ( size_t i = 0; i < m_num_block_size; ++i ) host_active(i) = INVALID_SUPERBLOCK; - deep_copy( m_active, host_active ); + for ( int32_t i = 0 ; i < m_data_offset ; ++i ) sb_state_array[i] = 0 ; - // A superblock is considered full when this percentage of its pages are full. - const double superblock_full_fraction = .8; + // Initial assignment of empty superblocks to block sizes: - // A page is considered full when this percentage of its blocks are full. 
- const double page_full_fraction = .875; + for ( int32_t i = 0 ; i < number_block_sizes ; ++i ) { + const uint32_t block_size_lg2 = i + m_min_block_size_lg2 ; + const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ; + const uint32_t block_state = block_count_lg2 << state_shift ; + const uint32_t hint_begin = m_hint_offset + i * HINT_PER_BLOCK_SIZE ; - // Initialize the blocksize info. - for ( size_t i = 0; i < m_num_block_size; ++i ) { - uint32_t lg_block_size = i + LG_MIN_BLOCK_SIZE; - uint32_t blocks_per_sb = m_sb_size >> lg_block_size; - uint32_t pages_per_sb = ( blocks_per_sb + BLOCKS_PER_PAGE - 1 ) >> LG_BLOCKS_PER_PAGE; + // for block size index 'i': + // sb_id_hint = sb_state_array[ hint_begin ]; + // sb_id_begin = sb_state_array[ hint_begin + 1 ]; - m_blocksize_info[i].m_blocks_per_sb = blocks_per_sb; - m_blocksize_info[i].m_pages_per_sb = pages_per_sb; + const int32_t jbeg = ( i * m_sb_count ) / number_block_sizes ; + const int32_t jend = ( ( i + 1 ) * m_sb_count ) / number_block_sizes ; - // Set the full level for the superblock. - m_blocksize_info[i].m_sb_full_level = - static_cast<uint32_t>( pages_per_sb * superblock_full_fraction ); + sb_state_array[ hint_begin ] = uint32_t(jbeg); + sb_state_array[ hint_begin + 1 ] = uint32_t(jbeg); - if ( m_blocksize_info[i].m_sb_full_level == 0 ) { - m_blocksize_info[i].m_sb_full_level = 1; + for ( int32_t j = jbeg ; j < jend ; ++j ) { + sb_state_array[ j * m_sb_state_size ] = block_state ; + } } - // Set the full level for the page. - uint32_t blocks_per_page = - blocks_per_sb < BLOCKS_PER_PAGE ? blocks_per_sb : BLOCKS_PER_PAGE; + // Write out initialized state: - m_blocksize_info[i].m_page_full_level = - static_cast<uint32_t>( blocks_per_page * page_full_fraction ); + if ( ! 
accessible ) { + Kokkos::Impl::DeepCopy< base_memory_space , Kokkos::HostSpace > + ( m_sb_state_array , sb_state_array , header_size ); - if ( m_blocksize_info[i].m_page_full_level == 0 ) { - m_blocksize_info[i].m_page_full_level = 1; + host.deallocate( sb_state_array, header_size ); + } + else { + Kokkos::memory_fence(); } } -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_CONSTRUCTOR_INFO - printf( "\n" ); - printf( " m_lg_sb_size: %12lu\n", m_lg_sb_size ); - printf( " m_sb_size: %12lu\n", m_sb_size ); - printf( " m_max_sb_blocks: %12lu\n", size_t(1) << m_lg_max_sb_blocks ); - printf( "m_lg_max_sb_blocks: %12lu\n", m_lg_max_sb_blocks ); - printf( " m_num_sb: %12lu\n", m_num_sb ); - printf( " m_ceil_num_sb: %12lu\n", m_ceil_num_sb ); - printf( " m_num_block_size: %12lu\n", m_num_block_size ); - printf( " data bytes: %12lu\n", m_data_size ); - printf( " sb_blocks bytes: %12lu\n", m_sb_blocks_size ); - printf( " empty_sb bytes: %12lu\n", m_empty_sb_size ); - printf( " partfull_sb bytes: %12lu\n", m_partfull_sb_size ); - printf( " total bytes: %12lu\n", m_total_size ); - printf( " m_empty_sb size: %12u\n", m_empty_sb.size() ); - printf( "m_partfull_sb size: %12u\n", m_partfull_sb.size() ); - printf( "\n" ); - fflush( stdout ); -#endif - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO - // Print the blocksize info for all the block sizes. - printf( "SIZE BLOCKS_PER_SB PAGES_PER_SB SB_FULL_LEVEL PAGE_FULL_LEVEL\n" ); - for ( size_t i = 0; i < m_num_block_size; ++i ) { - printf( "%4zu %13u %12u %13u %15u\n", i + LG_MIN_BLOCK_SIZE, - m_blocksize_info[i].m_blocks_per_sb, m_blocksize_info[i].m_pages_per_sb, - m_blocksize_info[i].m_sb_full_level, m_blocksize_info[i].m_page_full_level ); - } - printf( "\n" ); -#endif - } + //-------------------------------------------------------------------------- - /// \brief The actual block size allocated given alloc_size. 
- KOKKOS_INLINE_FUNCTION - size_t allocate_block_size( const size_t alloc_size ) const - { return size_t(1) << ( get_block_size_index( alloc_size ) + LG_MIN_BLOCK_SIZE ); } - - /// \brief Allocate a chunk of memory. - /// \param alloc_size Size of the requested allocated in number of bytes. - /// - /// The function returns a void pointer to a memory location on success and - /// NULL on failure. - KOKKOS_FUNCTION - void * allocate( size_t alloc_size ) const - { - void * p = 0; +private: - // Only support allocations up to the superblock size. Just return 0 - // (failed allocation) for any size above this. - if ( alloc_size <= m_sb_size ) + /* Given a size 'n' get the block size in which it can be allocated. + * Restrict lower bound to minimum block size. + */ + KOKKOS_FORCEINLINE_FUNCTION + unsigned get_block_size_lg2( unsigned n ) const noexcept { - int block_size_id = get_block_size_index( alloc_size ); - uint32_t blocks_per_sb = m_blocksize_info[block_size_id].m_blocks_per_sb; - uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; + const unsigned i = Kokkos::Impl::integral_power_of_two_that_contains( n ); -#ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND - // Without this test it looks like pages_per_sb might come back wrong. - if ( pages_per_sb == 0 ) return NULL; -#endif - - unsigned word_size = blocks_per_sb > 32 ? 32 : blocks_per_sb; - unsigned word_mask = ( uint64_t(1) << word_size ) - 1; + return i < m_min_block_size_lg2 ? m_min_block_size_lg2 : i ; + } - // Instead of forcing an atomic read to guarantee the updated value, - // reading the old value is actually beneficial because more threads will - // attempt allocations on the old active superblock instead of waiting on - // the new active superblock. This will help hide the latency of - // switching the active superblock. - uint32_t sb_id = volatile_load( &m_active(block_size_id) ); +public: - // If the active is locked, keep reading it atomically until the lock is - // released. 
- while ( sb_id == SUPERBLOCK_LOCK ) { - sb_id = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) ); - } + KOKKOS_INLINE_FUNCTION + uint32_t allocate_block_size( uint32_t alloc_size ) const noexcept + { + return alloc_size <= (1UL << m_max_block_size_lg2) + ? ( 1u << get_block_size_lg2( alloc_size ) ) + : 0 ; + } - load_fence(); + //-------------------------------------------------------------------------- + /**\brief Allocate a block of memory that is at least 'alloc_size' + * + * The block of memory is aligned to the minimum block size, + * currently is 64 bytes, will never be less than 32 bytes. + * + * If concurrent allocations and deallocations are taking place + * then a single allocation attempt may fail due to lack of available space. + * The allocation attempt will try up to 'attempt_limit' times. + */ + KOKKOS_FUNCTION + void * allocate( size_t alloc_size + , int32_t attempt_limit = 1 ) const noexcept + { + void * p = 0 ; - bool allocation_done = false; + const uint32_t block_size_lg2 = get_block_size_lg2( alloc_size ); - while ( !allocation_done ) { - bool need_new_sb = false; + if ( block_size_lg2 <= m_max_block_size_lg2 ) { - if ( sb_id != INVALID_SUPERBLOCK ) { - // Use the value from the clock register as the hash value. - uint64_t hash_val = get_clock_register(); + // Allocation will fit within a superblock + // that has block sizes ( 1 << block_size_lg2 ) - // Get the starting position for this superblock's bits in the bitset. - uint32_t pos_base = sb_id << m_lg_max_sb_blocks; + const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ; + const uint32_t block_state = block_count_lg2 << state_shift ; + const uint32_t block_count = 1u << block_count_lg2 ; + const uint32_t block_count_mask = block_count - 1 ; - // Mod the hash value to choose a page in the superblock. The - // initial block searched is the first block of that page. 
- uint32_t pos_rel = uint32_t( hash_val & ( pages_per_sb - 1 ) ) << LG_BLOCKS_PER_PAGE; + // Superblock hints for this block size: + // hint_sb_id_ptr[0] is the dynamically changing hint + // hint_sb_id_ptr[1] is the static start point - // Get the absolute starting position for this superblock's bits in the bitset. - uint32_t pos = pos_base + pos_rel; + volatile uint32_t * const hint_sb_id_ptr + = m_sb_state_array /* memory pool state array */ + + m_hint_offset /* offset to hint portion of array */ + + HINT_PER_BLOCK_SIZE /* number of hints per block size */ + * ( block_size_lg2 - m_min_block_size_lg2 ); /* block size id */ - // Keep track of the number of pages searched. Pages in the superblock are - // searched linearly from the starting page. All pages in the superblock are - // searched until either a location is found, or it is proven empty. - uint32_t pages_searched = 0; + const int32_t sb_id_begin = int32_t( hint_sb_id_ptr[1] ); - bool search_done = false; + // Fast query clock register 'tic' to pseudo-randomize + // the guess for which block within a superblock should + // be claimed. If not available then a search occurs. - while ( !search_done ) { - bool success = false; - unsigned prev_val = 0; + const uint32_t block_id_hint = block_count_mask & + (uint32_t)( Kokkos::Impl::clock_tic() +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) + // Spread out potentially concurrent access + // by threads within a warp or thread block. + + ( threadIdx.x + blockDim.x * threadIdx.y ) +#endif + ); - Kokkos::tie( success, prev_val ) = m_sb_blocks.set_any_in_word( pos, word_mask ); + int32_t sb_id = -1 ; - if ( !success ) { - if ( ++pages_searched >= pages_per_sb ) { - // Searched all the pages in this superblock. Look for a new superblock. - // - // The previous method tried limiting the number of pages searched, but - // that caused a huge performance issue in CUDA where the outer loop - // executed massive numbers of times. 
Threads weren't able to find a - // free location when the superblock wasn't full and were able to execute - // the outer loop many times before the superblock was switched for a new - // one. Switching to an exhaustive search eliminated this possiblity and - // didn't slow anything down for the tests. - need_new_sb = true; - search_done = true; - } - else { - // Move to the next page making sure the new search position - // doesn't go past this superblock's bits. - pos += BLOCKS_PER_PAGE; - pos = ( pos < pos_base + blocks_per_sb ) ? pos : pos_base; - } - } - else { - // Reserved a memory location to allocate. - memory_fence(); + volatile uint32_t * sb_state_array = 0 ; - search_done = true; - allocation_done = true; + while ( attempt_limit ) { - uint32_t lg_block_size = block_size_id + LG_MIN_BLOCK_SIZE; + int32_t hint_sb_id = -1 ; - p = m_data + ( size_t(sb_id) << m_lg_sb_size ) + - ( ( pos - pos_base ) << lg_block_size ); + if ( sb_id < 0 ) { - uint32_t used_bits = Kokkos::Impl::bit_count( prev_val ); + sb_id = hint_sb_id = int32_t( *hint_sb_id_ptr ); - if ( used_bits == 0 ) { - // This page was empty. Decrement the number of empty pages for - // the superblock. - atomic_decrement( &m_sb_header(sb_id).m_empty_pages ); - } - else if ( used_bits == m_blocksize_info[block_size_id].m_page_full_level - 1 ) - { - // This page is full. Increment the number of full pages for - // the superblock. - uint32_t full_pages = atomic_fetch_add( &m_sb_header(sb_id).m_full_pages, 1 ); - - // This allocation made the superblock full, so a new one needs to be found. - if ( full_pages == m_blocksize_info[block_size_id].m_sb_full_level - 1 ) { - need_new_sb = true; - } - } - } - } - } - else { - // This is the first allocation for this block size. A superblock needs - // to be set as the active one. If this point is reached any other time, - // it is an error. 
- need_new_sb = true; - } - - if ( need_new_sb ) { - uint32_t new_sb_id = find_superblock( block_size_id, sb_id ); - - if ( new_sb_id == sb_id ) { - allocation_done = true; -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO - printf( "** No superblocks available. **\n" ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif -#endif + sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size ); } - else { - sb_id = new_sb_id; - } - } - } - } -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO - else { - printf( "** Requested allocation size (%zu) larger than superblock size (%lu). **\n", - alloc_size, m_sb_size ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - } -#endif - return p; - } + // Require: + // 0 <= sb_id + // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id - /// \brief Release allocated memory back to the pool. - /// \param alloc_ptr Pointer to chunk of memory previously allocated by - /// the allocator. - /// \param alloc_size Size of the allocated memory in number of bytes. - KOKKOS_FUNCTION - void deallocate( void * alloc_ptr, size_t alloc_size ) const - { - char * ap = static_cast<char *>( alloc_ptr ); - - // Only deallocate memory controlled by this pool. - if ( ap >= m_data && ap + alloc_size <= m_data + m_data_size ) { - // Get the superblock for the address. This can be calculated by math on - // the address since the superblocks are stored contiguously in one memory - // chunk. - uint32_t sb_id = ( ap - m_data ) >> m_lg_sb_size; - - // Get the starting position for this superblock's bits in the bitset. - uint32_t pos_base = sb_id << m_lg_max_sb_blocks; - - // Get the relative position for this memory location's bit in the bitset. 
- uint32_t offset = ( ap - m_data ) - ( size_t(sb_id) << m_lg_sb_size ); - uint32_t lg_block_size = m_sb_header(sb_id).m_lg_block_size; - uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; - uint32_t pos_rel = offset >> lg_block_size; + if ( block_state == ( state_header_mask & *sb_state_array ) ) { - bool success = false; - unsigned prev_val = 0; + // This superblock state is assigned to this block size. + // Try to claim a bit. - memory_fence(); + const Kokkos::pair<int,int> result = + CB::acquire_bounded_lg2( sb_state_array + , block_count_lg2 + , block_id_hint + , block_state + ); - Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel ); + // If result.first < 0 then failed to acquire + // due to either full or buffer was wrong state. + // Could be wrong state if a deallocation raced the + // superblock to empty before the acquire could succeed. - // If the memory location was previously deallocated, do nothing. - if ( success ) { - uint32_t page_fill_level = Kokkos::Impl::bit_count( prev_val ); + if ( 0 <= result.first ) { // acquired a bit - if ( page_fill_level == 1 ) { - // This page is now empty. Increment the number of empty pages for the - // superblock. - uint32_t empty_pages = atomic_fetch_add( &m_sb_header(sb_id).m_empty_pages, 1 ); + // Set the allocated block pointer - if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) && - empty_pages == m_blocksize_info[block_size_id].m_pages_per_sb - 1 ) - { - // This deallocation caused the superblock to be empty. Change the - // superblock category from partially full to empty. - unsigned pos = block_size_id * m_ceil_num_sb + sb_id; + p = ((char*)( m_sb_state_array + m_data_offset )) + + ( uint32_t(sb_id) << m_sb_size_lg2 ) // superblock memory + + ( result.first << block_size_lg2 ); // block memory - if ( m_partfull_sb.reset( pos ) ) { - // Reset the empty pages and block size for the superblock. 
- volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) ); - volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) ); + break ; // Success + } - store_fence(); +// printf(" acquire block_count_lg2(%d) block_state(0x%x) sb_id(%d) result(%d,%d)\n" , block_count_lg2 , block_state , sb_id , result.first , result.second ); - m_empty_sb.set( sb_id ); - } - } - } - else if ( page_fill_level == m_blocksize_info[block_size_id].m_page_full_level ) { - // This page is no longer full. Decrement the number of full pages for - // the superblock. - uint32_t full_pages = atomic_fetch_sub( &m_sb_header(sb_id).m_full_pages, 1 ); - - if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) && - full_pages == m_blocksize_info[block_size_id].m_sb_full_level ) - { - // This deallocation caused the number of full pages to decrease below - // the full threshold. Change the superblock category from full to - // partially full. - unsigned pos = block_size_id * m_ceil_num_sb + sb_id; - m_partfull_sb.set( pos ); } - } - } - } -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR - else { - printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n", - reinterpret_cast<uint64_t>( alloc_ptr ) ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - } -#endif - } + //------------------------------------------------------------------ + // Arrive here if failed to acquire a block. + // Must find a new superblock. - /// \brief Tests if the memory pool has no more memory available to allocate. - KOKKOS_INLINE_FUNCTION - bool is_empty() const - { - // The allocator is empty if all superblocks are full. A superblock is - // full if it has >= 80% of its pages allocated. + // Start searching at designated index for this block size. + // Look for a partially full superblock of this block size. + // Look for an empty superblock just in case cannot find partfull. - // Look at all the superblocks. If one is not full, then the allocator - // isn't empty. 
- for ( size_t i = 0; i < m_num_sb; ++i ) { - uint32_t lg_block_size = m_sb_header(i).m_lg_block_size; + sb_id = -1 ; - // A superblock only has a block size of 0 when it is empty. - if ( lg_block_size == 0 ) return false; + int32_t sb_id_empty = -1 ; - uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; - uint32_t full_pages = volatile_load( &m_sb_header(i).m_full_pages ); + sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size ; - if ( full_pages < m_blocksize_info[block_size_id].m_sb_full_level ) return false; - } - - // All the superblocks were full. The allocator is empty. - return true; - } + for ( int32_t i = 0 , id = sb_id_begin ; i < m_sb_count ; ++i ) { - // The following functions are used for debugging. - void print_status() const - { - printf( "\n" ); + // Query state of the candidate superblock. + // Note that the state may change at any moment + // as concurrent allocations and deallocations occur. + + const uint32_t state = *sb_state_array ; + const uint32_t used = state & state_used_mask ; -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO - typename SBHeaderView::HostMirror host_sb_header = create_mirror_view( m_sb_header ); - deep_copy( host_sb_header, m_sb_header ); + if ( block_state == ( state & state_header_mask ) ) { - UInt32View num_allocated_blocks( "Allocated Blocks", m_num_sb ); + // Superblock is assigned to this block size - // Count the number of allocated blocks per superblock. - { - MempoolImpl::count_allocated_blocks< UInt32View, SBHeaderView, MempoolBitset > - mch( m_num_sb, num_allocated_blocks, m_sb_header, - m_sb_blocks, m_sb_size, m_lg_max_sb_blocks ); - } + if ( used < block_count ) { - typename UInt32View::HostMirror host_num_allocated_blocks = - create_mirror_view( num_allocated_blocks ); - deep_copy( host_num_allocated_blocks, num_allocated_blocks ); - - // Print header info of all superblocks. 
- printf( "SB_ID SIZE ACTIVE EMPTY_PAGES FULL_PAGES USED_BLOCKS\n" ); - for ( size_t i = 0; i < m_num_sb; ++i ) { - printf( "%5zu %4u %6d %11u %10u %10u\n", i, - host_sb_header(i).m_lg_block_size, host_sb_header(i).m_is_active, - host_sb_header(i).m_empty_pages, host_sb_header(i).m_full_pages, - host_num_allocated_blocks(i) ); - } + // There is room to allocate one block - printf( "\n" ); -#endif + sb_id = id ; - UInt32View page_histogram( "Page Histogram", 33 ); + if ( used + 1 < block_count ) { - // Get a View version of the blocksize info. - typedef View< BlockSizeHeader *, device_type > BSHeaderView; - BSHeaderView blocksize_info( "BlockSize Headers", MAX_BLOCK_SIZES ); + // There is room to allocate more than one block - Kokkos::Impl::DeepCopy< backend_memory_space, Kokkos::HostSpace > - dc( blocksize_info.ptr_on_device(), m_blocksize_info, - sizeof(BlockSizeHeader) * m_num_block_size ); + Kokkos::atomic_compare_exchange + ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) ); + } - Kokkos::pair< double, uint32_t > result = Kokkos::pair< double, uint32_t >( 0.0, 0 ); + break ; + } + } + else if ( ( used == 0 ) && ( sb_id_empty == -1 ) ) { - // Create the page histogram. - { - MempoolImpl::create_histogram< UInt32View, BSHeaderView, SBHeaderView, MempoolBitset > - mch( 0, m_num_sb, page_histogram, blocksize_info, m_sb_header, m_sb_blocks, - m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result ); - } + // Superblock is not assigned to this block size + // and is the first empty superblock encountered. + // Save this id to use if a partfull superblock is not found. - typename UInt32View::HostMirror host_page_histogram = create_mirror_view( page_histogram ); - deep_copy( host_page_histogram, page_histogram ); + sb_id_empty = id ; + } - // Find the used and total pages and blocks. 
- uint32_t used_pages = 0; - uint32_t used_blocks = 0; - for ( uint32_t i = 1; i < 33; ++i ) { - used_pages += host_page_histogram(i); - used_blocks += i * host_page_histogram(i); - } - uint32_t total_pages = used_pages + host_page_histogram(0); + if ( ++id < m_sb_count ) { + sb_state_array += m_sb_state_size ; + } + else { + id = 0 ; + sb_state_array = m_sb_state_array ; + } + } - unsigned num_empty_sb = m_empty_sb.count(); - unsigned num_non_empty_sb = m_num_sb - num_empty_sb; - unsigned num_partfull_sb = m_partfull_sb.count(); +// printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)\n" , m_sb_count , sb_id , sb_id_empty ); - uint32_t total_blocks = result.second; - double ave_sb_full = num_non_empty_sb == 0 ? 0.0 : result.first / num_non_empty_sb; - double percent_used_sb = double( m_num_sb - num_empty_sb ) / m_num_sb; - double percent_used_pages = total_pages == 0 ? 0.0 : double(used_pages) / total_pages; - double percent_used_blocks = total_blocks == 0 ? 0.0 : double(used_blocks) / total_blocks; + if ( sb_id < 0 ) { - // Count active superblocks. - typename UInt32View::HostMirror host_active = create_mirror_view( m_active ); - deep_copy( host_active, m_active ); + // Did not find a partfull superblock for this block size. - unsigned num_active_sb = 0; - for ( size_t i = 0; i < m_num_block_size; ++i ) { - num_active_sb += host_active(i) != INVALID_SUPERBLOCK; - } + if ( 0 <= sb_id_empty ) { -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_ACTIVE_SUPERBLOCKS - // Print active superblocks. - printf( "BS_ID SB_ID\n" ); - for ( size_t i = 0; i < m_num_block_size; ++i ) { - uint32_t sb_id = host_active(i); + // Found first empty superblock following designated superblock + // Attempt to claim it for this block size. + // If the claim fails assume that another thread claimed it + // for this block size and try to use it anyway, + // but do not update hint. 
- if ( sb_id == INVALID_SUPERBLOCK ) { - printf( "%5zu I\n", i ); - } - else if ( sb_id == SUPERBLOCK_LOCK ) { - printf( "%5zu L\n", i ); - } - else { - printf( "%5zu %7u\n", i, sb_id ); - } - } - printf( "\n" ); - fflush( stdout ); -#endif + sb_id = sb_id_empty ; -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO - // Print the summary page histogram. - printf( "USED_BLOCKS PAGE_COUNT\n" ); - for ( uint32_t i = 0; i < 33; ++i ) { - printf( "%10u %10u\n", i, host_page_histogram[i] ); - } - printf( "\n" ); -#endif + sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size ); -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO - // Print the page histogram for a few individual superblocks. -// const uint32_t num_sb_id = 2; -// uint32_t sb_id[num_sb_id] = { 0, 10 }; - const uint32_t num_sb_id = 1; - uint32_t sb_id[num_sb_id] = { 0 }; + // If successfully changed assignment of empty superblock 'sb_id' + // to this block_size then update the hint. - for ( uint32_t i = 0; i < num_sb_id; ++i ) { - deep_copy( page_histogram, 0 ); + const uint32_t state_empty = state_header_mask & *sb_state_array ; - { - MempoolImpl::create_histogram< UInt32View, BSHeaderView, SBHeaderView, MempoolBitset > - mch( sb_id[i], sb_id[i] + 1, page_histogram, blocksize_info, m_sb_header, - m_sb_blocks, m_lg_max_sb_blocks, LG_MIN_BLOCK_SIZE, BLOCKS_PER_PAGE, result ); - } + if ( state_empty == + Kokkos::atomic_compare_exchange + (sb_state_array,state_empty,block_state) ) { - deep_copy( host_page_histogram, page_histogram ); + // If this thread claimed the block then update the hint - printf( "SB_ID USED_BLOCKS PAGE_COUNT\n" ); - for ( uint32_t j = 0; j < 33; ++j ) { - printf( "%5u %10u %10u\n", sb_id[i], j, host_page_histogram[j] ); - } - printf( "\n" ); - } - -/* - // Print the blocks used for each page of a few individual superblocks. 
- for ( uint32_t i = 0; i < num_sb_id; ++i ) { - uint32_t lg_block_size = host_sb_header(sb_id[i]).m_lg_block_size; - - if ( lg_block_size != 0 ) { - printf( "SB_ID BLOCK ID USED_BLOCKS\n" ); - - uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE; - uint32_t pages_per_sb = m_blocksize_info[block_size_id].m_pages_per_sb; - - for ( uint32_t j = 0; j < pages_per_sb; ++j ) { - unsigned start_pos = ( sb_id[i] << m_lg_max_sb_blocks ) + j * BLOCKS_PER_PAGE; - unsigned end_pos = start_pos + BLOCKS_PER_PAGE; - uint32_t num_allocated_blocks = 0; - - for ( unsigned k = start_pos; k < end_pos; ++k ) { - num_allocated_blocks += m_sb_blocks.test( k ); + Kokkos::atomic_compare_exchange + ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) ); + } + } + else { + // Did not find a potentially usable superblock + --attempt_limit ; + } } + } // end allocation attempt loop - printf( "%5u %8u %11u\n", sb_id[i], j, num_allocated_blocks ); - } - - printf( "\n" ); + //-------------------------------------------------------------------- + } + else { + Kokkos::abort("Kokkos MemoryPool allocation request exceeded specified maximum allocation size"); } - } -*/ -#endif - - printf( " Used blocks: %10u / %10u = %10.6lf\n", used_blocks, total_blocks, - percent_used_blocks ); - printf( " Used pages: %10u / %10u = %10.6lf\n", used_pages, total_pages, - percent_used_pages ); - printf( " Used SB: %10zu / %10zu = %10.6lf\n", m_num_sb - num_empty_sb, m_num_sb, - percent_used_sb ); - printf( " Active SB: %10u\n", num_active_sb ); - printf( " Empty SB: %10u\n", num_empty_sb ); - printf( " Partfull SB: %10u\n", num_partfull_sb ); - printf( " Full SB: %10lu\n", - m_num_sb - num_active_sb - num_empty_sb - num_partfull_sb ); - printf( "Ave. 
SB Full %%: %10.6lf\n", ave_sb_full ); - printf( "\n" ); - fflush( stdout ); - -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); -#endif - } - - KOKKOS_INLINE_FUNCTION - size_t get_min_block_size() const { return MIN_BLOCK_SIZE; } + return p ; + } + // end allocate + //-------------------------------------------------------------------------- + + /**\brief Return an allocated block of memory to the pool. + * + * Requires: p is return value from allocate( alloc_size ); + * + * For now the alloc_size is ignored. + */ KOKKOS_INLINE_FUNCTION - size_t get_mem_size() const { return m_data_size; } - -private: - /// \brief Returns the index into the active array for the given size. - /// - /// Computes log2 of the largest power of two >= the given size - /// ( ie ceil( log2(size) ) ) shifted by LG_MIN_BLOCK_SIZE. - KOKKOS_FORCEINLINE_FUNCTION - int get_block_size_index( const size_t size ) const - { - // We know the size fits in a 32 bit unsigned because the size of a - // superblock is limited to 2^31, so casting to an unsigned is safe. - - // Find the most significant nonzero bit. - uint32_t first_nonzero_bit = - Kokkos::Impl::bit_scan_reverse( static_cast<unsigned>( size ) ); - - // If size is an integral power of 2, ceil( log2(size) ) is equal to the - // most significant nonzero bit. Otherwise, you need to add 1. Since the - // minimum block size is MIN_BLOCK_SIZE, make sure ceil( log2(size) ) is at - // least LG_MIN_BLOCK_SIZE. - uint32_t lg2_size = first_nonzero_bit + !Kokkos::Impl::is_integral_power_of_two( size ); - lg2_size = lg2_size > LG_MIN_BLOCK_SIZE ? lg2_size : LG_MIN_BLOCK_SIZE; - - // Return ceil( log2(size) ) shifted so that the value for MIN_BLOCK_SIZE - // is 0. - return lg2_size - LG_MIN_BLOCK_SIZE; - } - - /// \brief Finds a superblock with free space to become a new active superblock. - /// - /// If this function is called, the current active superblock needs to be replaced - /// because it is full. 
Initially, only the thread that sets the active superblock - /// to full calls this function. Other threads can still allocate from the "full" - /// active superblock because a full superblock still has locations available. If - /// a thread tries to allocate from the active superblock when it has no free - /// locations, then that thread will call this function, too, and spin on a lock - /// waiting until the active superblock has been replaced. - KOKKOS_FUNCTION - uint32_t find_superblock( int block_size_id, uint32_t old_sb ) const - { - // Try to grab the lock on the head. - uint32_t lock_sb = - Kokkos::atomic_compare_exchange( &m_active(block_size_id), old_sb, SUPERBLOCK_LOCK ); - - load_fence(); - - // Initialize the new superblock to be the previous one so the previous - // superblock is returned if a new superblock can't be found. - uint32_t new_sb = lock_sb; + void deallocate( void * p , size_t /* alloc_size */ ) const noexcept + { + // Determine which superblock and block + const ptrdiff_t d = + ((char*)p) - ((char*)( m_sb_state_array + m_data_offset )); - if ( lock_sb == old_sb ) { - // This thread has the lock. + // Verify contained within the memory pool's superblocks: + const int ok_contains = + ( 0 <= d ) && ( size_t(d) < ( size_t(m_sb_count) << m_sb_size_lg2 ) ); - // 1. Look for a partially filled superblock that is of the right block - // size. + int ok_block_aligned = 0 ; + int ok_dealloc_once = 0 ; - size_t max_tries = m_ceil_num_sb >> LG_BLOCKS_PER_PAGE; - size_t tries = 0; - bool search_done = false; + if ( ok_contains ) { - // Set the starting search position to the beginning of this block - // size's bitset. - unsigned pos = block_size_id * m_ceil_num_sb; + const int sb_id = d >> m_sb_size_lg2 ; - while ( !search_done ) { - bool success = false; - unsigned prev_val = 0; + // State array for the superblock. 
+ volatile uint32_t * const sb_state_array = + m_sb_state_array + ( sb_id * m_sb_state_size ); - Kokkos::tie( success, prev_val ) = m_partfull_sb.reset_any_in_word( pos ); + const uint32_t block_state = (*sb_state_array) & state_header_mask ; + const uint32_t block_size_lg2 = + m_sb_size_lg2 - ( block_state >> state_shift ); - if ( !success ) { - if ( ++tries >= max_tries ) { - // Exceeded number of words for this block size's bitset. - search_done = true; - } - else { - pos += BLOCKS_PER_PAGE; - } - } - else { - // Found a superblock. - - // It is possible that the newly found superblock is the same as the - // old superblock. In this case putting the old value back in yields - // correct behavior. This could happen as follows. This thread - // grabs the lock and transitions the superblock to the full state. - // Before it searches for a new superblock, other threads perform - // enough deallocations to transition the superblock to the partially - // full state. This thread then searches for a partially full - // superblock and finds the one it removed. There's potential for - // this to cause a performance issue if the same superblock keeps - // being removed and added due to the right mix and ordering of - // allocations and deallocations. - search_done = true; - new_sb = pos - block_size_id * m_ceil_num_sb; - - // Set the head status for the superblock. - volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) ); - - // If there was a previous active superblock, mark it as not active. - // It is now in the full category and as such isn't tracked. - if ( lock_sb != INVALID_SUPERBLOCK ) { - volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); - } + ok_block_aligned = 0 == ( d & ( ( 1 << block_size_lg2 ) - 1 ) ); - store_fence(); - } - } + if ( ok_block_aligned ) { - // 2. Look for an empty superblock. 
- if ( new_sb == lock_sb ) { - tries = 0; - search_done = false; + // Map address to block's bit + // mask into superblock and then shift down for block index - // Set the starting search position to the beginning of this block - // size's bitset. - pos = 0; + const uint32_t bit = + ( d & ( ptrdiff_t( 1 << m_sb_size_lg2 ) - 1 ) ) >> block_size_lg2 ; - while ( !search_done ) { - bool success = false; - unsigned prev_val = 0; + const int result = + CB::release( sb_state_array , bit , block_state ); - Kokkos::tie( success, prev_val ) = m_empty_sb.reset_any_in_word( pos ); + ok_dealloc_once = 0 <= result ; - if ( !success ) { - if ( ++tries >= max_tries ) { - // Exceeded number of words for this block size's bitset. - search_done = true; - } - else { - pos += BLOCKS_PER_PAGE; - } - } - else { - // Found a superblock. - - // It is possible that the newly found superblock is the same as - // the old superblock. In this case putting the old value back in - // yields correct behavior. This could happen as follows. This - // thread grabs the lock and transitions the superblock to the full - // state. Before it searches for a new superblock, other threads - // perform enough deallocations to transition the superblock to the - // partially full state and then the empty state. This thread then - // searches for a partially full superblock and none exist. This - // thread then searches for an empty superblock and finds the one - // it removed. The likelihood of this happening is so remote that - // the potential for this to cause a performance issue is - // infinitesimal. - search_done = true; - new_sb = pos; - - // Set the empty pages, block size, and head status for the - // superblock. 
- volatile_store( &m_sb_header(new_sb).m_empty_pages, - m_blocksize_info[block_size_id].m_pages_per_sb ); - volatile_store( &m_sb_header(new_sb).m_lg_block_size, - block_size_id + LG_MIN_BLOCK_SIZE ); - volatile_store( &m_sb_header(new_sb).m_is_active, uint32_t(true) ); - - // If there was a previous active superblock, mark it as not active. - // It is now in the full category and as such isn't tracked. - if ( lock_sb != INVALID_SUPERBLOCK ) { - volatile_store( &m_sb_header(lock_sb).m_is_active, uint32_t(false) ); - } +// printf(" deallocate from sb_id(%d) result(%d) bit(%d) state(0x%x)\n" +// , sb_id +// , result +// , uint32_t(d >> block_size_lg2) +// , *sb_state_array ); - store_fence(); - } } } - // Write the new active superblock to release the lock. - atomic_exchange( &m_active(block_size_id), new_sb ); - } - else { - // Either another thread has the lock and is switching the active - // superblock for this block size or another thread has already changed - // the active superblock since this thread read its value. Keep - // atomically reading the active superblock until it isn't locked to get - // the new active superblock. - do { - new_sb = atomic_fetch_or( &m_active(block_size_id), uint32_t(0) ); - } while ( new_sb == SUPERBLOCK_LOCK ); - - load_fence(); - - // Assertions: - // 1. An invalid superblock should never be found here. - // 2. If the new superblock is the same as the previous superblock, the - // allocator is empty. -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR - if ( new_sb == INVALID_SUPERBLOCK ) { - printf( "\n** MemoryPool::find_superblock() FOUND_INACTIVE_SUPERBLOCK **\n" ); -#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - fflush( stdout ); + if ( ! ok_contains || ! ok_block_aligned || ! 
ok_dealloc_once ) { +#if 0 + printf("Kokkos MemoryPool deallocate(0x%lx) contains(%d) block_aligned(%d) dealloc_once(%d)\n",(uintptr_t)p,ok_contains,ok_block_aligned,ok_dealloc_once); #endif - Kokkos::abort( "" ); + Kokkos::abort("Kokkos MemoryPool::deallocate given erroneous pointer"); } -#endif } - - return new_sb; - } - - /// Returns 64 bits from a clock register. - KOKKOS_FORCEINLINE_FUNCTION - uint64_t get_clock_register(void) const - { -#if defined( __CUDA_ARCH__ ) - // Return value of 64-bit hi-res clock register. - return clock64(); -#elif defined( __i386__ ) || defined( __x86_64 ) - // Return value of 64-bit hi-res clock register. - unsigned a = 0, d = 0; - - __asm__ volatile( "rdtsc" : "=a" (a), "=d" (d) ); - - return ( (uint64_t) a ) | ( ( (uint64_t) d ) << 32 ); -#elif defined( __powerpc ) || defined( __powerpc__ ) || defined( __powerpc64__ ) || \ - defined( __POWERPC__ ) || defined( __ppc__ ) || defined( __ppc64__ ) - unsigned int cycles = 0; - - asm volatile( "mftb %0" : "=r" (cycles) ); - - return (uint64_t) cycles; -#else - const uint64_t ticks = - std::chrono::high_resolution_clock::now().time_since_epoch().count(); - - return ticks; -#endif - } + // end deallocate + //-------------------------------------------------------------------------- }; -} // namespace Experimental -} // namespace Kokkos - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINTERR -#undef KOKKOS_ENABLE_MEMPOOL_PRINTERR -#endif - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO -#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INFO -#endif - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO -#undef KOKKOS_ENABLE_MEMPOOL_PRINT_BLOCKSIZE_INFO -#endif - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO -#undef KOKKOS_ENABLE_MEMPOOL_PRINT_SUPERBLOCK_INFO -#endif +} // namespace Kokkos -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO -#undef KOKKOS_ENABLE_MEMPOOL_PRINT_PAGE_INFO -#endif - -#ifdef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO -#undef KOKKOS_ENABLE_MEMPOOL_PRINT_INDIVIDUAL_PAGE_INFO -#endif +#endif /* 
#ifndef KOKKOS_MEMORYPOOL_HPP */ -#endif // KOKKOS_MEMORYPOOL_HPP diff --git a/lib/kokkos/core/src/Kokkos_NumericTraits.hpp b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp new file mode 100644 index 0000000000000000000000000000000000000000..339571941d8824b77981b36ed32f9d3b131bad78 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_NumericTraits.hpp @@ -0,0 +1,217 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_NUMERICTRAITS_HPP +#define KOKKOS_NUMERICTRAITS_HPP + +#include<climits> +#include<cfloat> + +namespace Kokkos { + +template<class T> +struct reduction_identity; /*{ + KOKKOS_FORCEINLINE_FUNCTION constexpr static T sum() { return T(); } // 0 + KOKKOS_FORCEINLINE_FUNCTION constexpr static T prod() // 1 + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom prod reduction type"); return T(); } + KOKKOS_FORCEINLINE_FUNCTION constexpr static T max() // minimum value + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom max reduction type"); return T(); } + KOKKOS_FORCEINLINE_FUNCTION constexpr static T min() // maximum value + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom min reduction type"); return T(); } + KOKKOS_FORCEINLINE_FUNCTION constexpr static T bor() // 0, only for integer type + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom bor reduction type"); return T(); } + KOKKOS_FORCEINLINE_FUNCTION constexpr static T band() // !0, only for integer type + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom band reduction type"); return T(); } + 
KOKKOS_FORCEINLINE_FUNCTION constexpr static T lor() // 0, only for integer type + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom lor reduction type"); return T(); } + KOKKOS_FORCEINLINE_FUNCTION constexpr static T land() // !0, only for integer type + { static_assert( false, "Missing specialization of Kokkos::reduction_identity for custom land reduction type"); return T(); } +};*/ + +template<> +struct reduction_identity<signed char> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char sum() {return static_cast<signed char>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char prod() {return static_cast<signed char>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char max() {return SCHAR_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char min() {return SCHAR_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char bor() {return static_cast<signed char>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char band() {return ~static_cast<signed char>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char lor() {return static_cast<signed char>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static signed char land() {return static_cast<signed char>(1);} +}; + +template<> +struct reduction_identity<short> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static short sum() {return static_cast<short>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short prod() {return static_cast<short>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short max() {return SHRT_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short min() {return SHRT_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short bor() {return static_cast<short>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short band() {return ~static_cast<short>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static short lor() {return static_cast<short>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static 
short land() {return static_cast<short>(1);} +}; + +template<> +struct reduction_identity<int> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static int sum() {return static_cast<int>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int prod() {return static_cast<int>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int max() {return INT_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int min() {return INT_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int bor() {return static_cast<int>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int band() {return ~static_cast<int>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int lor() {return static_cast<int>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static int land() {return static_cast<int>(1);} +}; + +template<> +struct reduction_identity<long> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static long sum() {return static_cast<long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long prod() {return static_cast<long>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long max() {return LLONG_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long min() {return LLONG_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long bor() {return static_cast<long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long band() {return ~static_cast<long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long lor() {return static_cast<long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long land() {return static_cast<long>(1);} +}; + +template<> +struct reduction_identity<long long> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long sum() {return static_cast<long long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long prod() {return static_cast<long long>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long max() {return LLONG_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long min() {return LLONG_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static 
long long bor() {return static_cast<long long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long band() {return ~static_cast<long long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long lor() {return static_cast<long long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long long land() {return static_cast<long long>(1);} +}; + +template<> +struct reduction_identity<unsigned char> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char sum() {return static_cast<unsigned char>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char prod() {return static_cast<unsigned char>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char max() {return static_cast<unsigned char>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char min() {return UCHAR_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char bor() {return static_cast<unsigned char>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char band() {return ~static_cast<unsigned char>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char lor() {return static_cast<unsigned char>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned char land() {return static_cast<unsigned char>(1);} +}; + +template<> +struct reduction_identity<unsigned short> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short sum() {return static_cast<unsigned short>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short prod() {return static_cast<unsigned short>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short max() {return static_cast<unsigned short>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short min() {return USHRT_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short bor() {return static_cast<unsigned short>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short band() {return ~static_cast<unsigned short>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION 
constexpr static unsigned short lor() {return static_cast<unsigned short>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned short land() {return static_cast<unsigned short>(1);} +}; + +template<> +struct reduction_identity<unsigned int> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int sum() {return static_cast<unsigned int>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int prod() {return static_cast<unsigned int>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int max() {return static_cast<unsigned int>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int min() {return UINT_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int bor() {return static_cast<unsigned int>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int band() {return ~static_cast<unsigned int>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int lor() {return static_cast<unsigned int>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned int land() {return static_cast<unsigned int>(1);} +}; + +template<> +struct reduction_identity<unsigned long> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long sum() {return static_cast<unsigned long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long prod() {return static_cast<unsigned long>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long max() {return static_cast<unsigned long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long min() {return ULONG_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long bor() {return static_cast<unsigned long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long band() {return ~static_cast<unsigned long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long lor() {return static_cast<unsigned long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long land() {return static_cast<unsigned long>(1);} +}; + 
+template<> +struct reduction_identity<unsigned long long> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long sum() {return static_cast<unsigned long long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long prod() {return static_cast<unsigned long long>(1);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long max() {return static_cast<unsigned long long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long min() {return ULLONG_MAX;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long bor() {return static_cast<unsigned long long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long band() {return ~static_cast<unsigned long long>(0x0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long lor() {return static_cast<unsigned long long>(0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static unsigned long long land() {return static_cast<unsigned long long>(1);} +}; + +template<> +struct reduction_identity<float> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static float sum() {return static_cast<float>(0.0f);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static float prod() {return static_cast<float>(1.0f);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static float max() {return FLT_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static float min() {return FLT_MAX;} +}; + +template<> +struct reduction_identity<double> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static double sum() {return static_cast<double>(0.0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static double prod() {return static_cast<double>(1.0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static double max() {return DBL_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static double min() {return DBL_MAX;} +}; + +template<> +struct reduction_identity<long double> { + KOKKOS_FORCEINLINE_FUNCTION constexpr static long double sum() {return static_cast<long double>(0.0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long 
double prod() {return static_cast<long double>(1.0);} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long double max() {return LDBL_MIN;} + KOKKOS_FORCEINLINE_FUNCTION constexpr static long double min() {return LDBL_MAX;} +}; + +} + +#endif diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index c0c43b92f4d72f4fb6ae5ba95dc5270887f1cd32..3e11621ce6a30700bd1710a1a013b3c2586296c6 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -44,22 +44,23 @@ #ifndef KOKKOS_OPENMP_HPP #define KOKKOS_OPENMP_HPP -#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_OPENMP) -#if defined( KOKKOS_ENABLE_OPENMP) && !defined(_OPENMP) +#if !defined(_OPENMP) #error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!" #endif -#if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) - -#include <omp.h> +#include <Kokkos_Core_fwd.hpp> #include <cstddef> #include <iosfwd> #include <Kokkos_HostSpace.hpp> + #ifdef KOKKOS_ENABLE_HBWSPACE #include <Kokkos_HBWSpace.hpp> #endif + #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_Parallel.hpp> #include <Kokkos_TaskScheduler.hpp> @@ -79,26 +80,26 @@ public: //@{ //! Tag this class as a kokkos execution space - typedef OpenMP execution_space ; + using execution_space = OpenMP; #ifdef KOKKOS_ENABLE_HBWSPACE - typedef Experimental::HBWSpace memory_space ; + using memory_space = Experimental::HBWSpace; #else - typedef HostSpace memory_space ; + using memory_space = HostSpace; #endif //! 
This execution space preferred device_type - typedef Kokkos::Device<execution_space,memory_space> device_type; + using device_type = Kokkos::Device<execution_space,memory_space>; - typedef LayoutRight array_layout ; - typedef memory_space::size_type size_type ; + using array_layout = LayoutRight; + using size_type = memory_space::size_type; - typedef ScratchMemorySpace< OpenMP > scratch_memory_space ; + using scratch_memory_space = ScratchMemorySpace< OpenMP >; //@} //------------------------------------ //! \name Functions that all Kokkos execution spaces must implement. //@{ - inline static bool in_parallel() { return omp_in_parallel(); } + inline static bool in_parallel(); /** \brief Set the device in a "sleep" state. A noop for OpenMP. */ static bool sleep(); @@ -153,6 +154,8 @@ public: KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); } + + static const char* name(); }; } // namespace Kokkos @@ -164,7 +167,7 @@ namespace Kokkos { namespace Impl { template<> -struct MemorySpaceAccess +struct MemorySpaceAccess < Kokkos::OpenMP::memory_space , Kokkos::OpenMP::scratch_memory_space > @@ -191,7 +194,7 @@ struct VerifyExecutionCanAccessMemorySpace /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -#include <OpenMP/Kokkos_OpenMPexec.hpp> +#include <OpenMP/Kokkos_OpenMP_Exec.hpp> #include <OpenMP/Kokkos_OpenMP_Parallel.hpp> #include <OpenMP/Kokkos_OpenMP_Task.hpp> @@ -201,4 +204,3 @@ struct VerifyExecutionCanAccessMemorySpace #endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */ #endif /* #ifndef KOKKOS_OPENMP_HPP */ - diff --git a/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp b/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4f50de032532c2e80c759927756e0cea70c7fb30 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp @@ -0,0 +1,186 @@ 
+/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGET_HPP +#define KOKKOS_OPENMPTARGET_HPP + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) && defined( _OPENMP ) + +#include <omp.h> + +#include <cstddef> +#include <iosfwd> +#include <Kokkos_OpenMPTargetSpace.hpp> +#include <Kokkos_ScratchSpace.hpp> +#include <Kokkos_Parallel.hpp> +#include <Kokkos_TaskPolicy.hpp> +#include <Kokkos_Layout.hpp> +#include <impl/Kokkos_Tags.hpp> + +#include <KokkosExp_MDRangePolicy.hpp> +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Experimental { +/// \class OpenMPTarget +/// \brief Kokkos device for multicore processors in the host memory space. +class OpenMPTarget { +public: + //------------------------------------ + //! \name Type declarations that all Kokkos devices must provide. + //@{ + + //! Tag this class as a kokkos execution space + typedef OpenMPTarget execution_space ; + typedef OpenMPTargetSpace memory_space ; + //! This execution space preferred device_type + typedef Kokkos::Device<execution_space,memory_space> device_type; + + typedef LayoutLeft array_layout ; + typedef memory_space::size_type size_type ; + + typedef ScratchMemorySpace< OpenMPTarget > scratch_memory_space ; + + //@} + //------------------------------------ + //! \name Functions that all Kokkos execution spaces must implement. + //@{ + + inline static bool in_parallel() { return omp_in_parallel(); } + + /** \brief Set the device in a "sleep" state. A noop for OpenMPTarget. */ + static bool sleep(); + + /** \brief Wake the device from the 'sleep' state. A noop for OpenMPTarget. */ + static bool wake(); + + /** \brief Wait until all dispatched functors complete. A noop for OpenMPTarget. */ + static void fence() {} + + /// \brief Print configuration information to the given output stream. 
+ static void print_configuration( std::ostream & , const bool detail = false ); + + /// \brief Free any resources being consumed by the device. + static void finalize(); + + /** \brief Initialize the device. + * + * 1) If the hardware locality library is enabled and OpenMPTarget has not + * already bound threads then bind OpenMPTarget threads to maximize + * core utilization and group for memory hierarchy locality. + * + * 2) Allocate a HostThread for each OpenMPTarget thread to hold its + * topology and fan in/out data. + */ + static void initialize( unsigned thread_count = 0 , + unsigned use_numa_count = 0 , + unsigned use_cores_per_numa = 0 ); + + static int is_initialized(); + + /** \brief Return the maximum amount of concurrency. */ + static int concurrency(); + + //@} + //------------------------------------ + /** \brief This execution space has a topological thread pool which can be queried. + * + * All threads within a pool have a common memory space for which they are cache coherent. + * depth = 0 gives the number of threads in the whole pool. + * depth = 1 gives the number of threads in a NUMA region, typically sharing L3 cache. + * depth = 2 gives the number of threads at the finest granularity, typically sharing L1 cache. 
+ */ + inline static int thread_pool_size( int depth = 0 ); + + /** \brief The rank of the executing thread in this thread pool */ + KOKKOS_INLINE_FUNCTION static int thread_pool_rank(); + + //------------------------------------ + + inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + + KOKKOS_INLINE_FUNCTION static + unsigned hardware_thread_id() { return thread_pool_rank(); } + + static const char* name(); +private: + static bool m_is_initialized; +}; +} // namespace Experimental +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +template<> +struct VerifyExecutionCanAccessMemorySpace + < Kokkos::Experimental::OpenMPTarget::memory_space + , Kokkos::Experimental::OpenMPTarget::scratch_memory_space + > +{ + enum { value = true }; + inline static void verify( void ) { } + inline static void verify( const void * ) { } +}; + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +#include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> +#include <OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp> +#include <OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp> + +/*--------------------------------------------------------------------------*/ + +#endif /* #if defined( KOKKOS_ENABLE_OPENMPTARGET ) && defined( _OPENMP ) */ +#endif /* #ifndef KOKKOS_OPENMPTARGET_HPP */ + + diff --git a/lib/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp b/lib/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp new file mode 100644 index 0000000000000000000000000000000000000000..710a86e2d834d16e773253348b2bb6e480cf3325 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp @@ -0,0 +1,265 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGETSPACE_HPP +#define KOKKOS_OPENMPTARGETSPACE_HPP + +#include <cstring> +#include <string> +#include <iosfwd> +#include <typeinfo> + +#include <Kokkos_Core_fwd.hpp> + +#ifdef KOKKOS_ENABLE_OPENMPTARGET + +#include <Kokkos_HostSpace.hpp> +#include <omp.h> +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +/// \brief Initialize lock array for arbitrary size atomics. +/// +/// Arbitrary atomics are implemented using a hash table of locks +/// where the hash value is derived from the address of the +/// object for which an atomic operation is performed. +/// This function initializes the locks to zero (unset). +//void init_lock_array_host_space(); + +/// \brief Acquire a lock for the address +/// +/// This function tries to acquire the lock for the hash value derived +/// from the provided ptr. If the lock is successfully acquired the +/// function returns true. Otherwise it returns false. +//bool lock_address_host_space(void* ptr); + +/// \brief Release lock for the address +/// +/// This function releases the lock for the hash value derived +/// from the provided ptr. This function should only be called +/// after previously successfully acquiring a lock with +/// lock_address. +//void unlock_address_host_space(void* ptr); + +} // namespace Impl +} // namespace Kokkos + +namespace Kokkos { +namespace Experimental { + +/// \class OpenMPTargetSpace +/// \brief Memory management for host memory. +/// +/// OpenMPTargetSpace is a memory space that governs host memory. "Host" +/// memory means the usual CPU-accessible memory. +class OpenMPTargetSpace { +public: + + //! 
Tag this class as a kokkos memory space + typedef OpenMPTargetSpace memory_space ; + typedef size_t size_type ; + + /// \typedef execution_space + /// \brief Default execution space for this memory space. + /// + /// Every memory space has a default execution space. This is + /// useful for things like initializing a View (which happens in + /// parallel using the View's default execution space). + typedef Kokkos::Experimental::OpenMPTarget execution_space ; + + //! This memory space preferred device_type + typedef Kokkos::Device<execution_space,memory_space> device_type; + + /*--------------------------------*/ + + /**\brief Default memory space instance */ + OpenMPTargetSpace(); + OpenMPTargetSpace( OpenMPTargetSpace && rhs ) = default ; + OpenMPTargetSpace( const OpenMPTargetSpace & rhs ) = default ; + OpenMPTargetSpace & operator = ( OpenMPTargetSpace && ) = default ; + OpenMPTargetSpace & operator = ( const OpenMPTargetSpace & ) = default ; + ~OpenMPTargetSpace() = default ; + + /**\brief Allocate untracked memory in the space */ + void * allocate( const size_t arg_alloc_size ) const ; + + /**\brief Deallocate untracked memory in the space */ + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const ; + +private: + + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void > ; +}; +} // namespace Experimental +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void > + : public SharedAllocationRecord< void , void > +{ +private: + + friend Kokkos::Experimental::OpenMPTargetSpace ; + + typedef SharedAllocationRecord< void , void > RecordBase ; + + SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord 
& operator = ( const SharedAllocationRecord & ) = delete ; + + static void deallocate( RecordBase * ); + + /**\brief Root record for tracked allocations from this OpenMPTargetSpace instance */ + static RecordBase s_root_record ; + + const Kokkos::Experimental::OpenMPTargetSpace m_space ; + +protected: + + ~SharedAllocationRecord(); + SharedAllocationRecord() = default ; + + SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate + ); + +public: + + std::string get_label() const; + + KOKKOS_INLINE_FUNCTION static + SharedAllocationRecord * allocate( const Kokkos::Experimental::OpenMPTargetSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + ); + + /**\brief Allocate tracked memory in the space */ + static + void * allocate_tracked( const Kokkos::Experimental::OpenMPTargetSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ); + + /**\brief Reallocate tracked memory in the space */ + static + void * reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ); + + /**\brief Deallocate tracked memory in the space */ + static + void deallocate_tracked( void * const arg_alloc_ptr ); + + + static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); + + static void print_records( std::ostream & , const Kokkos::Experimental::OpenMPTargetSpace & , bool detail = false ); +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//TODO: implement all possible deep_copies +template<class ExecutionSpace> +struct DeepCopy<Kokkos::Experimental::OpenMPTargetSpace,Kokkos::Experimental::OpenMPTargetSpace,ExecutionSpace> { + DeepCopy( void * dst , 
const void * src , size_t n ) { + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_default_device(), omp_get_default_device()); + } + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + exec.fence(); + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_default_device(), omp_get_default_device()); + } +}; + + +template<class ExecutionSpace> +struct DeepCopy<Kokkos::Experimental::OpenMPTargetSpace,HostSpace,ExecutionSpace> { + DeepCopy( void * dst , const void * src , size_t n ) { + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_default_device(), omp_get_initial_device()); + } + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + exec.fence(); + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_default_device(), omp_get_initial_device()); + } +}; + +template<class ExecutionSpace> +struct DeepCopy<HostSpace,Kokkos::Experimental::OpenMPTargetSpace,ExecutionSpace> { + DeepCopy( void * dst , const void * src , size_t n ) { + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_initial_device(), omp_get_default_device()); + } + DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + exec.fence(); + omp_target_memcpy( dst , const_cast<void*> (src) , n, 0, 0, omp_get_initial_device(), omp_get_default_device()); + } +}; + + +template<> +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::OpenMPTargetSpace > +{ + enum { value = false }; + inline static void verify( void ) { } + inline static void verify( const void * ) { } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif +#endif /* #define KOKKOS_OPENMPTARGETSPACE_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 900dce19fe52b538228fbb2a82cb649f5313ec43..8ea5183e353dca7e2ccfc1616338549d2ab7c206 100644 --- 
a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -41,9 +41,12 @@ //@HEADER */ +#ifndef KOKKOS_PARALLEL_REDUCE_HPP +#define KOKKOS_PARALLEL_REDUCE_HPP -namespace Kokkos { +#include <Kokkos_NumericTraits.hpp> +namespace Kokkos { template<class T, class Enable = void> struct is_reducer_type { @@ -54,7 +57,7 @@ struct is_reducer_type { template<class T> struct is_reducer_type<T,typename std::enable_if< std::is_same<typename std::remove_cv<T>::type, - typename std::remove_cv<typename T::reducer_type>::type>::value + typename std::remove_cv<typename T::reducer>::type>::value >::type> { enum { value = 1 }; }; @@ -62,47 +65,25 @@ struct is_reducer_type<T,typename std::enable_if< namespace Experimental { -template<class Scalar,class Space = HostSpace> +template<class Scalar, class Space> struct Sum { public: //Required - typedef Sum reducer_type; - typedef Scalar value_type; + typedef Sum reducer; + typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return static_cast<value_type>(0); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return value_type(); - } - }; + value_type* value; public: - Sum(value_type& result_): - init_value(InitWrapper<value_type>::value()),result(&result_) {} - Sum(const result_view_type& result_): - init_value(InitWrapper<value_type>::value()),result(result_) {} - Sum(value_type& result_, const value_type& init_value_): - init_value(init_value_),result(&result_) {} - Sum(const result_view_type& result_, const value_type& init_value_): - 
init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + Sum(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + Sum(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -115,58 +96,41 @@ public: dest += src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; + val = reduction_identity<value_type>::sum(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar,class Space = HostSpace> +template<class Scalar, class Space> struct Prod { public: //Required - typedef Prod reducer_type; - typedef Scalar value_type; + typedef Prod reducer; + typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return static_cast<value_type>(1); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return value_type(); - } - }; + value_type* value; public: - Prod(value_type& result_): - init_value(InitWrapper<value_type>::value()),result(&result_) {} - Prod(const result_view_type& result_): - init_value(InitWrapper<value_type>::value()),result(result_) {} - Prod(value_type& result_, const value_type& init_value_): - init_value(init_value_),result(&result_) {} - Prod(const result_view_type& result_, const value_type& init_value_): - init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + Prod(value_type& value_): value(&value_) {} + 
+ KOKKOS_INLINE_FUNCTION + Prod(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -179,58 +143,41 @@ public: dest *= src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; + val = reduction_identity<value_type>::prod(); + } + + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct Min { public: //Required - typedef Min reducer_type; + typedef Min reducer; typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<value_type>::max(); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return value_type(); - } - }; + value_type* value; public: - Min(value_type& result_): - init_value(InitWrapper<value_type>::value()),result(&result_) {} - Min(const result_view_type& result_): - init_value(InitWrapper<value_type>::value()),result(result_) {} - Min(value_type& result_, const value_type& init_value_): - init_value(init_value_),result(&result_) {} - Min(const result_view_type& result_, const value_type& init_value_): - init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + Min(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + Min(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -245,58 
+192,41 @@ public: dest = src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; + val = reduction_identity<value_type>::min(); + } + + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct Max { public: //Required - typedef Max reducer_type; + typedef Max reducer; typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<value_type>::min(); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return value_type(); - } - }; + value_type* value; public: - Max(value_type& result_): - init_value(InitWrapper<value_type>::value()),result(&result_) {} - Max(const result_view_type& result_): - init_value(InitWrapper<value_type>::value()),result(result_) {} - Max(value_type& result_, const value_type& init_value_): - init_value(init_value_),result(&result_) {} - Max(const result_view_type& result_, const value_type& init_value_): - init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + Max(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + Max(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -311,35 +241,43 @@ public: dest = src; } - //Optional + //Required KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; + 
val = reduction_identity<value_type>::max(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct LAnd { public: //Required - typedef LAnd reducer_type; - typedef Scalar value_type; + typedef LAnd reducer; + typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; private: - result_view_type result; + value_type* value; public: - LAnd(value_type& result_):result(&result_) {} - LAnd(const result_view_type& result_):result(result_) {} + KOKKOS_INLINE_FUNCTION + LAnd(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + LAnd(const result_view_type& value_): value(value_.data()) {} - //Required KOKKOS_INLINE_FUNCTION void join(value_type& dest, const value_type& src) const { dest = dest && src; @@ -350,33 +288,41 @@ public: dest = dest && src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = 1; + val = reduction_identity<value_type>::land(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct LOr { public: //Required - typedef LOr reducer_type; - typedef Scalar value_type; + typedef LOr reducer; + typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; private: - result_view_type result; + value_type* value; public: - LOr(value_type& result_):result(&result_) {} - LOr(const 
result_view_type& result_):result(result_) {} + KOKKOS_INLINE_FUNCTION + LOr(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + LOr(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -389,76 +335,41 @@ public: dest = dest || src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = 0; - } - - result_view_type result_view() const { - return result; - } -}; - -template<class Scalar, class Space = HostSpace> -struct LXor { -public: - //Required - typedef LXor reducer_type; - typedef Scalar value_type; - - typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - -private: - result_view_type result; - -public: - - LXor(value_type& result_):result(&result_) {} - LXor(const result_view_type& result_):result(result_) {} - - //Required - KOKKOS_INLINE_FUNCTION - void join(value_type& dest, const value_type& src) const { - dest = dest? (!src) : src; + val = reduction_identity<value_type>::lor(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest? 
(!src) : src; + value_type& reference() const { + return *value; } - //Optional KOKKOS_INLINE_FUNCTION - void init( value_type& val) const { - val = 0; - } - - result_view_type result_view() const { - return result; + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct BAnd { public: //Required - typedef BAnd reducer_type; + typedef BAnd reducer; typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; + value_type* value; public: - BAnd(value_type& result_): - init_value(value_type() | (~value_type())),result(&result_) {} - BAnd(const result_view_type& result_): - init_value(value_type() | (~value_type())),result(result_) {} + KOKKOS_INLINE_FUNCTION + BAnd(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + BAnd(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -471,37 +382,41 @@ public: dest = dest & src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; + val = reduction_identity<value_type>::band(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Space = HostSpace> +template<class Scalar, class Space> struct BOr { public: //Required - typedef BOr reducer_type; + typedef BOr reducer; typedef typename std::remove_cv<Scalar>::type value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - value_type init_value; - private: - result_view_type result; + value_type* value; public: - BOr(value_type& result_): - 
init_value(value_type() & (~value_type())),result(&result_) {} - BOr(const result_view_type& result_): - init_value(value_type() & (~value_type())),result(result_) {} + KOKKOS_INLINE_FUNCTION + BOr(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + BOr(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -514,57 +429,19 @@ public: dest = dest | src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val = init_value; - } - - result_view_type result_view() const { - return result; - } -}; - -template<class Scalar, class Space = HostSpace> -struct BXor { -public: - //Required - typedef BXor reducer_type; - typedef typename std::remove_cv<Scalar>::type value_type; - - typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - - value_type init_value; - -private: - result_view_type result; - -public: - - BXor(value_type& result_): - init_value(value_type() & (~value_type())),result(&result_) {} - BXor(const result_view_type& result_): - init_value(value_type() & (~value_type())),result(result_) {} - - //Required - KOKKOS_INLINE_FUNCTION - void join(value_type& dest, const value_type& src) const { - dest = dest ^ src; + val = reduction_identity<value_type>::bor(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { - dest = dest ^ src; + value_type& reference() const { + return *value; } - //Optional KOKKOS_INLINE_FUNCTION - void init( value_type& val) const { - val = init_value; - } - - result_view_type result_view() const { - return result; + result_view_type view() const { + return result_view_type(value); } }; @@ -586,7 +463,7 @@ struct ValLocScalar { } }; -template<class Scalar, class Index, class Space = HostSpace> +template<class Scalar, class Index, class Space> struct MinLoc { private: typedef typename std::remove_cv<Scalar>::type scalar_type; @@ -594,43 +471,21 @@ private: public: 
//Required - typedef MinLoc reducer_type; + typedef MinLoc reducer; typedef ValLocScalar<scalar_type,index_type> value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - scalar_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::max(); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; + value_type* value; public: - MinLoc(value_type& result_): - init_value(InitWrapper<scalar_type>::value()),result(&result_) {} - MinLoc(const result_view_type& result_): - init_value(InitWrapper<scalar_type>::value()),result(result_) {} - MinLoc(value_type& result_, const scalar_type& init_value_): - init_value(init_value_),result(&result_) {} - MinLoc(const result_view_type& result_, const scalar_type& init_value_): - init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + MinLoc(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + MinLoc(const result_view_type& value_): value(value_.data()) {} //Required @@ -646,18 +501,24 @@ public: dest = src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val.val = init_value; + val.val = reduction_identity<scalar_type>::min(); + val.loc = reduction_identity<index_type>::min(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; -template<class Scalar, class Index, class Space = HostSpace> +template<class Scalar, class Index, class Space> struct MaxLoc { private: typedef typename std::remove_cv<Scalar>::type 
scalar_type; @@ -665,43 +526,21 @@ private: public: //Required - typedef MaxLoc reducer_type; + typedef MaxLoc reducer; typedef ValLocScalar<scalar_type,index_type> value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - scalar_type init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct InitWrapper; - - template<class ValueType > - struct InitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::min(); - } - }; - - template<class ValueType > - struct InitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; + value_type* value; public: - MaxLoc(value_type& result_): - init_value(InitWrapper<scalar_type>::value()),result(&result_) {} - MaxLoc(const result_view_type& result_): - init_value(InitWrapper<scalar_type>::value()),result(result_) {} - MaxLoc(value_type& result_, const scalar_type& init_value_): - init_value(init_value_),result(&result_) {} - MaxLoc(const result_view_type& result_, const scalar_type& init_value_): - init_value(init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + MaxLoc(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + MaxLoc(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -716,14 +555,20 @@ public: dest = src; } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val.val = init_value; + val.val = reduction_identity<scalar_type>::max();; + val.loc = reduction_identity<index_type>::min(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; @@ -744,68 +589,28 @@ struct MinMaxScalar { } }; -template<class Scalar, class Space = HostSpace> 
+template<class Scalar, class Space> struct MinMax { private: typedef typename std::remove_cv<Scalar>::type scalar_type; public: //Required - typedef MinMax reducer_type; + typedef MinMax reducer; typedef MinMaxScalar<scalar_type> value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - scalar_type min_init_value; - scalar_type max_init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct MinInitWrapper; - - template<class ValueType > - struct MinInitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::max(); - } - }; - - template<class ValueType > - struct MinInitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct MaxInitWrapper; - - template<class ValueType > - struct MaxInitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::min(); - } - }; - - template<class ValueType > - struct MaxInitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; + value_type* value; public: - MinMax(value_type& result_): - min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(&result_) {} - MinMax(const result_view_type& result_): - min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(result_) {} - MinMax(value_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): - min_init_value(min_init_value_),max_init_value(max_init_value_),result(&result_) {} - MinMax(const result_view_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): - 
min_init_value(min_init_value_),max_init_value(max_init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + MinMax(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + MinMax(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -828,15 +633,20 @@ public: } } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val.min_val = min_init_value; - val.max_val = max_init_value; + val.max_val = reduction_identity<scalar_type>::max();; + val.min_val = reduction_identity<scalar_type>::min(); + } + + KOKKOS_INLINE_FUNCTION + value_type& reference() { + return *value; } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; @@ -862,7 +672,7 @@ struct MinMaxLocScalar { } }; -template<class Scalar, class Index, class Space = HostSpace> +template<class Scalar, class Index, class Space> struct MinMaxLoc { private: typedef typename std::remove_cv<Scalar>::type scalar_type; @@ -870,61 +680,21 @@ private: public: //Required - typedef MinMaxLoc reducer_type; + typedef MinMaxLoc reducer; typedef MinMaxLocScalar<scalar_type,index_type> value_type; typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - scalar_type min_init_value; - scalar_type max_init_value; - private: - result_view_type result; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct MinInitWrapper; - - template<class ValueType > - struct MinInitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::max(); - } - }; - - template<class ValueType > - struct MinInitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; - - template<class ValueType, bool is_arithmetic = std::is_arithmetic<ValueType>::value > - struct MaxInitWrapper; - - template<class ValueType > - struct 
MaxInitWrapper<ValueType,true> { - static ValueType value() { - return std::numeric_limits<scalar_type>::min(); - } - }; - - template<class ValueType > - struct MaxInitWrapper<ValueType,false> { - static ValueType value() { - return scalar_type(); - } - }; + value_type* value; public: - MinMaxLoc(value_type& result_): - min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(&result_) {} - MinMaxLoc(const result_view_type& result_): - min_init_value(MinInitWrapper<scalar_type>::value()),max_init_value(MaxInitWrapper<scalar_type>::value()),result(result_) {} - MinMaxLoc(value_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): - min_init_value(min_init_value_),max_init_value(max_init_value_),result(&result_) {} - MinMaxLoc(const result_view_type& result_, const scalar_type& min_init_value_, const scalar_type& max_init_value_): - min_init_value(min_init_value_),max_init_value(max_init_value_),result(result_) {} + KOKKOS_INLINE_FUNCTION + MinMaxLoc(value_type& value_): value(&value_) {} + + KOKKOS_INLINE_FUNCTION + MinMaxLoc(const result_view_type& value_): value(value_.data()) {} //Required KOKKOS_INLINE_FUNCTION @@ -951,15 +721,22 @@ public: } } - //Optional KOKKOS_INLINE_FUNCTION void init( value_type& val) const { - val.min_val = min_init_value; - val.max_val = max_init_value; + val.max_val = reduction_identity<scalar_type>::max();; + val.min_val = reduction_identity<scalar_type>::min(); + val.max_loc = reduction_identity<index_type>::min(); + val.min_loc = reduction_identity<index_type>::min(); } - result_view_type result_view() const { - return result; + KOKKOS_INLINE_FUNCTION + value_type& reference() { + return *value; + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result_view_type(value); } }; } @@ -1125,6 +902,244 @@ namespace Impl { }; } + +//---------------------------------------------------------------------------- + +#if 0 + 
+//---------------------------------------------------------------------------- + +namespace Impl { + +template< class OutType , class InType > +inline +typename std::enable_if + < std::is_same< OutType , InType >::value , InType const & >::type +forward_execution_policy( InType const & p ) { return p ; } + +template< class OutType , class InType > +inline +typename std::enable_if + < ! std::is_same< OutType , InType >::value , OutType >::type +forward_execution_policy( InType const & p ) { return OutType(p); } + + +template< class OutType , class InType > +inline +typename std::enable_if + < std::is_same< OutType , InType >::value , InType const & >::type +forward_reducer( InType const & r ) { return r ; } + +template< class OutType , class InType > +inline +typename std::enable_if< Kokkos::is_view< InType >::value , OutType >::type +forward_reducer( InType const & v ) +{ return OutType( v.data() ); } + +template< class OutType > +inline +OutType +forward_reducer( typename OutType::reference ref ) +{ return OutType( ref ); } + +} /* namespace Impl */ + +//---------------------------------------------------------------------------- +// parallel_reduce with 4 args: label, policy, closure, and reducer + +/**\brief Parallel reduce with an explicit Reducer */ +template< class PolicyType , class ClosureType , class ReduceType > +inline +typename std::enable_if< Kokkos::is_reducer< ReduceType >::value >::type +parallel_reduce( std::string const & arg_label + , PolicyType && arg_policy + , ClosureType && arg_closure + , ReduceType && arg_reduce + ) +{ + //------------------------------ + + using input_policy_type = + typename std::remove_const< + typename std::remove_reference< PolicyType >::type >::type ; + + using input_reduce_type = + typename std::remove_const< + typename std::remove_reference< ReduceType >::type >::type ; + + using Analysis = Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , input_policy_type + , ClosureType + > ; + 
+ //------------------------------ + // Policy is either given or an integer value + // If an integer value then is a RangePolicy with queried execution space + + enum { is_policy = Kokkos::is_execution_policy< input_policy_type >::value }; + enum { is_intval = std::is_integral< input_policy_type >::value }; + + static_assert( is_policy || is_intval , + "Kokkos::parallel_reduce 2nd argument must be execution policy or integral value" ); + + using policy_type = typename std::conditional + < is_policy , input_policy_type + , Kokkos::RangePolicy< typename Analysis::execution_space > + >::type ; + + //------------------------------ + // ReduceType is either a reducer, view, or value reference + + enum { is_reducer = Kokkos::is_reducer< input_reduce_type >::value }; + enum { is_view = Kokkos::is_view< input_reduce_type >::value }; + enum { is_ref = std::is_same< ReduceType + , typename Analysis::reference_type + >::value }; + + static_assert( is_reducer || is_view || is_ref , + "Kokkos::parallel_reduce 4th argument must be reducer, output View, or output variable" ); + + // If input_reducer_type is_view or is_ref then need its memory_space. + // A View has a memory_space, a reference is in the HostSpace. 
+ + using has_space = typename std::conditional + < is_view , input_reduce_type , Kokkos::HostSpace >::type ; + + using memory_space = typename has_space::memory_space ; + + using reduce_type = typename std::conditional + < is_reducer , input_reduce_type + , typename Analysis::Reducer< memory_space > + >::type ; + + //------------------------------ + + #if (KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelReduce(arg_label, 0, &kpID); + } + #endif + + //------------------------------ + // Disable tracking while creating the closure: + + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + + Kokkos::Impl::ParallelReduce< ClosureType , policy_type, reduce_type + , typename Analysis::execution_space > + closure( arg_closure + , forward_execution_policy< policy_type >( arg_policy ) + , forward_reducer< reduce_type >( arg_reduce ) ); + + Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + + // Enable tracking after creating the closure + + closure.execute(); + + //------------------------------ + + #if (KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelReduce(kpID); + } + #endif +} + +//---------------------------------------------------------------------------- +// parallel_reduce with 3 args: policy, closure, and reducer + +template< class PolicyType , class ClosureType , class ReduceType > +inline +typename std::enable_if + < Kokkos::is_execution_policy< + typename std::remove_const< + typename std::remove_reference< PolicyType >::type >::type + >::value + || + std::is_integral< + typename std::remove_const< + typename std::remove_reference< PolicyType >::type >::type + >::value + >::type ; +parallel_reduce( PolicyType && arg_policy + , ClosureType && arg_closure + , ReduceType && arg_reduce + ) +{ + parallel_reduce( typeid(ClosureType).name() + , std::forward< PolicyType >( arg_policy ) + , std::forward< 
ClosureType >( arg_closure ) + , std::forward< ReduceType >( arg_reduce ) ); +} + +// parallel_reduce with 3 args: label, policy, and closure + +template< class PolicyType , class ClosureType > +inline +void +parallel_reduce( std::string const & arg_label + , PolicyType && arg_policy + , ClosureType && arg_closure + ) +{ + // Deduce a Reducer from the Closure + + using input_policy_type = + typename std::remove_const< + typename std::remove_reference< PolicyType >::type >::type ; + + using Analysis = Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , input_policy_type + , ClosureType + > ; + + static_assert( Analysis::has_final_member_function , + "Kokkos::parallel_reduce functor does not have a final member function" ); + + parallel_reduce( arg_label + , std::forward< PolicyType >( arg_policy ) + , std::forward< ClosureType >( arg_closure ) + , typename Analysis::Reducer<>() ); +} + +//---------------------------------------------------------------------------- +// parallel_reduce with 2 arguments: policy and closure: + +/**\brief Parallel reduce processed by ClosureType::final */ +template< class PolicyType , class ClosureType > +inline +parallel_reduce( PolicyType && arg_policy + , ClosureType && arg_closure ) +{ + // Deduce a Reducer from the Closure + + using input_policy_type = + typename std::remove_const< + typename std::remove_reference< PolicyType >::type >::type ; + + using Analysis = Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , input_policy_type + , ClosureType + > ; + + static_assert( Analysis::has_final_member_function , + "Kokkos::parallel_reduce functor does not have a final member function" ); + + parallel_reduce( typeid(ClosureType).name() + , std::forward< PolicyType >( arg_policy ) + , std::forward< ClosureType >( arg_closure ) + , typename Analysis::Reducer<>() ); +} + +#endif + +//---------------------------------------------------------------------------- + /*! 
\fn void parallel_reduce(label,policy,functor,return_argument) \brief Perform a parallel reduction. \param label An optional Label giving the call name. Must be able to construct a std::string from the argument. @@ -1351,6 +1366,7 @@ void parallel_reduce(const std::string& label, Impl::ParallelReduceAdaptor<policy_type,FunctorType,result_view_type>::execute(label,policy_type(0,policy),functor,result_view); } +} //namespace Kokkos +#endif // KOKKOS_PARALLEL_REDUCE_HPP -} //namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Qthreads.hpp b/lib/kokkos/core/src/Kokkos_Qthreads.hpp index 0507552c3f95e7fb63527603c7123a19daee2b14..0323d32d872b872ad9f8068b67c38ad2c7d03e41 100644 --- a/lib/kokkos/core/src/Kokkos_Qthreads.hpp +++ b/lib/kokkos/core/src/Kokkos_Qthreads.hpp @@ -44,9 +44,10 @@ #ifndef KOKKOS_QTHREADS_HPP #define KOKKOS_QTHREADS_HPP -#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) -#ifdef KOKKOS_ENABLE_QTHREADS +#include <Kokkos_Core_fwd.hpp> // Defines to enable experimental Qthreads functionality. #define QTHREAD_LOCAL_PRIORITY @@ -150,6 +151,8 @@ public: int shepherd_size() const; int shepherd_worker_size() const; + + static const char* name(); }; } // namespace Kokkos @@ -194,5 +197,5 @@ struct VerifyExecutionCanAccessMemorySpace //#include <Qthreads/Kokkos_Qthreads_TaskQueue.hpp> // Uncomment when Tasking working. #endif // #define KOKKOS_ENABLE_QTHREADS - #endif // #define KOKKOS_QTHREADS_HPP + diff --git a/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp b/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp index ff6da6bef4231323cb4b0626d300be26ce902496..c65822c72e76029a2da69fd187419dacb232106c 100644 --- a/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_ScratchSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,9 +44,9 @@ #ifndef KOKKOS_SCRATCHSPACE_HPP #define KOKKOS_SCRATCHSPACE_HPP -#include <stdio.h> +#include <cstdio> #include <Kokkos_Core_fwd.hpp> -#include <impl/Kokkos_Tags.hpp> +#include <Kokkos_Concepts.hpp> /*--------------------------------------------------------------------------*/ @@ -57,7 +57,7 @@ namespace Kokkos { */ template< class ExecSpace > class ScratchMemorySpace { - static_assert (Impl::is_execution_space<ExecSpace>::value,"Instantiating ScratchMemorySpace on non-execution-space type."); + static_assert (is_execution_space<ExecSpace>::value,"Instantiating ScratchMemorySpace on non-execution-space type."); public: // Alignment of memory chunks returned by 'get' @@ -161,6 +161,3 @@ public: #endif /* #ifndef KOKKOS_SCRATCHSPACE_HPP */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index 72710e81679863bfc3c5e680663cf0feda2b5868..73e8ae303010b6f76ca2465059fe1c86ff843262 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -47,6 +47,9 @@ #ifndef KOKKOS_SERIAL_HPP #define KOKKOS_SERIAL_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_SERIAL ) + #include <cstddef> #include <iosfwd> #include 
<Kokkos_Parallel.hpp> @@ -63,7 +66,6 @@ #include <KokkosExp_MDRangePolicy.hpp> -#if defined( KOKKOS_ENABLE_SERIAL ) namespace Kokkos { @@ -163,6 +165,7 @@ public: KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); } inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + static const char* name(); //-------------------------------------------------------------------------- }; @@ -515,7 +518,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -798,7 +801,7 @@ public: : m_functor( arg_functor ) , m_league( arg_policy.league_size() ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) @@ -821,5 +824,3 @@ public: #endif // defined( KOKKOS_ENABLE_SERIAL ) #endif /* #define KOKKOS_SERIAL_HPP */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index e25039d236d68544cecf3dc968f853179e94a52d..7edda7aa754ff89d0099ae6c386b4350bc5e57f3 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -46,25 +46,10 @@ //---------------------------------------------------------------------------- -#include <Kokkos_Core_fwd.hpp> - -// If compiling with CUDA then must be using CUDA 8 or better -// and use relocateable device code to enable the task policy. 
-// nvcc relocatable device code option: --relocatable-device-code=true - -#if ( defined( KOKKOS_ENABLE_CUDA ) ) - #if ( 8000 <= CUDA_VERSION ) && \ - defined( KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE ) - - #define KOKKOS_ENABLE_TASKDAG - - #endif -#else - #define KOKKOS_ENABLE_TASKDAG -#endif - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) +#include <Kokkos_Core_fwd.hpp> //---------------------------------------------------------------------------- #include <Kokkos_MemoryPool.hpp> @@ -371,7 +356,7 @@ struct TaskPolicyData TaskPolicyData & operator = ( TaskPolicyData const & ) = default ; KOKKOS_INLINE_FUNCTION - TaskPolicyData( DepFutureType && arg_future + TaskPolicyData( DepFutureType const & arg_future , Kokkos::TaskPriority const & arg_priority ) : m_scheduler( 0 ) , m_dependence( arg_future ) @@ -385,6 +370,15 @@ struct TaskPolicyData , m_dependence() , m_priority( static_cast<int>( arg_priority ) ) {} + + KOKKOS_INLINE_FUNCTION + TaskPolicyData( scheduler_type const & arg_scheduler + , DepFutureType const & arg_future + , Kokkos::TaskPriority const & arg_priority ) + : m_scheduler( & arg_scheduler ) + , m_dependence( arg_future ) + , m_priority( static_cast<int>( arg_priority ) ) + {} }; } // namespace Impl @@ -413,6 +407,7 @@ public: using execution_space = ExecSpace ; using memory_space = typename queue_type::memory_space ; + using memory_pool = typename queue_type::memory_pool ; using member_type = typename Kokkos::Impl::TaskQueueSpecialization< ExecSpace >::member_type ; @@ -431,9 +426,7 @@ public: KOKKOS_INLINE_FUNCTION TaskScheduler & operator = ( TaskScheduler const & rhs ) = default ; - TaskScheduler( memory_space const & arg_memory_space - , unsigned const arg_memory_pool_capacity - , unsigned const arg_memory_pool_log2_superblock = 12 ) + TaskScheduler( memory_pool const & arg_memory_pool ) : m_track() , m_queue(0) { @@ -442,21 +435,37 @@ public: record_type ; record_type * record = - record_type::allocate( arg_memory_space + 
record_type::allocate( memory_space() , "TaskQueue" , sizeof(queue_type) ); - m_queue = new( record->data() ) - queue_type( arg_memory_space - , arg_memory_pool_capacity - , arg_memory_pool_log2_superblock ); + m_queue = new( record->data() ) queue_type( arg_memory_pool ); record->m_destroy.m_queue = m_queue ; m_track.assign_allocated_record_to_uninitialized( record ); } + TaskScheduler( memory_space const & arg_memory_space + , size_t const mempool_capacity + , unsigned const mempool_min_block_size // = 1u << 6 + , unsigned const mempool_max_block_size // = 1u << 10 + , unsigned const mempool_superblock_size // = 1u << 12 + ) + : TaskScheduler( memory_pool( arg_memory_space + , mempool_capacity + , mempool_min_block_size + , mempool_max_block_size + , mempool_superblock_size ) ) + {} + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + memory_pool * memory() const noexcept + { return m_queue ? m_queue->m_memory : (memory_pool*) 0 ; } + //---------------------------------------- /**\brief Allocation size for a spawned task */ template< typename FunctorType > @@ -502,7 +511,12 @@ public: : (queue_type*) 0 ); if ( 0 == queue ) { - Kokkos::abort("Kokkos spawn given null Future" ); + Kokkos::abort("Kokkos spawn requires scheduler or non-null Future"); + } + + if ( arg_policy.m_dependence.m_task != 0 && + arg_policy.m_dependence.m_task->m_queue != queue ) { + Kokkos::abort("Kokkos spawn given incompatible scheduler and Future"); } //---------------------------------------- @@ -641,7 +655,7 @@ public: KOKKOS_INLINE_FUNCTION int allocation_capacity() const noexcept - { return m_queue->m_memory.get_mem_size(); } + { return m_queue->m_memory.capacity(); } KOKKOS_INLINE_FUNCTION int allocated_task_count() const noexcept @@ -696,6 +710,22 @@ TaskTeam( T const & arg >( arg , arg_priority ); } +template< typename E , typename F > +Kokkos::Impl:: + TaskPolicyData< Kokkos::Impl::TaskBase<void,void,void>::TaskTeam , F > +KOKKOS_INLINE_FUNCTION +TaskTeam( 
TaskScheduler<E> const & arg_scheduler + , F const & arg_future + , typename std::enable_if< Kokkos::is_future<F>::value , + TaskPriority >::type const & arg_priority = TaskPriority::Regular + ) +{ + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskTeam , F > + ( arg_scheduler , arg_future , arg_priority ); +} + // Construct a TaskSingle execution policy template< typename T > @@ -721,6 +751,22 @@ TaskSingle( T const & arg >( arg , arg_priority ); } +template< typename E , typename F > +Kokkos::Impl:: + TaskPolicyData< Kokkos::Impl::TaskBase<void,void,void>::TaskSingle , F > +KOKKOS_INLINE_FUNCTION +TaskSingle( TaskScheduler<E> const & arg_scheduler + , F const & arg_future + , typename std::enable_if< Kokkos::is_future<F>::value , + TaskPriority >::type const & arg_priority = TaskPriority::Regular + ) +{ + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskSingle , F > + ( arg_scheduler , arg_future , arg_priority ); +} + //---------------------------------------------------------------------------- /**\brief A host control thread spawns a task with options @@ -849,3 +895,4 @@ void wait( TaskScheduler< ExecSpace > const & scheduler ) #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_TASKSCHEDULER_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index 8aa968d0535f1f6c32ac170a73d2ec60d018d824..d4069a1f597105af11679d3bcffec3992b57ebad 100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -44,9 +44,10 @@ #ifndef KOKKOS_THREADS_HPP #define KOKKOS_THREADS_HPP -#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) -#if defined( KOKKOS_ENABLE_PTHREAD ) +#include <Kokkos_Core_fwd.hpp> #include <cstddef> #include <iosfwd> @@ -178,6 +179,7 @@ public: inline static unsigned max_hardware_threads() { return thread_pool_size(0); } 
KOKKOS_INLINE_FUNCTION static unsigned hardware_thread_id() { return thread_pool_rank(); } + static const char* name(); //@} //---------------------------------------- }; @@ -190,7 +192,7 @@ namespace Kokkos { namespace Impl { template<> -struct MemorySpaceAccess +struct MemorySpaceAccess < Kokkos::Threads::memory_space , Kokkos::Threads::scratch_memory_space > @@ -227,6 +229,6 @@ struct VerifyExecutionCanAccessMemorySpace //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */ +#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */ #endif /* #define KOKKOS_THREADS_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Timer.hpp b/lib/kokkos/core/src/Kokkos_Timer.hpp index 5fd4752cd9f7bee0cbbecd9c97acfdd1e65c75cd..ec71386cbbfa24e0906210bc7c62dad94210e804 100644 --- a/lib/kokkos/core/src/Kokkos_Timer.hpp +++ b/lib/kokkos/core/src/Kokkos_Timer.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_TIMER_HPP #define KOKKOS_TIMER_HPP -#include <stddef.h> +#include <cstddef> #ifdef _MSC_VER #undef KOKKOS_ENABLE_LIBRT @@ -110,3 +110,4 @@ public: } // namespace Kokkos #endif /* #ifndef KOKKOS_TIMER_HPP */ + diff --git a/lib/kokkos/core/src/Kokkos_Vectorization.hpp b/lib/kokkos/core/src/Kokkos_Vectorization.hpp index 9d7df2de5ec5c475e8054c63c1c14fd5292f0eea..00526cb398b7e4fdfe603392a5da43ba0d447e46 100644 --- a/lib/kokkos/core/src/Kokkos_Vectorization.hpp +++ b/lib/kokkos/core/src/Kokkos_Vectorization.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -51,3 +51,4 @@ #endif #endif + diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 6575824d3d0d5c82912afd9ae7c19f9081b306b8..3312aa6a9677aeca5417856b4113b11337eb35cb 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -625,7 +625,7 @@ private: #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ View::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Impl::view_verify_operator_bounds ARG ; + Kokkos::Impl::view_verify_operator_bounds< typename traits::memory_space > ARG ; #else @@ -646,12 +646,7 @@ public: ), reference_type >::type operator()( Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,args...) ) return m_map.reference(); } @@ -669,12 +664,7 @@ public: operator()( const I0 & i0 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) return m_map.reference(i0); } @@ -690,13 +680,7 @@ public: operator()( const I0 & i0 , Args ... args ) const { - - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) 
) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) return m_map.m_handle[ i0 ]; } @@ -712,12 +696,7 @@ public: operator()( const I0 & i0 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,args...) ) return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; } @@ -733,12 +712,7 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) return m_map.reference(i0); } @@ -752,12 +726,7 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) return m_map.m_handle[ i0 ]; } @@ -771,12 +740,7 @@ public: ), reference_type >::type operator[]( const I0 & i0 ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0) ) return m_map.m_handle[ m_map.m_offset.m_stride.S0 * i0 ]; } @@ -794,12 +758,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... 
args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.reference(i0,i1); } @@ -815,12 +774,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.m_handle[ i0 + m_map.m_offset.m_dim.N0 * i1 ]; } @@ -836,12 +790,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.m_handle[ i0 + m_map.m_offset.m_stride * i1 ]; } @@ -857,12 +806,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.m_handle[ i1 + m_map.m_offset.m_dim.N1 * i0 ]; } @@ -878,12 +822,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... 
args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.m_handle[ i1 + m_map.m_offset.m_stride * i0 ]; } @@ -899,12 +838,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,args...) ) return m_map.m_handle[ i0 * m_map.m_offset.m_stride.S0 + i1 * m_map.m_offset.m_stride.S1 ]; } @@ -923,12 +857,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2) ]; } @@ -943,12 +872,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,args...) 
) return m_map.reference(i0,i1,i2); } @@ -966,12 +890,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3) ]; } @@ -986,12 +905,7 @@ public: operator()( const I0 & i0 , const I1 & i1 , const I2 & i2 , const I3 & i3 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,args...) ) return m_map.reference(i0,i1,i2,i3); } @@ -1011,12 +925,7 @@ public: , const I4 & i4 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4) ]; } @@ -1033,12 +942,7 @@ public: , const I4 & i4 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,args...) 
) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,args...) ) return m_map.reference(i0,i1,i2,i3,i4); } @@ -1058,12 +962,7 @@ public: , const I4 & i4 , const I5 & i5 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5) ]; } @@ -1080,12 +979,7 @@ public: , const I4 & i4 , const I5 & i5 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,args...) ) return m_map.reference(i0,i1,i2,i3,i4,i5); } @@ -1105,12 +999,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6) ]; } @@ -1127,12 +1016,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,args...) 
) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,args...) ) return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } @@ -1152,12 +1036,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) return m_map.m_handle[ m_map.m_offset(i0,i1,i2,i3,i4,i5,i6,i7) ]; } @@ -1174,12 +1053,7 @@ public: , const I4 & i4 , const I5 & i5 , const I6 & i6 , const I7 & i7 , Args ... args ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (NULL,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track.template get_label<typename traits::memory_space>().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) ) - #endif - + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (m_track,m_map,i0,i1,i2,i3,i4,i5,i6,i7,args...) 
) return m_map.reference(i0,i1,i2,i3,i4,i5,i6,i7); } diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index 0668f89c86e040e5dd1017fc3c3f0a233e9affa3..8fb13b89549b52ef790c6c1488321e9df18d3b41 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -9,14 +9,14 @@ default: messages build-lib echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif CXXFLAGS = -O3 LINK ?= $(CXX) -LDFLAGS ?= +LDFLAGS ?= include $(KOKKOS_PATH)/Makefile.kokkos @@ -66,7 +66,7 @@ else KOKKOS_DEBUG_CMAKE = ON endif -messages: +messages: echo "Start Build" build-makefile-kokkos: @@ -80,13 +80,13 @@ build-makefile-kokkos: echo "KOKKOS_CXX_STANDARD = $(KOKKOS_CXX_STANDARD)" >> Makefile.kokkos echo "KOKKOS_OPTIONS = $(KOKKOS_OPTIONS)" >> Makefile.kokkos echo "KOKKOS_CUDA_OPTIONS = $(KOKKOS_CUDA_OPTIONS)" >> Makefile.kokkos - echo "CXX ?= $(CXX)" >> Makefile.kokkos + echo "CXX ?= $(CXX)" >> Makefile.kokkos echo "NVCC_WRAPPER ?= $(PREFIX)/bin/nvcc_wrapper" >> Makefile.kokkos - echo "" >> Makefile.kokkos + echo "" >> Makefile.kokkos echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> Makefile.kokkos echo "KOKKOS_HEADERS = $(KOKKOS_HEADERS)" >> Makefile.kokkos echo "KOKKOS_SRC = $(KOKKOS_SRC)" >> Makefile.kokkos - echo "" >> Makefile.kokkos + echo "" >> Makefile.kokkos echo "#Variables used in application Makefiles" >> Makefile.kokkos echo "KOKKOS_CPP_DEPENDS = $(KOKKOS_CPP_DEPENDS)" >> Makefile.kokkos echo "KOKKOS_CXXFLAGS = $(KOKKOS_CXXFLAGS)" >> Makefile.kokkos @@ -133,11 +133,11 @@ build-cmake-kokkos: echo "else()" >> kokkos.cmake echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake echo "endif()" >> kokkos.cmake - echo "" >> kokkos.cmake + echo "" >> kokkos.cmake echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake echo "set(KOKKOS_HEADERS 
\"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake - echo "" >> kokkos.cmake + echo "" >> kokkos.cmake echo "#Variables used in application Makefiles" >> kokkos.cmake echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake @@ -164,7 +164,7 @@ build-cmake-kokkos: build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS) -mkdir: +mkdir: mkdir -p $(PREFIX) mkdir -p $(PREFIX)/bin mkdir -p $(PREFIX)/include @@ -187,7 +187,7 @@ copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP cp $(COPY_FLAG) $(KOKKOS_HEADERS_OPENMP) $(PREFIX)/include/OpenMP -install: mkdir $(CONDITIONAL_COPIES) build-lib +install: mkdir $(CONDITIONAL_COPIES) build-lib cp $(COPY_FLAG) $(NVCC_WRAPPER) $(PREFIX)/bin cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp similarity index 87% rename from lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp rename to lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index 2d50c6e54886087deea707d0dbb155566ed51428..4e0ea93920cee66f3fde2f7f1d0f7351a0a67650 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -41,7 +41,10 @@ //@HEADER */ -#include <stdio.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_OPENMP ) + +#include <cstdio> #include <limits> #include <iostream> #include <vector> @@ -51,7 +54,6 @@ #include <impl/Kokkos_CPUDiscovery.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> -#ifdef KOKKOS_ENABLE_OPENMP namespace Kokkos { namespace Impl { @@ -82,13 +84,13 @@ bool s_using_hwloc = false; namespace Kokkos { namespace Impl { -int 
OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; +int OpenMPExec::m_map_rank[ OpenMPExec::MAX_THREAD_COUNT ] = { 0 }; -int OpenMPexec::m_pool_topo[ 4 ] = { 0 }; +int OpenMPExec::m_pool_topo[ 4 ] = { 0 }; -HostThreadTeamData * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; +HostThreadTeamData * OpenMPExec::m_pool[ OpenMPExec::MAX_THREAD_COUNT ] = { 0 }; -void OpenMPexec::verify_is_process( const char * const label ) +void OpenMPExec::verify_is_process( const char * const label ) { if ( omp_in_parallel() ) { std::string msg( label ); @@ -97,7 +99,7 @@ void OpenMPexec::verify_is_process( const char * const label ) } } -void OpenMPexec::verify_initialized( const char * const label ) +void OpenMPExec::verify_initialized( const char * const label ) { if ( 0 == m_pool[0] ) { std::string msg( label ); @@ -122,7 +124,7 @@ void OpenMPexec::verify_initialized( const char * const label ) namespace Kokkos { namespace Impl { -void OpenMPexec::clear_thread_data() +void OpenMPExec::clear_thread_data() { const size_t member_bytes = sizeof(int64_t) * @@ -149,7 +151,7 @@ void OpenMPexec::clear_thread_data() /* END #pragma omp parallel */ } -void OpenMPexec::resize_thread_data( size_t pool_reduce_bytes +void OpenMPExec::resize_thread_data( size_t pool_reduce_bytes , size_t team_reduce_bytes , size_t team_shared_bytes , size_t thread_local_bytes ) @@ -231,7 +233,7 @@ namespace Kokkos { //---------------------------------------------------------------------------- int OpenMP::is_initialized() -{ return 0 != Impl::OpenMPexec::m_pool[0]; } +{ return 0 != Impl::OpenMPExec::m_pool[0]; } void OpenMP::initialize( unsigned thread_count , unsigned use_numa_count , @@ -257,7 +259,7 @@ void OpenMP::initialize( unsigned thread_count , static int omp_max_threads = nthreads; - const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ; + const bool is_initialized = 0 != Impl::OpenMPExec::m_pool[0] ; bool thread_spawn_failed = false ; @@ -270,7 +272,7 @@ void 
OpenMP::initialize( unsigned thread_count , ( 1 < Kokkos::hwloc::get_available_numa_count() ) || ( 1 < Kokkos::hwloc::get_available_threads_per_core() ) ); - std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPexec::MAX_THREAD_COUNT ]; + std::pair<unsigned,unsigned> threads_coord[ Impl::OpenMPExec::MAX_THREAD_COUNT ]; // If hwloc available then use it's maximum value. @@ -316,16 +318,16 @@ void OpenMP::initialize( unsigned thread_count , ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) : omp_rank ; - Impl::OpenMPexec::m_map_rank[ omp_rank ] = thread_r ; + Impl::OpenMPExec::m_map_rank[ omp_rank ] = thread_r ; } /* END #pragma omp critical */ } /* END #pragma omp parallel */ if ( ! thread_spawn_failed ) { - Impl::OpenMPexec::m_pool_topo[0] = thread_count ; - Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; - Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1; + Impl::OpenMPExec::m_pool_topo[0] = thread_count ; + Impl::OpenMPExec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; + Impl::OpenMPExec::m_pool_topo[2] = Impl::s_using_hwloc ? 
thread_count / ( use_numa_count * use_cores_per_numa ) : 1; // New, unified host thread team data: { @@ -334,7 +336,7 @@ void OpenMP::initialize( unsigned thread_count , size_t team_shared_bytes = 1024 * thread_count ; size_t thread_local_bytes = 1024 ; - Impl::OpenMPexec::resize_thread_data( pool_reduce_bytes + Impl::OpenMPExec::resize_thread_data( pool_reduce_bytes , team_reduce_bytes , team_shared_bytes , thread_local_bytes @@ -371,15 +373,15 @@ void OpenMP::initialize( unsigned thread_count , void OpenMP::finalize() { - Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" ); - Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" ); + Impl::OpenMPExec::verify_initialized( "OpenMP::finalize" ); + Impl::OpenMPExec::verify_is_process( "OpenMP::finalize" ); // New, unified host thread team data: - Impl::OpenMPexec::clear_thread_data(); + Impl::OpenMPExec::clear_thread_data(); - Impl::OpenMPexec::m_pool_topo[0] = 0 ; - Impl::OpenMPexec::m_pool_topo[1] = 0 ; - Impl::OpenMPexec::m_pool_topo[2] = 0 ; + Impl::OpenMPExec::m_pool_topo[0] = 0 ; + Impl::OpenMPExec::m_pool_topo[1] = 0 ; + Impl::OpenMPExec::m_pool_topo[2] = 0 ; omp_set_num_threads(1); @@ -396,7 +398,7 @@ void OpenMP::finalize() void OpenMP::print_configuration( std::ostream & s , const bool detail ) { - Impl::OpenMPexec::verify_is_process( "OpenMP::print_configuration" ); + Impl::OpenMPExec::verify_is_process( "OpenMP::print_configuration" ); s << "Kokkos::OpenMP" ; @@ -414,12 +416,12 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail ) ; #endif - const bool is_initialized = 0 != Impl::OpenMPexec::m_pool[0] ; + const bool is_initialized = 0 != Impl::OpenMPExec::m_pool[0] ; if ( is_initialized ) { - const int numa_count = Kokkos::Impl::OpenMPexec::m_pool_topo[0] / Kokkos::Impl::OpenMPexec::m_pool_topo[1] ; - const int core_per_numa = Kokkos::Impl::OpenMPexec::m_pool_topo[1] / Kokkos::Impl::OpenMPexec::m_pool_topo[2] ; - const int thread_per_core = 
Kokkos::Impl::OpenMPexec::m_pool_topo[2] ; + const int numa_count = Kokkos::Impl::OpenMPExec::m_pool_topo[0] / Kokkos::Impl::OpenMPExec::m_pool_topo[1] ; + const int core_per_numa = Kokkos::Impl::OpenMPExec::m_pool_topo[1] / Kokkos::Impl::OpenMPExec::m_pool_topo[2] ; + const int thread_per_core = Kokkos::Impl::OpenMPExec::m_pool_topo[2] ; s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa @@ -428,7 +430,7 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail ) << std::endl ; if ( detail ) { - std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPexec::m_pool_topo[0] ); + std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPExec::m_pool_topo[0] ); #pragma omp parallel { @@ -442,7 +444,7 @@ void OpenMP::print_configuration( std::ostream & s , const bool detail ) for ( unsigned i = 0 ; i < coord.size() ; ++i ) { s << " thread omp_rank[" << i << "]" - << " kokkos_rank[" << Impl::OpenMPexec::m_map_rank[ i ] << "]" + << " kokkos_rank[" << Impl::OpenMPExec::m_map_rank[ i ] << "]" << " hwloc_coord[" << coord[i].first << "." 
<< coord[i].second << "]" << std::endl ; } @@ -457,6 +459,11 @@ int OpenMP::concurrency() { return thread_pool_size(0); } +const char* OpenMP::name() { return "OpenMP"; } + } // namespace Kokkos +#else +void KOKKOS_CORE_SRC_OPENMP_EXEC_PREVENT_LINK_ERROR() {} #endif //KOKKOS_ENABLE_OPENMP + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp similarity index 97% rename from lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp rename to lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 39ace3131927d8071c50fc44dedb046bf598f0de..75b7f5da4a4bf8f2dfa2aeb0b5726a543b17823f 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -44,6 +44,9 @@ #ifndef KOKKOS_OPENMPEXEC_HPP #define KOKKOS_OPENMPEXEC_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_OPENMP ) + #include <Kokkos_OpenMP.hpp> #include <impl/Kokkos_Traits.hpp> @@ -66,7 +69,7 @@ namespace Impl { //---------------------------------------------------------------------------- /** \brief Data for OpenMP thread execution */ -class OpenMPexec { +class OpenMPExec { public: friend class Kokkos::OpenMP ; @@ -324,17 +327,21 @@ public: namespace Kokkos { +inline +bool OpenMP::in_parallel() +{ return omp_in_parallel(); } + inline int OpenMP::thread_pool_size( int depth ) { - return Impl::OpenMPexec::pool_size(depth); + return Impl::OpenMPExec::pool_size(depth); } KOKKOS_INLINE_FUNCTION int OpenMP::thread_pool_rank() { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return Impl::OpenMPexec::m_map_rank[ omp_get_thread_num() ]; + return Impl::OpenMPExec::m_map_rank[ omp_get_thread_num() ]; #else return -1 ; #endif @@ -342,4 +349,6 @@ int OpenMP::thread_pool_rank() } // namespace Kokkos +#endif #endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index 
ecacffb77331c9d14134dc2dcc9a8eafabbc175f..c47e0fc654f78d7b9e3f46c9b2aa14bdc81e1fa3 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,9 +44,12 @@ #ifndef KOKKOS_OPENMP_PARALLEL_HPP #define KOKKOS_OPENMP_PARALLEL_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_OPENMP ) + #include <omp.h> #include <iostream> -#include <OpenMP/Kokkos_OpenMPexec.hpp> +#include <OpenMP/Kokkos_OpenMP_Exec.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> //---------------------------------------------------------------------------- @@ -58,7 +61,7 @@ namespace Impl { template< class FunctorType , class ... Traits > class ParallelFor< FunctorType , Kokkos::RangePolicy< Traits ... 
> - , Kokkos::OpenMP + , Kokkos::OpenMP > { private: @@ -77,7 +80,7 @@ private: exec_range( const FunctorType & functor , const Member ibeg , const Member iend ) { - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -94,7 +97,7 @@ private: , const Member ibeg , const Member iend ) { const TagType t{} ; - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif @@ -111,12 +114,12 @@ public: enum { is_dynamic = std::is_same< typename Policy::schedule_type::type , Kokkos::Dynamic >::value }; - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_for"); #pragma omp parallel { - HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + HostThreadTeamData & data = *OpenMPExec::get_thread_data(); data.set_work_partition( m_policy.end() - m_policy.begin() , m_policy.chunk_size() ); @@ -200,11 +203,6 @@ private: , const Member ibeg , const Member iend , reference_type update ) { - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { functor( iwork , update ); } @@ -218,11 +216,6 @@ private: , reference_type update ) { const TagType t{} ; - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { functor( t , iwork , update ); } @@ -235,13 +228,13 @@ public: enum { is_dynamic = std::is_same< typename Policy::schedule_type::type , Kokkos::Dynamic >::value }; - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - 
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_reduce"); const size_t pool_reduce_bytes = Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); - OpenMPexec::resize_thread_data( pool_reduce_bytes + OpenMPExec::resize_thread_data( pool_reduce_bytes , 0 // team_reduce_bytes , 0 // team_shared_bytes , 0 // thread_local_bytes @@ -249,7 +242,7 @@ public: #pragma omp parallel { - HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + HostThreadTeamData & data = *OpenMPExec::get_thread_data(); data.set_work_partition( m_policy.end() - m_policy.begin() , m_policy.chunk_size() ); @@ -282,12 +275,12 @@ public: // Reduction: - const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); + const pointer_type ptr = pointer_type( OpenMPExec::get_thread_data(0)->pool_reduce_local() ); - for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { + for ( int i = 1 ; i < OpenMPExec::pool_size() ; ++i ) { ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr - , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); + , OpenMPExec::get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); @@ -305,7 +298,7 @@ public: inline ParallelReduce( const FunctorType & arg_functor , Policy arg_policy - , const ViewType & arg_result_view + , const ViewType & arg_view , typename std::enable_if< Kokkos::is_view< ViewType >::value && !Kokkos::is_reducer_type<ReducerType>::value @@ -313,7 +306,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) - , m_result_ptr( arg_result_view.data() ) + , m_result_ptr( arg_view.data() ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -327,7 +320,7 @@ 
public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -378,11 +371,6 @@ private: , const Member ibeg , const Member iend , reference_type update , const bool final ) { - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { functor( iwork , update , final ); } @@ -396,11 +384,6 @@ private: , reference_type update , const bool final ) { const TagType t{} ; - #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - #endif for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { functor( t , iwork , update , final ); } @@ -411,13 +394,13 @@ public: inline void execute() const { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan"); + OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_scan"); + OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_scan"); const int value_count = Analysis::value_count( m_functor ); const size_t pool_reduce_bytes = 2 * Analysis::value_size( m_functor ); - OpenMPexec::resize_thread_data( pool_reduce_bytes + OpenMPExec::resize_thread_data( pool_reduce_bytes , 0 // team_reduce_bytes , 0 // team_shared_bytes , 0 // thread_local_bytes @@ -425,7 +408,7 @@ public: #pragma omp parallel { - HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + HostThreadTeamData & data = *OpenMPExec::get_thread_data(); const WorkRange range( m_policy, data.pool_rank(), data.pool_size() ); @@ -565,22 +548,22 @@ public: { enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - 
OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_for"); const size_t pool_reduce_size = 0 ; // Never shrinks const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); const size_t thread_local_size = 0 ; // Never shrinks - OpenMPexec::resize_thread_data( pool_reduce_size + OpenMPExec::resize_thread_data( pool_reduce_size , team_reduce_size , team_shared_size , thread_local_size ); #pragma omp parallel { - HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + HostThreadTeamData & data = *OpenMPExec::get_thread_data(); const int active = data.organize_team( m_policy.team_size() ); @@ -723,8 +706,8 @@ public: { enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_reduce"); const size_t pool_reduce_size = Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); @@ -733,14 +716,14 @@ public: const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); const size_t thread_local_size = 0 ; // Never shrinks - OpenMPexec::resize_thread_data( pool_reduce_size + OpenMPExec::resize_thread_data( pool_reduce_size , team_reduce_size , team_shared_size , thread_local_size ); #pragma omp parallel { - HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + HostThreadTeamData & data = *OpenMPExec::get_thread_data(); const int active = data.organize_team( m_policy.team_size() ); @@ -785,12 +768,12 @@ public: // Reduction: - const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); + const pointer_type ptr = 
pointer_type( OpenMPExec::get_thread_data(0)->pool_reduce_local() ); - for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { + for ( int i = 1 ; i < OpenMPExec::pool_size() ; ++i ) { ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr - , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); + , OpenMPExec::get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); @@ -830,7 +813,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType > @@ -849,5 +832,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#endif #endif /* KOKKOS_OPENMP_PARALLEL_HPP */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 9144d8c2799a7db81af0886aafcff1ebcd828833..d4ade211f804982692dec8c63e75a83bea8778cd 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <Kokkos_Core.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include <Kokkos_Core.hpp> + #include <impl/Kokkos_TaskQueue_impl.hpp> #include <impl/Kokkos_HostThreadTeam.hpp> @@ -110,21 +111,27 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::execute static task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + HostThreadTeamData & team_data_single = HostThreadTeamDataSingleton::singleton(); - const int team_size = Impl::OpenMPexec::pool_size(2); // Threads per core - // const int team_size = Impl::OpenMPexec::pool_size(1); // Threads per NUMA + const int team_size = Impl::OpenMPExec::pool_size(2); // Threads per core + // const int team_size = Impl::OpenMPExec::pool_size(1); // Threads per NUMA #if 0 fprintf(stdout,"TaskQueue<OpenMP> execute %d\n", team_size ); fflush(stdout); #endif + OpenMPExec::resize_thread_data( 0 /* global reduce buffer */ + , 512 * team_size /* team reduce buffer */ + , 0 /* team shared buffer */ + , 0 /* thread local buffer */ + ); #pragma omp parallel { - Impl::HostThreadTeamData & self = *Impl::OpenMPexec::get_thread_data(); + Impl::HostThreadTeamData & self = *Impl::OpenMPExec::get_thread_data(); // Organizing threads into a team performs a barrier across the // entire pool to insure proper initialization of the team @@ -164,7 +171,7 @@ fflush(stdout); if ( 0 != task && end != task ) { // team member #0 completes the previously executed task, // completion may delete the task - queue->complete( task ); + queue->complete( task ); } // If 0 == m_ready_count then set task = 0 @@ -301,7 +308,7 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: (*task->m_apply)( task , & single_exec ); - queue->complete( task ); + queue->complete( task ); } while(1); } @@ -310,7 +317,7 @@ void TaskQueueSpecialization< Kokkos::OpenMP 
>:: }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- - +#else +void KOKKOS_CORE_SRC_OPENMP_KOKKOS_OPENMP_TASK_PREVENT_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) */ - diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 3cfdf790bfb75165b936ce547828fd7f248f0b00..82fbef255b7fced3ab68ee962587e4dd45b646fb 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,6 +44,7 @@ #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP #define KOKKOS_IMPL_OPENMP_TASK_HPP +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp new file mode 100644 index 0000000000000000000000000000000000000000..57d39c3160749c82bd19e033aacfb96e2155dd4c --- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp @@ -0,0 +1,306 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <algorithm> +#include <omp.h> +#include <Kokkos_Macros.hpp> + +/*--------------------------------------------------------------------------*/ + +#include <stdlib.h> +#include <stdint.h> +#include <memory.h> + +#include <iostream> +#include <sstream> +#include <cstring> + +#include <Kokkos_OpenMPTargetSpace.hpp> +#include <impl/Kokkos_Error.hpp> +#include <Kokkos_Atomic.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + + +namespace Kokkos { +namespace Experimental { +/* Default allocation mechanism */ +OpenMPTargetSpace::OpenMPTargetSpace() +{} + +void * OpenMPTargetSpace::allocate( const size_t arg_alloc_size ) const +{ + static_assert( sizeof(void*) == sizeof(uintptr_t) + , "Error sizeof(void*) != sizeof(uintptr_t)" ); + + void * ptr; + + ptr = omp_target_alloc( arg_alloc_size, omp_get_default_device()); + + return ptr; +} + + +void OpenMPTargetSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_size ) const +{ + if ( arg_alloc_ptr ) { + + omp_target_free( arg_alloc_ptr , omp_get_default_device() ); + + } +} +} // namespace Experimental +} // namespace Kokkos + 
+//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +SharedAllocationRecord< void , void > +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >::s_root_record ; + +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +~SharedAllocationRecord() +{ + m_space.deallocate( SharedAllocationRecord< void , void >::m_alloc_ptr + , SharedAllocationRecord< void , void >::m_alloc_size + ); +} + +//TODO: Implement deep copy back see CudaSpace +std::string +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >::get_label() const { + return std::string("OpenMPTargetAllocation"); +} + +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >* +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +allocate( const Kokkos::Experimental::OpenMPTargetSpace & arg_space, + const std::string & arg_label , + const size_t arg_alloc_size) { + return new SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >(arg_space,arg_label,arg_alloc_size); +} + +void +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +deallocate( SharedAllocationRecord<void, void> * arg_rec ) +{ + delete static_cast<SharedAllocationRecord*>(arg_rec); +} + +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +SharedAllocationRecord( const Kokkos::Experimental::OpenMPTargetSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const SharedAllocationRecord< void , void >::function_type arg_dealloc + ) + // Pass through allocated [ SharedAllocationHeader , user_memory ] + // Pass through deallocation function + : SharedAllocationRecord< void , void > + ( & SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >::s_root_record + , 
reinterpret_cast<SharedAllocationHeader*>( arg_space.allocate( sizeof(SharedAllocationHeader) + arg_alloc_size ) ) + , sizeof(SharedAllocationHeader) + arg_alloc_size + , arg_dealloc + ) + , m_space( arg_space ) +{ + SharedAllocationHeader header; + + header.m_record = static_cast< SharedAllocationRecord< void , void > * >( this ); + + strncpy( header.m_label + , arg_label.c_str() + , SharedAllocationHeader::maximum_label_length + ); + + //TODO DeepCopy + // DeepCopy + +} + +//---------------------------------------------------------------------------- + +void * SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +allocate_tracked( const Kokkos::Experimental::OpenMPTargetSpace & arg_space + , const std::string & arg_alloc_label + , const size_t arg_alloc_size ) +{ + if ( ! arg_alloc_size ) return (void *) 0 ; + + SharedAllocationRecord * const r = + allocate( arg_space , arg_alloc_label , arg_alloc_size ); + + RecordBase::increment( r ); + + return r->data(); +} + +void SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +deallocate_tracked( void * const arg_alloc_ptr ) +{ + if ( arg_alloc_ptr != 0 ) { + SharedAllocationRecord * const r = get_record( arg_alloc_ptr ); + + RecordBase::decrement( r ); + } +} + +void * SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +reallocate_tracked( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) +{ + SharedAllocationRecord * const r_old = get_record( arg_alloc_ptr ); + SharedAllocationRecord * const r_new = allocate( r_old->m_space , r_old->get_label() , arg_alloc_size ); + + //Kokkos::Impl::DeepCopy<OpenMPTargetSpace,OpenMPTargetSpace>( r_new->data() , r_old->data() + // , std::min( r_old->size() , r_new->size() ) ); + + RecordBase::increment( r_new ); + RecordBase::decrement( r_old ); + + return r_new->data(); +} + +SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void > * +SharedAllocationRecord< 
Kokkos::Experimental::OpenMPTargetSpace , void >::get_record( void * alloc_ptr ) +{ + typedef SharedAllocationHeader Header ; + typedef SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void > RecordHost ; + + SharedAllocationHeader const * const head = alloc_ptr ? Header::get_header( alloc_ptr ) : (SharedAllocationHeader *)0 ; + RecordHost * const record = head ? static_cast< RecordHost * >( head->m_record ) : (RecordHost *) 0 ; + + if ( ! alloc_ptr || record->m_alloc_ptr != head ) { + Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >::get_record ERROR" ) ); + } + + return record ; +} + +// Iterate records to print orphaned memory ... +void SharedAllocationRecord< Kokkos::Experimental::OpenMPTargetSpace , void >:: +print_records( std::ostream & s , const Kokkos::Experimental::OpenMPTargetSpace & space , bool detail ) +{ + SharedAllocationRecord< void , void >::print_host_accessible_records( s , "OpenMPTargetSpace" , & s_root_record , detail ); +} + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +template< class > +struct ViewOperatorBoundsErrorAbort ; + +template<> +struct ViewOperatorBoundsErrorAbort< Kokkos::Experimental::OpenMPTargetSpace > { + static void apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ); +}; + +void ViewOperatorBoundsErrorAbort< Kokkos::Experimental::OpenMPTargetSpace >:: +apply( const size_t rank + , const size_t n0 , const size_t n1 + , const size_t 
n2 , const size_t n3 + , const size_t n4 , const size_t n5 + , const size_t n6 , const size_t n7 + , const size_t i0 , const size_t i1 + , const size_t i2 , const size_t i3 + , const size_t i4 , const size_t i5 + , const size_t i6 , const size_t i7 ) +{ + printf( "View operator bounds error : rank(%lu) dim(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu) index(%lu,%lu,%lu,%lu,%lu,%lu,%lu,%lu)" + , rank , n0 , n1 , n2 , n3 , n4 , n5 , n6 , n7 + , i0 , i1 , i2 , i3 , i4 , i5 , i6 , i7 ); + //Kokkos::Impl::throw_runtime_exception( buffer ); +} + +} // namespace Impl +} // namespace Kokkos + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +/* +namespace Kokkos { +namespace { + const unsigned HOST_SPACE_ATOMIC_MASK = 0xFFFF; + const unsigned HOST_SPACE_ATOMIC_XOR_MASK = 0x5A39; + static int HOST_SPACE_ATOMIC_LOCKS[HOST_SPACE_ATOMIC_MASK+1]; +} + +namespace Impl { +void init_lock_array_host_space() { + static int is_initialized = 0; + if(! 
is_initialized) + for(int i = 0; i < static_cast<int> (HOST_SPACE_ATOMIC_MASK+1); i++) + HOST_SPACE_ATOMIC_LOCKS[i] = 0; +} + +bool lock_address_host_space(void* ptr) { + return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , + 0 , 1); +} + +void unlock_address_host_space(void* ptr) { + atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ + (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , + 0); +} + +} +}*/ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..447245ea9a4c7dff78c466146bc1c184563e0f10 --- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp @@ -0,0 +1,273 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdio.h> +#include <limits> +#include <iostream> +#include <vector> +#include <Kokkos_Core.hpp> +#include <impl/Kokkos_Error.hpp> +#include <iostream> +#include <impl/Kokkos_CPUDiscovery.hpp> +#include <impl/Kokkos_Profiling_Interface.hpp> + +#ifdef KOKKOS_ENABLE_OPENMPTARGET + +namespace Kokkos { +namespace Impl { +namespace { + +KOKKOS_INLINE_FUNCTION +int kokkos_omp_in_parallel(); + +int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); + +KOKKOS_INLINE_FUNCTION +int kokkos_omp_in_parallel() +{ +#ifndef __CUDA_ARCH__ + return omp_in_parallel() && ! 
kokkos_omp_in_critical_region ; +#else + return 0; +#endif +} + +bool s_using_hwloc = false; + +} // namespace +} // namespace Impl +} // namespace Kokkos + + +namespace Kokkos { +namespace Experimental { +bool OpenMPTarget::m_is_initialized = false; +} +} + +namespace Kokkos { +namespace Impl { + + +//int OpenMPTargetExec::m_map_rank[ OpenMPTargetExec::MAX_THREAD_COUNT ] = { 0 }; + +//int OpenMPTargetExec::m_pool_topo[ 4 ] = { 0 }; + +//OpenMPTargetExec * OpenMPTargetExec::m_pool[ OpenMPTargetExec::MAX_THREAD_COUNT ] = { 0 }; + +void OpenMPTargetExec::verify_is_process( const char * const label ) +{ + if ( omp_in_parallel() ) { + std::string msg( label ); + msg.append( " ERROR: in parallel" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +void OpenMPTargetExec::verify_initialized( const char * const label ) +{ + if ( 0 == Kokkos::Experimental::OpenMPTarget::is_initialized() ) { + std::string msg( label ); + msg.append( " ERROR: not initialized" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } + + if ( omp_get_max_threads() != Kokkos::Experimental::OpenMPTarget::thread_pool_size(0) ) { + std::string msg( label ); + msg.append( " ERROR: Initialized but threads modified inappropriately" ); + Kokkos::Impl::throw_runtime_exception( msg ); + } + +} + +void* OpenMPTargetExec::m_scratch_ptr = NULL; +int64_t OpenMPTargetExec::m_scratch_size = 0; + +void OpenMPTargetExec::clear_scratch() +{ + Kokkos::Experimental::OpenMPTargetSpace space; + space.deallocate(m_scratch_ptr,m_scratch_size); + m_scratch_ptr = NULL; + m_scratch_size = NULL; +} + +void* OpenMPTargetExec::get_scratch_ptr() { return m_scratch_ptr; } + +void OpenMPTargetExec::resize_scratch( int64_t reduce_bytes , + int64_t team_reduce_bytes, + int64_t team_shared_bytes, int64_t thread_local_bytes) +{ + Kokkos::Experimental::OpenMPTargetSpace space; + uint64_t total_size = MAX_ACTIVE_TEAMS * reduce_bytes + // Inter Team Reduction + MAX_ACTIVE_TEAMS * team_reduce_bytes + // Intra Team Reduction + 
MAX_ACTIVE_TEAMS * team_shared_bytes + // Team Local Scratch + MAX_ACTIVE_THREADS * thread_local_bytes; // Thread Private Scratch + + if( total_size > m_scratch_size ) { + space.deallocate(m_scratch_ptr,m_scratch_size); + m_scratch_size = total_size; + m_scratch_ptr = space.allocate(total_size); + } + +} +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { +//---------------------------------------------------------------------------- + +int OpenMPTarget::is_initialized() +{ return m_is_initialized; }// != Impl::OpenMPTargetExec::m_pool[0]; } + +void OpenMPTarget::initialize( unsigned thread_count , + unsigned use_numa_count , + unsigned use_cores_per_numa ) +{ + // Before any other call to OMP query the maximum number of threads + // and save the value for re-initialization unit testing. + + + // Init the array for used for arbitrarily sized atomics + Kokkos::Impl::init_lock_array_host_space(); + + #ifdef KOKKOS_ENABLE_PROFILING + Kokkos::Profiling::initialize(); + #endif + m_is_initialized = true; +} + +//---------------------------------------------------------------------------- + +void OpenMPTarget::finalize() +{ + Kokkos::Impl::OpenMPTargetExec::verify_initialized( "OpenMPTarget::finalize" ); + Kokkos::Impl::OpenMPTargetExec::verify_is_process( "OpenMPTarget::finalize" ); + + m_is_initialized = false; + + omp_set_num_threads(1); + + if ( Kokkos::Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ) { + hwloc::unbind_this_thread(); + } + + #ifdef KOKKOS_ENABLE_PROFILING + Kokkos::Profiling::finalize(); + #endif +} + +//---------------------------------------------------------------------------- + +void OpenMPTarget::print_configuration( std::ostream & s , const bool detail ) +{ + Kokkos::Impl::OpenMPTargetExec::verify_is_process( 
"OpenMPTarget::print_configuration" ); +/* + s << "Kokkos::Experimental::OpenMPTarget" ; + +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) + s << " KOKKOS_ENABLE_OPENMPTARGET" ; +#endif +#if defined( KOKKOS_HAVE_HWLOC ) + + const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]" + << " hwloc_binding_" << ( Impl::s_using_hwloc ? "enabled" : "disabled" ) + ; +#endif + + const bool is_initialized = 0 != Impl::OpenMPTargetExec::m_pool[0] ; + + if ( is_initialized ) { + const int numa_count = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] / Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] ; + const int core_per_numa = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[1] / Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; + const int thread_per_core = Kokkos::Impl::OpenMPTargetExec::m_pool_topo[2] ; + + s << " thread_pool_topology[ " << numa_count + << " x " << core_per_numa + << " x " << thread_per_core + << " ]" + << std::endl ; + + if ( detail ) { + std::vector< std::pair<unsigned,unsigned> > coord( Kokkos::Impl::OpenMPTargetExec::m_pool_topo[0] ); + +#pragma omp parallel + { +#pragma omp critical + { + coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate(); + } +// END #pragma omp critical + } +// END #pragma omp parallel + + for ( unsigned i = 0 ; i < coord.size() ; ++i ) { + s << " thread omp_rank[" << i << "]" + << " kokkos_rank[" << Impl::OpenMPTargetExec::m_map_rank[ i ] << "]" + << " hwloc_coord[" << coord[i].first << "." 
<< coord[i].second << "]" + << std::endl ; + } + } + } + else { + s << " not initialized" << std::endl ; + } +*/ +} + +int OpenMPTarget::concurrency() { + return thread_pool_size(0); +} + +const char* OpenMPTarget::name() { return "OpenMPTarget"; } +} // namespace Experimental +} // namespace Kokkos + +#endif //KOKKOS_ENABLE_OPENMPTARGET diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bec7844ed649ba11d919eeb94f6a6f50ef56a2fb --- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -0,0 +1,727 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGETEXEC_HPP +#define KOKKOS_OPENMPTARGETEXEC_HPP + +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_spinwait.hpp> + +#include <Kokkos_Atomic.hpp> +#include <iostream> +#include <sstream> +#include <fstream> +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- +/** \brief Data for OpenMPTarget thread execution */ + + +class OpenMPTargetExec { +public: + enum { MAX_ACTIVE_THREADS = 256*8*56*4 }; + enum { MAX_ACTIVE_TEAMS = MAX_ACTIVE_THREADS/32 }; + +private: + static void* scratch_ptr; + +public: + static void verify_is_process( const char * const ); + static void verify_initialized( const char * const ); + + static void* get_scratch_ptr(); + static void clear_scratch(); + static void resize_scratch( int64_t reduce_bytes , int64_t team_reduce_bytes, int64_t team_shared_bytes, int64_t thread_local_bytes ); + + static void* m_scratch_ptr; + static int64_t m_scratch_size; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +class OpenMPTargetExecTeamMember { +public: + + enum 
{ TEAM_REDUCE_SIZE = 512 }; + + /** \brief Thread states for team synchronization */ + enum { Active = 0 , Rendezvous = 1 }; + + typedef Kokkos::Experimental::OpenMPTarget execution_space ; + typedef execution_space::scratch_memory_space scratch_memory_space ; + + scratch_memory_space m_team_shared ; + int m_team_scratch_size[2] ; + int m_team_rank ; + int m_team_size ; + int m_league_rank ; + int m_league_size ; + int m_vector_length ; + int m_vector_lane ; + void* m_glb_scratch ; + + /* + // Fan-in team threads, root of the fan-in which does not block returns true + inline + bool team_fan_in() const + { + memory_fence(); + for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) { + + m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active ); + } + + if ( m_team_rank_rev ) { + m_exec.state_set( Rendezvous ); + memory_fence(); + m_exec.state_wait( Rendezvous ); + } + + return 0 == m_team_rank_rev ; + } + + inline + void team_fan_out() const + { + memory_fence(); + for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! 
( m_team_rank_rev & n ) ; n <<= 1 ) { + m_exec.pool_rev( m_team_base_rev + j )->state_set( Active ); + memory_fence(); + } + } + */ +public: + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_shmem() const + { return m_team_shared.set_team_thread_mode(0,1,0) ; } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& team_scratch(int) const + { return m_team_shared.set_team_thread_mode(0,1,0) ; } + + KOKKOS_INLINE_FUNCTION + const execution_space::scratch_memory_space& thread_scratch(int) const + { return m_team_shared.set_team_thread_mode(0,team_size(),team_rank()) ; } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } + KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } + KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } + + KOKKOS_INLINE_FUNCTION void team_barrier() const + { + #pragma omp barrier + } + + template<class ValueType> + KOKKOS_INLINE_FUNCTION + void team_broadcast(ValueType& value, const int& thread_id) const + { +/*#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { } +#else + // Make sure there is enough scratch space: + typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE + , ValueType , void >::type type ; + + type * const local_value = ((type*) m_exec.scratch_thread()); + if(team_rank() == thread_id) + *local_value = value; + memory_fence(); + team_barrier(); + value = *local_value; +#endif*/ + } + + template< class ValueType, class JoinOp > + KOKKOS_INLINE_FUNCTION ValueType + team_reduce( const ValueType & value + , const JoinOp & op_in ) const { + + #pragma omp barrier + + typedef ValueType value_type; + const JoinLambdaAdapter<value_type,JoinOp> op(op_in); + + // Make sure there is enough scratch space: + typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE + , value_type , void >::type type ; + + const int n_values = TEAM_REDUCE_SIZE/sizeof(value_type); + type * team_scratch = (type*) ((char*)m_glb_scratch + TEAM_REDUCE_SIZE*omp_get_team_num()); + for(int i = m_team_rank; i < n_values; i+= m_team_size) { + team_scratch[i] = value_type(); + } + + #pragma omp barrier + + for(int k=0; k<m_team_size; k+=n_values) { + if((k <= m_team_rank) && (k+n_values > m_team_rank)) + team_scratch[m_team_rank%n_values]+=value; + #pragma omp barrier + } + + for(int d = 1; d<n_values;d*=2) { + if((m_team_rank+d<n_values) && (m_team_rank%(2*d)==0)) { + team_scratch[m_team_rank] += team_scratch[m_team_rank+d]; + } + #pragma omp barrier + } + return team_scratch[0]; + } + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the + * league's parallel execution, be the scan's total. + * Parallel execution ordering of the league's teams is non-deterministic. + * As such the base value for each team's scan operation is similarly + * non-deterministic. 
+ */ + template< typename ArgType > + KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const + { + /* // Make sure there is enough scratch space: + typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ; + + volatile type * const work_value = ((type*) m_exec.scratch_thread()); + + *work_value = value ; + + memory_fence(); + + if ( team_fan_in() ) { + // The last thread to synchronize returns true, all other threads wait for team_fan_out() + // m_team_base[0] == highest ranking team member + // m_team_base[ m_team_size - 1 ] == lowest ranking team member + // + // 1) copy from lower to higher rank, initialize lowest rank to zero + // 2) prefix sum from lowest to highest rank, skipping lowest rank + + type accum = 0 ; + + if ( global_accum ) { + for ( int i = m_team_size ; i-- ; ) { + type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); + accum += val ; + } + accum = atomic_fetch_add( global_accum , accum ); + } + + for ( int i = m_team_size ; i-- ; ) { + type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); + const type offset = accum ; + accum += val ; + val = offset ; + } + + memory_fence(); + } + + team_fan_out(); + + return *work_value ;*/ + return ArgType(); + } + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. 
+ * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value ; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const + { return this-> template team_scan<Type>( value , 0 ); } + + //---------------------------------------- + // Private for the driver + +private: + + typedef execution_space::scratch_memory_space space ; + +public: + + inline + OpenMPTargetExecTeamMember( const int league_rank, const int league_size, const int team_size, const int vector_length //const TeamPolicyInternal< OpenMPTarget, Properties ...> & team + , void* const glb_scratch + , const int shmem_size_L1 + , const int shmem_size_L2 + ) + : m_team_shared(0,0) + , m_team_scratch_size{ shmem_size_L1 , shmem_size_L2 } + , m_team_rank(0) + , m_vector_length( vector_length ) + , m_team_size( team_size ) + , m_league_rank( league_rank ) + , m_league_size( league_size ) + , m_glb_scratch( glb_scratch ) + { + const int omp_tid = omp_get_thread_num(); + m_league_rank = league_rank; + m_team_rank = omp_tid/m_vector_length; + m_vector_lane = omp_tid%m_vector_length; + } + + static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; } +}; + + + +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Experimental::OpenMPTarget, Properties ... >: public PolicyTraits<Properties ...> +{ +public: + + //! Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + + typedef PolicyTraits<Properties ... 
> traits; + + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_vector_length = p.m_vector_length; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + return *this; + } + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) + { return 1024; } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType & ) + { return 256; } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType &, const int& vector_length) + { return 256/vector_length; } + + //---------------------------------------- + +private: + + int m_league_size ; + int m_team_size ; + int m_vector_length; + int m_team_alloc ; + int m_team_iter ; + + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; + + int m_chunk_size; + + inline void init( const int league_size_request + , const int team_size_request + , const int vector_length_request ) + { + m_league_size = league_size_request ; + + m_team_size = team_size_request; + + m_vector_length = vector_length_request; + + set_auto_chunk_size(); + } + +public: + + inline int vector_length() const { return m_vector_length ; } + inline int team_size() const { return m_team_size ; } + inline int league_size() const { return m_league_size ; } + inline size_t scratch_size(const int& level, int team_size_ = -1) const { + if(team_size_ < 0) + team_size_ = m_team_size; + return m_team_scratch_size[level] + team_size_*m_thread_scratch_size[level] ; + } + + /** \brief Specify league size, request team size */ + TeamPolicyInternal( typename 
traits::execution_space & + , int league_size_request + , int team_size_request + , int vector_length_request = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , team_size_request , vector_length_request); } + + TeamPolicyInternal( typename traits::execution_space & + , int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int vector_length_request = 1) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , 256/vector_length_request , vector_length_request ); } + + TeamPolicyInternal( int league_size_request + , int team_size_request + , int vector_length_request = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , team_size_request , vector_length_request); } + + TeamPolicyInternal( int league_size_request + , const Kokkos::AUTO_t & /* team_size_request */ + , int vector_length_request = 1 ) + : m_team_scratch_size { 0 , 0 } + , m_thread_scratch_size { 0 , 0 } + , m_chunk_size(0) + { init( league_size_request , 256/vector_length_request , vector_length_request ); } + + inline int team_alloc() const { return m_team_alloc ; } + inline int team_iter() const { return m_team_iter ; } + + inline int chunk_size() const { return m_chunk_size ; } + + /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; + p.m_chunk_size = chunk_size_; + return p; + } + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + return p; + }; + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_thread_scratch_size[level] = 
per_thread.value; + return p; + }; + + inline TeamPolicyInternal set_scratch_size(const int& level, const PerTeamValue& per_team, const PerThreadValue& per_thread) const { + TeamPolicyInternal p = *this; + p.m_team_scratch_size[level] = per_team.value; + p.m_thread_scratch_size[level] = per_thread.value; + return p; + }; + +private: + /** \brief finalize chunk_size if it was set to AUTO*/ + inline void set_auto_chunk_size() { + + int concurrency = traits::execution_space::thread_pool_size(0)/m_team_alloc; + if( concurrency==0 ) concurrency=1; + + if(m_chunk_size > 0) { + if(!Impl::is_integral_power_of_two( m_chunk_size )) + Kokkos::abort("TeamPolicy blocking granularity must be power of two" ); + } + + int new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_league_size) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( (new_chunk_size*40*concurrency < m_league_size ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_chunk_size = new_chunk_size; + } + +public: + typedef Impl::OpenMPTargetExecTeamMember member_type ; +}; +} // namespace Impl + + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Experimental { + +inline +int OpenMPTarget::thread_pool_size( int depth ) +{ + //return Impl::OpenMPTargetExec::pool_size(depth); + return omp_get_max_threads(); +} + +KOKKOS_INLINE_FUNCTION +int OpenMPTarget::thread_pool_rank() +{ + return omp_get_thread_num(); +} + +} // namespace Experimental +} // namespace Kokkos + + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember> + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>(thread,count); +} + 
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>
+  TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) {
+  return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>(thread,begin,end);
+}
+
+template<typename iType>
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >
+  ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) {
+  return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >(thread,count);
+}
+
+KOKKOS_INLINE_FUNCTION
+Impl::ThreadSingleStruct<Impl::OpenMPTargetExecTeamMember> PerTeam(const Impl::OpenMPTargetExecTeamMember& thread) {
+  return Impl::ThreadSingleStruct<Impl::OpenMPTargetExecTeamMember>(thread);
+}
+
+KOKKOS_INLINE_FUNCTION
+Impl::VectorSingleStruct<Impl::OpenMPTargetExecTeamMember> PerThread(const Impl::OpenMPTargetExecTeamMember& thread) {
+  return Impl::VectorSingleStruct<Impl::OpenMPTargetExecTeamMember>(thread);
+}
+} // namespace Kokkos
+
+namespace Kokkos {
+
+  /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1.
+   *
+   * The range i=0..N-1 is mapped to all threads of the calling thread team.
+   * This functionality requires C++11 support.*/
+template<typename iType, class Lambda>
+KOKKOS_INLINE_FUNCTION
+void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>& loop_boundaries, const Lambda& lambda) {
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
+    lambda(i);
+}
+
+/** \brief Inter-thread parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all threads of the calling thread team and a summation of
+ * val is performed and put into result. This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>& loop_boundaries,
+                     const Lambda & lambda, ValueType& result) {
+
+  result = ValueType(); // any incoming value of result is discarded
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    result+=tmp;
+  }
+
+  // NOTE(review): cross-thread combine is disabled, result only covers the iterations of the loop above: result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>());
+}
+
+/** \brief Inter-thread parallel_reduce with a custom join. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all threads of the calling thread team and a reduction of
+ * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
+ * The input value of init_result is used as the starting value of the accumulation. Therefore
+ * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
+ * '1 for *'). This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>& loop_boundaries,
+                     const Lambda & lambda, const JoinType& join, ValueType& init_result) {
+
+  ValueType result = init_result;
+
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    join(result,tmp);
+  }
+
+  init_result = result; // write-back was missing, the reduction was silently dropped; cross-thread combine still disabled: loop_boundaries.thread.team_reduce(result,join)
+}
+
+} //namespace Kokkos
+
+
+namespace Kokkos {
+/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes of the calling thread.
+ * This functionality requires C++11 support.*/
+template<typename iType, class Lambda>
+KOKKOS_INLINE_FUNCTION
+void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >&
+    loop_boundaries, const Lambda& lambda) {
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment)
+    lambda(i);
+}
+
+/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a summation of
+ * val is performed and put into result. This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >&
+      loop_boundaries, const Lambda & lambda, ValueType& result) {
+  result = ValueType(); // any incoming value of result is discarded
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    result+=tmp;
+  }
+}
+
+/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes of the calling thread and a reduction of
+ * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result.
+ * The input value of init_result is used as the starting value of the accumulation. Therefore
+ * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or
+ * '1 for *'). This functionality requires C++11 support.*/
+template< typename iType, class Lambda, typename ValueType, class JoinType >
+KOKKOS_INLINE_FUNCTION
+void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >&
+      loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) {
+
+  ValueType result = init_result;
+#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+#pragma ivdep
+#endif
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    ValueType tmp = ValueType();
+    lambda(i,tmp);
+    join(result,tmp);
+  }
+  init_result = result;
+}
+
+/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final)
+ *          for each i=0..N-1.
+ *
+ * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed.
+ * Depending on the target execution space the operator might be called twice: once with final=false
+ * and once with final=true. When final==true val contains the prefix sum value. The contribution of this
+ * "i" needs to be added to val no matter whether final==true or not. In a serial execution
+ * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set
+ * to the final sum value over all vector lanes.
+ * This functionality requires C++11 support.*/
+template< typename iType, class FunctorType >
+KOKKOS_INLINE_FUNCTION
+void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >&
+      loop_boundaries, const FunctorType & lambda) {
+
+  typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ;
+  typedef typename ValueTraits::value_type value_type ;
+
+  value_type scan_val = value_type();
+
+#ifdef KOKKOS_HAVE_PRAGMA_IVDEP
+#pragma ivdep
+#endif
+  for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) {
+    lambda(i,scan_val,true); // single pass: the functor is only ever invoked with final==true
+  }
+}
+
+} // namespace Kokkos
+
+namespace Kokkos {
+
+template<class FunctorType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::VectorSingleStruct<Impl::OpenMPTargetExecTeamMember>& single_struct, const FunctorType& lambda) {
+  lambda(); // unconditional: no lane check is made here
+}
+
+template<class FunctorType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::ThreadSingleStruct<Impl::OpenMPTargetExecTeamMember>& single_struct, const FunctorType& lambda) {
+  if(single_struct.team_member.team_rank()==0) lambda(); // only team rank 0 runs the functor
+}
+
+template<class FunctorType, class ValueType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::VectorSingleStruct<Impl::OpenMPTargetExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
+  lambda(val); // unconditional: no lane check is made here
+}
+
+template<class FunctorType, class ValueType>
+KOKKOS_INLINE_FUNCTION
+void single(const Impl::ThreadSingleStruct<Impl::OpenMPTargetExecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) {
+  if(single_struct.team_member.team_rank()==0) {
+    lambda(val);
+  }
+  single_struct.team_member.team_broadcast(val,0); // hand rank 0's val to the rest of the team
+}
+}
+
+#endif /* #ifndef KOKKOS_OPENMPTARGETEXEC_HPP */
+
diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..b624384e7cb8716c02b745a5a5cf0d78360da93d
--- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp @@ -0,0 +1,767 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMPTARGET_PARALLEL_HPP +#define KOKKOS_OPENMPTARGET_PARALLEL_HPP + +#include <omp.h> +#include <iostream> +#include <Kokkos_Parallel.hpp> +#include <OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Experimental::OpenMPTarget + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + + +public: + + inline void execute() const { + execute_impl<WorkTag>(); + } + + template< class TagType > + inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl() const + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename Policy::member_type begin = m_policy.begin(); + const typename Policy::member_type end = m_policy.end(); + + #pragma omp target teams distribute parallel for map(to:this->m_functor) + for(int i=begin; i<end; i++) + m_functor(i); + } + + + template< class TagType > + inline + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + execute_impl() const + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename Policy::member_type begin = m_policy.begin(); + const typename Policy::member_type end = m_policy.end(); + + #pragma omp target teams distribute parallel for num_threads(128) map(to:this->m_functor) + for(int i=begin; i<end; i++) + m_functor(TagType(),i); + } + + inline + ParallelFor( const FunctorType & arg_functor + , Policy arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<class FunctorType, class PolicyType, class ReducerType, class PointerType, class ValueType, int FunctorHasJoin, int UseReducerType> +struct ParallelReduceSpecialize { + static inline void execute(const FunctorType& f, const PolicyType& p , PointerType result_ptr) { + printf("Error: Invalid Specialization %i %i\n",FunctorHasJoin,UseReducerType); + } +}; + +template<class FunctorType, class ReducerType, class PointerType, class ValueType, class ... 
PolicyArgs> +struct ParallelReduceSpecialize<FunctorType, Kokkos::RangePolicy<PolicyArgs...>, ReducerType, PointerType, ValueType, 0,0> { + typedef Kokkos::RangePolicy<PolicyArgs...> PolicyType; + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom:result) reduction(+: result) + for(int i=begin; i<end; i++) + f(i,result); + + *result_ptr=result; + } + + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom: result) reduction(+: result) + for(int i=begin; i<end; i++) + f(TagType(),i,result); + + *result_ptr=result; + } + + + inline static + void execute(const FunctorType& f, const PolicyType& p, PointerType ptr) { + execute_impl<typename PolicyType::work_tag>(f,p,ptr); + } +}; +/* +template<class FunctorType, class PolicyType, class ReducerType, class PointerType, class ValueType> +struct ParallelReduceSpecialize<FunctorType, PolicyType, 
ReducerType, PointerType, ValueType, 0,1> { + + #pragma omp declare reduction(custom: ValueType : ReducerType::join(omp_out, omp_in)) initializer ( ReducerType::init(omp_priv) ) + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom:result) reduction(custom: result) + for(int i=begin; i<end; i++) + f(i,result); + + *result_ptr=result; + } + + + template< class TagType > + inline static + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const typename PolicyType::member_type begin = p.begin(); + const typename PolicyType::member_type end = p.end(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(512) map(to:f) map(tofrom: result) reduction(custom: result) + for(int i=begin; i<end; i++) + f(TagType(),i,result); + + *result_ptr=result; + } + + + inline static + void execute(const FunctorType& f, const PolicyType& p, PointerType ptr) { + execute_impl<typename PolicyType::work_tag>(f,p,ptr); + } +}; +*/ + +template< class FunctorType , class ReducerType, class ... 
Traits > +class ParallelReduce< FunctorType + , Kokkos::RangePolicy< Traits ...> + , ReducerType + , Kokkos::Experimental::OpenMPTarget + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... > Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + // Static Assert WorkTag void if ReducerType not InvalidType + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + + enum {HasJoin = ReduceFunctorHasJoin<FunctorType>::value }; + enum {UseReducer = is_reducer_type<ReducerType>::value }; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + typedef ParallelReduceSpecialize<FunctorType,Policy,ReducerType,pointer_type,typename ValueTraits::value_type,HasJoin,UseReducer> ParForSpecialize; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + +public: + inline void execute() const { + ParForSpecialize::execute(m_functor,m_policy,m_result_ptr); + } + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ViewType & arg_result_view + , typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace 
>::value + , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a Kokkos::View in HostSpace" );*/ + } + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a Kokkos::View in HostSpace" );*/ + } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelScan< FunctorType + , Kokkos::RangePolicy< Traits ... > + , Kokkos::Experimental::OpenMPTarget + > +{ +private: + + typedef Kokkos::RangePolicy< Traits ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ; + typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + const FunctorType m_functor ; + const Policy m_policy ; +/* + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( iwork , update , final ); + } + } + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_range( const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update , const bool final ) + { + const TagType t{} ; + #ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_HAVE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + functor( t , iwork , update , final ); + } + } +*/ +public: + + inline + void execute() const + { +/* OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_scan"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_scan"); + + OpenMPTargetExec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 ); + +#pragma omp parallel + { + OpenMPTargetExec & exec = * OpenMPTargetExec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + const pointer_type ptr = + pointer_type( exec.scratch_reduce() ) + + ValueTraits::value_count( m_functor ); + ParallelScan::template exec_range< WorkTag > + ( m_functor , range.begin() , range.end() + , ValueInit::init( m_functor , ptr ) , false ); + } + + { + const unsigned thread_count = OpenMPTargetExec::pool_size(); + const unsigned value_count = ValueTraits::value_count( m_functor ); + + pointer_type ptr_prev = 0 ; + + for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) { + + pointer_type ptr = pointer_type( OpenMPTargetExec::pool_rev(rank_rev)->scratch_reduce() ); + + if ( ptr_prev ) { + for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; } + ValueJoin::join( m_functor , ptr + value_count , ptr ); + } + else { + ValueInit::init( m_functor , ptr ); + } + + ptr_prev = ptr ; + } + } + +#pragma omp parallel + { + OpenMPTargetExec & exec = * OpenMPTargetExec::get_thread_omp(); + const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + const pointer_type ptr = pointer_type( exec.scratch_reduce() ); + 
ParallelScan::template exec_range< WorkTag > + ( m_functor , range.begin() , range.end() + , ValueOps::reference( ptr ) , true ); + } +*/ + } + + //---------------------------------------- + + inline + ParallelScan( const FunctorType & arg_functor + , const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + {} + + //---------------------------------------- +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Properties > +class ParallelFor< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , Kokkos::Experimental::OpenMPTarget + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Experimental::OpenMPTarget, Properties ... > Policy ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + const FunctorType m_functor ; + const Policy m_policy ; + const int m_shmem_size ; + +public: + + inline void execute() const { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + execute_impl<WorkTag>(); + } + +private: + template< class TagType > + inline + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl() const + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const int league_size = m_policy.league_size(); + const int team_size = m_policy.team_size(); + const int vector_length = m_policy.vector_length(); + const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS<league_size?OpenMPTargetExec::MAX_ACTIVE_TEAMS:league_size; + + 
OpenMPTargetExec::resize_scratch(0,Policy::member_type::TEAM_REDUCE_SIZE,0,0); + void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + + #pragma omp target teams distribute parallel for num_teams(league_size) num_threads(team_size*vector_length) schedule(static,1) \ + map(to:this->m_functor,scratch_ptr) + for(int i=0 ; i<league_size*team_size*vector_length ; i++) { + typename Policy::member_type team(i/(team_size*vector_length),league_size,team_size,vector_length, scratch_ptr, 0,0); + m_functor(team); + } + } + + + template< class TagType > + inline + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + execute_impl() const + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + const int league_size = m_policy.league_size(); + const int team_size = m_policy.team_size(); + const int vector_length = m_policy.vector_length(); + const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS<league_size?OpenMPTargetExec::MAX_ACTIVE_TEAMS:league_size; + + OpenMPTargetExec::resize_scratch(0,Policy::member_type::TEAM_REDUCE_SIZE,0,0); + void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + #pragma omp target teams distribute parallel for num_teams(league_size) num_threads(team_size*vector_length) schedule(static,1) \ + map(to:this->m_functor,scratch_ptr) + for(int i=0 ; i<league_size ; i++) { + typename Policy::member_type team(i/(team_size*vector_length),league_size,team_size,vector_length, scratch_ptr, 0,0); + m_functor(TagType(), team); + } + } + +public: + + inline + ParallelFor( const FunctorType & arg_functor , + const Policy & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + {} +}; + +template<class FunctorType, class ReducerType, 
class PointerType, class ValueType, class ... PolicyArgs> +struct ParallelReduceSpecialize<FunctorType, TeamPolicyInternal<PolicyArgs...>, ReducerType, PointerType, ValueType, 0,0> { + typedef TeamPolicyInternal<PolicyArgs...> PolicyType; + + template< class TagType > + inline static + typename std::enable_if< std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + + const int league_size = p.league_size(); + const int team_size = p.team_size(); + const int vector_length = p.vector_length(); + const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS<league_size?OpenMPTargetExec::MAX_ACTIVE_TEAMS:league_size; + + OpenMPTargetExec::resize_scratch(0,PolicyType::member_type::TEAM_REDUCE_SIZE,0,0); + void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(nteams) num_threads(team_size*vector_length) \ + map(to:f,scratch_ptr) map(tofrom:result) reduction(+: result) schedule(static,1) + for(int i=0 ; i<league_size*team_size*vector_length ; i++) { + typename PolicyType::member_type team(i/(team_size*vector_length),league_size,team_size,vector_length, scratch_ptr, 0,0); + f(team,result); + if(team.m_vector_lane!=0) result = 0; + } + + *result_ptr=result; + } + + + template< class TagType > + inline static + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + execute_impl(const FunctorType& f, const PolicyType& p, PointerType result_ptr) + { + OpenMPTargetExec::verify_is_process("Kokkos::Experimental::OpenMPTarget parallel_for"); + OpenMPTargetExec::verify_initialized("Kokkos::Experimental::OpenMPTarget parallel_for"); + + const int league_size = p.league_size(); + const int team_size = p.team_size(); + const int vector_length = p.vector_length(); + const int nteams = OpenMPTargetExec::MAX_ACTIVE_TEAMS<league_size?OpenMPTargetExec::MAX_ACTIVE_TEAMS:league_size; + + OpenMPTargetExec::resize_scratch(0,PolicyType::member_type::TEAM_REDUCE_SIZE,0,0); + void* scratch_ptr = OpenMPTargetExec::get_scratch_ptr(); + + ValueType result = ValueType(); + #pragma omp target teams distribute parallel for num_teams(nteams) num_threads(team_size*vector_length) \ + map(to:f,scratch_ptr) map(tofrom:result) reduction(+: result) schedule(static,1) + for(int i=0 ; i<league_size*team_size*vector_length ; i++) { + typename PolicyType::member_type team(i/(team_size*vector_length),league_size,team_size,vector_length, scratch_ptr, 0,0); + f(TagType(),team,result); + if(team.vector_lane!=0) result = 0; + } + *result_ptr=result; + } + + + inline static + void execute(const FunctorType& f, const PolicyType& p, PointerType ptr) { + execute_impl<typename PolicyType::work_tag>(f,p,ptr); + } +}; + + +template< class FunctorType , class ReducerType, class ... Properties > +class ParallelReduce< FunctorType + , Kokkos::TeamPolicy< Properties ... > + , ReducerType + , Kokkos::Experimental::OpenMPTarget + > +{ +private: + + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Experimental::OpenMPTarget, Properties ... 
> Policy ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + typedef typename ValueTraits::value_type value_type ; + + enum {HasJoin = ReduceFunctorHasJoin<FunctorType>::value }; + enum {UseReducer = is_reducer_type<ReducerType>::value }; + + typedef ParallelReduceSpecialize<FunctorType,Policy,ReducerType,pointer_type,typename ValueTraits::value_type,HasJoin,UseReducer> ParForSpecialize; + + const FunctorType m_functor ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + const int m_shmem_size ; + +public: + + inline + void execute() const { + ParForSpecialize::execute(m_functor,m_policy,m_result_ptr); + } + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor , + const Policy & arg_policy , + const ViewType & arg_result , + typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type<ReducerType>::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result.ptr_on_device() ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + {} + + inline + ParallelReduce( const FunctorType & arg_functor + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( 
arg_functor ) + , m_policy( arg_policy ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::Experimental::OpenMPTarget must be a Kokkos::View in HostSpace" );*/ + } + +}; + +} // namespace Impl +} // namespace Kokkos + + +namespace Kokkos { +namespace Impl { + + template<typename iType> + struct TeamThreadRangeBoundariesStruct<iType,OpenMPTargetExecTeamMember> { + typedef iType index_type; + const iType start; + const iType end; + const iType increment; + + inline + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& count): + start( thread_.team_rank() ), + end( count ), + increment( thread_.team_size() ) + {} + inline + TeamThreadRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& begin_, const iType& end_): + start( begin_+thread_.team_rank() ), + end( end_ ), + increment( thread_.team_size() ) + {} + }; + + template<typename iType> + struct ThreadVectorRangeBoundariesStruct<iType,OpenMPTargetExecTeamMember> { + typedef iType index_type; + const iType start; + const iType end; + const iType increment; + + inline + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& count): + start( thread_.m_vector_lane ), + end( count ), + increment( thread_.m_vector_length ) + {} + inline + ThreadVectorRangeBoundariesStruct (const OpenMPTargetExecTeamMember& thread_, const iType& begin_, const iType& end_): + start( begin_+thread_.m_vector_lane ), + end( end_ ), + increment( thread_.m_vector_length ) + {} + }; + + template<typename iType> + KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember> + TeamThreadRange(const 
Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>(thread,count); + } + + template<typename iType> + KOKKOS_INLINE_FUNCTION + Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember> + TeamThreadRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>(thread,begin,end); + } + + template<typename iType> + KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember > + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember >(thread,count); + } + + template<typename iType> + KOKKOS_INLINE_FUNCTION + Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember> + ThreadVectorRange(const Impl::OpenMPTargetExecTeamMember& thread, const iType& begin, const iType& end) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPTargetExecTeamMember>(thread,begin,end); + } + +} + +} +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_OPENMPTARGET_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9c8b90637755a7d96c6f1182f35fc73e248b57ef --- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp @@ -0,0 +1,329 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <impl/Kokkos_TaskQueue_impl.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template class TaskQueue< Kokkos::Experimental::OpenMPTarget > ; + +//---------------------------------------------------------------------------- + +TaskExec< Kokkos::Experimental::OpenMPTarget >:: +TaskExec() + : m_self_exec( 0 ) + , m_team_exec( 0 ) + , m_sync_mask( 0 ) + , m_sync_value( 0 ) + , m_sync_step( 0 ) + , m_group_rank( 0 ) + , m_team_rank( 0 ) + , m_team_size( 1 ) +{ +} + +TaskExec< Kokkos::Experimental::OpenMPTarget >:: +TaskExec( Kokkos::Impl::OpenMPTargetExec & arg_exec , int const arg_team_size ) + : m_self_exec( & arg_exec ) + , m_team_exec( arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size) ) + , m_sync_mask( 0 ) + , m_sync_value( 0 ) + , m_sync_step( 0 ) + , m_group_rank( arg_exec.pool_rank_rev() / arg_team_size ) + , m_team_rank( arg_exec.pool_rank_rev() % arg_team_size ) + , m_team_size( arg_team_size ) +{ + // This team spans + // m_self_exec->pool_rev( team_size * group_rank ) + // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) + + int64_t volatile * const sync = (int64_t *) m_self_exec->scratch_reduce(); + + sync[0] = int64_t(0) ; + sync[1] = int64_t(0) ; + + for ( int i = 0 ; i < m_team_size ; ++i ) { + m_sync_value |= int64_t(1) << (8*i); + m_sync_mask |= int64_t(3) << (8*i); + } + + Kokkos::memory_fence(); +} + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +void TaskExec< Kokkos::Experimental::OpenMPTarget >::team_barrier_impl() const +{ + if ( m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t)) ) { + 
Kokkos::abort("TaskQueue<OpenMPTarget> scratch_reduce memory too small"); + } + + // Use team shared memory to synchronize. + // Alternate memory locations between barriers to avoid a sequence + // of barriers overtaking one another. + + int64_t volatile * const sync = + ((int64_t *) m_team_exec->scratch_reduce()) + ( m_sync_step & 0x01 ); + + // This team member sets one byte within the sync variable + int8_t volatile * const sync_self = + ((int8_t *) sync) + m_team_rank ; + +#if 0 +fprintf( stdout + , "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n" + , m_group_rank + , m_team_rank + , m_sync_step + , m_sync_value + , *sync + ); +fflush(stdout); +#endif + + *sync_self = int8_t( m_sync_value & 0x03 ); // signal arrival + + while ( m_sync_value != *sync ); // wait for team to arrive + +#if 0 +fprintf( stdout + , "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n" + , m_group_rank + , m_team_rank + , m_sync_step + , m_sync_value + , *sync + ); +fflush(stdout); +#endif + + ++m_sync_step ; + + if ( 0 == ( 0x01 & m_sync_step ) ) { // Every other step + m_sync_value ^= m_sync_mask ; + if ( 1000 < m_sync_step ) m_sync_step = 0 ; + } +} + +#endif + +//---------------------------------------------------------------------------- + +void TaskQueueSpecialization< Kokkos::Experimental::OpenMPTarget >::execute + ( TaskQueue< Kokkos::Experimental::OpenMPTarget > * const queue ) +{ + using execution_space = Kokkos::Experimental::OpenMPTarget ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using PoolExec = Kokkos::Impl::OpenMPTargetExec ; + using Member = TaskExec< execution_space > ; + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + // Required: team_size <= 8 + + const int team_size = PoolExec::pool_size(2); // Threads per core + // const int team_size = PoolExec::pool_size(1); // Threads per NUMA + + if ( 8 < team_size ) { + 
Kokkos::abort("TaskQueue<OpenMPTarget> unsupported team size"); + } + +#pragma omp parallel + { + PoolExec & self = *PoolExec::get_thread_omp(); + + Member single_exec ; + Member team_exec( self , team_size ); + + // Team shared memory + task_root_type * volatile * const task_shared = + (task_root_type **) team_exec.m_team_exec->scratch_thread(); + +// Barrier across entire OpenMPTarget thread pool to ensure initialization +#pragma omp barrier + + // Loop until all queues are empty and no tasks in flight + + do { + + task_root_type * task = 0 ; + + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. + + if ( 0 == team_exec.team_rank() ) { + + task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + } + + // Team lead broadcast acquired task to team members: + + if ( 1 < team_exec.team_size() ) { + + if ( 0 == team_exec.team_rank() ) *task_shared = task ; + + // Fence to be sure task_shared is stored before the barrier + Kokkos::memory_fence(); + + // Whole team waits for every team member to reach this statement + team_exec.team_barrier(); + + // Fence to be sure task_shared is stored + Kokkos::memory_fence(); + + task = *task_shared ; + } + +#if 0 +fprintf( stdout + , "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n" + , team_exec.m_group_rank + , team_exec.m_team_rank + , uintptr_t(task_shared) + , uintptr_t(task) + ); +fflush(stdout); +#endif + + if ( 0 == task ) break ; // 0 == m_ready_count + + if ( end == task ) { + // All team members wait for whole team to reach this statement. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. 
+ team_exec.team_barrier(); + } + else if ( task_root_type::TaskTeam == task->m_task_type ) { + // Thread Team Task + (*task->m_apply)( task , & team_exec ); + + // The m_apply function performs a barrier + + if ( 0 == team_exec.team_rank() ) { + // team member #0 completes the task, which may delete the task + queue->complete( task ); + } + } + else { + // Single Thread Task + + if ( 0 == team_exec.team_rank() ) { + + (*task->m_apply)( task , & single_exec ); + + queue->complete( task ); + } + + // All team members wait for whole team to reach this statement. + // Not necessary to complete the task. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. + team_exec.team_barrier(); + } + } while(1); + } +// END #pragma omp parallel + +} + +void TaskQueueSpecialization< Kokkos::Experimental::OpenMPTarget >:: + iff_single_thread_recursive_execute + ( TaskQueue< Kokkos::Experimental::OpenMPTarget > * const queue ) +{ + using execution_space = Kokkos::Experimental::OpenMPTarget ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = TaskExec< execution_space > ; + + if ( 1 == omp_get_num_threads() ) { + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member single_exec ; + + task_root_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task , & single_exec ); + + queue->complete( task ); + + } while(1); + } +} + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_OPENMPTARGET ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + diff --git 
a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..91c9d67767a137bca80641b1a358d376d153ab20 --- /dev/null +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp @@ -0,0 +1,356 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_OPENMP_TASK_HPP +#define KOKKOS_IMPL_OPENMP_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskQueueSpecialization< Kokkos::Experimental::OpenMPTarget > +{ +public: + + using execution_space = Kokkos::Experimental::OpenMPTarget ; + using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; + using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + + // Must specify memory space + using memory_space = Kokkos::HostSpace ; + + static + void iff_single_thread_recursive_execute( queue_type * const ); + + // Must provide task queue execution function + static void execute( queue_type * const ); + + // Must provide mechanism to set function pointer in + // execution space from the host process. 
+ template< typename FunctorType > + static + void proc_set_apply( task_base_type::function_type * ptr ) + { + using TaskType = TaskBase< Kokkos::Experimental::OpenMPTarget + , typename FunctorType::value_type + , FunctorType + > ; + *ptr = TaskType::apply ; + } +}; + +extern template class TaskQueue< Kokkos::Experimental::OpenMPTarget > ; + +//---------------------------------------------------------------------------- + +template<> +class TaskExec< Kokkos::Experimental::OpenMPTarget > +{ +private: + + TaskExec( TaskExec && ) = delete ; + TaskExec( TaskExec const & ) = delete ; + TaskExec & operator = ( TaskExec && ) = delete ; + TaskExec & operator = ( TaskExec const & ) = delete ; + + + using PoolExec = Kokkos::Impl::OpenMPTargetExec ; + + friend class Kokkos::Impl::TaskQueue< Kokkos::Experimental::OpenMPTarget > ; + friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Experimental::OpenMPTarget > ; + + PoolExec * const m_self_exec ; ///< This thread's thread pool data structure + PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure + int64_t m_sync_mask ; + int64_t mutable m_sync_value ; + int mutable m_sync_step ; + int m_group_rank ; ///< Which "team" subset of thread pool + int m_team_rank ; ///< Which thread within a team + int m_team_size ; + + TaskExec(); + TaskExec( PoolExec & arg_exec , int arg_team_size ); + + void team_barrier_impl() const ; + +public: + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + void * team_shared() const + { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } + + int team_shared_size() const + { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } + + /**\brief Whole team enters this function call + * before any team member returns from + * this function call. 
+ */ + void team_barrier() const { if ( 1 < m_team_size ) team_barrier_impl(); } +#else + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } +#endif + + KOKKOS_INLINE_FUNCTION + int team_rank() const { return m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const { return m_team_size ; } +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > > +TeamThreadRange + ( Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > & thread + , const iType & count ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >(thread,count); +} + +template<typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::OpenMPTarget > > +TeamThreadRange + ( Impl:: TaskExec< Kokkos::Experimental::OpenMPTarget > & thread + , const iType & start + , const iType & end ) +{ + return Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::OpenMPTarget > >(thread,start,end); +} + +/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. + * + * The range i=0..N-1 is mapped to all threads of the calling thread team. + * This functionality requires C++11 support. 
+*/ +template<typename iType, class Lambda> +KOKKOS_INLINE_FUNCTION +void parallel_for + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries + , const Lambda& lambda + ) +{ + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i); + } +} + +template<typename iType, class Lambda, typename ValueType> +KOKKOS_INLINE_FUNCTION +void parallel_reduce + ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries + , const Lambda& lambda + , ValueType& initialized_result) +{ + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i, result); + } + + if ( 1 < loop_boundaries.thread.team_size() ) { + + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + + loop_boundaries.thread.team_barrier(); + shared[team_rank] = result; + + loop_boundaries.thread.team_barrier(); + + // reduce across threads to thread 0 + if (team_rank == 0) { + for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { + shared[0] += shared[i]; + } + } + + loop_boundaries.thread.team_barrier(); + + // broadcast result + initialized_result = shared[0]; + } + else { + initialized_result = result ; + } +} + +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + ValueType result = initialized_result; + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; 
i+=loop_boundaries.increment) { + lambda(i, result); + } + + if ( 1 < loop_boundaries.thread.team_size() ) { + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + + loop_boundaries.thread.team_barrier(); + shared[team_rank] = result; + + loop_boundaries.thread.team_barrier(); + + // reduce across threads to thread 0 + if (team_rank == 0) { + for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { + join(shared[0], shared[i]); + } + } + + loop_boundaries.thread.team_barrier(); + + // broadcast result + initialized_result = shared[0]; + } + else { + initialized_result = result ; + } +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries, + const Lambda & lambda, + ValueType& initialized_result) +{ +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType, class JoinType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& initialized_result) +{ +} + +template< typename ValueType, typename iType, class Lambda > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries, + const Lambda & lambda) +{ + ValueType accum = 0 ; + ValueType val, local_total; + ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); + int team_size = loop_boundaries.thread.team_size(); + int team_rank = loop_boundaries.thread.team_rank(); // member num within the team + + // Intra-member scan + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + local_total 
= 0; + lambda(i,local_total,false); + val = accum; + lambda(i,val,true); + accum += local_total; + } + + shared[team_rank] = accum; + loop_boundaries.thread.team_barrier(); + + // Member 0 do scan on accumulated totals + if (team_rank == 0) { + for( iType i = 1; i < team_size; i+=1) { + shared[i] += shared[i-1]; + } + accum = 0; // Member 0 set accum to 0 in preparation for inter-member scan + } + + loop_boundaries.thread.team_barrier(); + + // Inter-member scan adding in accumulated totals + if (team_rank != 0) { accum = shared[team_rank-1]; } + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + local_total = 0; + lambda(i,local_total,false); + val = accum; + lambda(i,val,true); + accum += local_total; + } +} + +// placeholder for future function +template< typename iType, class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_scan + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Experimental::OpenMPTarget > >& loop_boundaries, + const Lambda & lambda) +{ +} + + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */ + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp index 1b92494084c10763ad60ba458888204bd2bd77a3..b91b6171477fec4168870ceb5738299b4accb545 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp @@ -41,12 +41,13 @@ //@HEADER */ -#include <Kokkos_Core_fwd.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_QTHREADS ) -#include <stdio.h> -#include <stdlib.h> +#include <Kokkos_Core_fwd.hpp> + +#include <cstdio> +#include <cstdlib> #include <iostream> #include <sstream> 
#include <utility> @@ -215,6 +216,8 @@ void Qthreads::fence() int Qthreads::shepherd_size() const { return Impl::s_number_shepherds; } int Qthreads::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd; } +const char* Qthreads::name() { return "Qthreads"; } + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -516,4 +519,7 @@ QthreadsTeamPolicyMember::QthreadsTeamPolicyMember( const QthreadsTeamPolicyMemb } // namespace Kokkos +#else +void KOKKOS_SRC_QTHREADS_EXEC_PREVENT_LINK_ERROR() {} #endif // #if defined( KOKKOS_ENABLE_QTHREADS ) + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp index 64856eb99e014272fd92f638e2d7f312d3039120..c3b773e073deb8f60f12eb4a89978617768154af 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp @@ -44,6 +44,9 @@ #ifndef KOKKOS_QTHREADSEXEC_HPP #define KOKKOS_QTHREADSEXEC_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) + #include <impl/Kokkos_spinwait.hpp> //---------------------------------------------------------------------------- @@ -637,4 +640,6 @@ public: //---------------------------------------------------------------------------- +#endif #endif // #define KOKKOS_QTHREADSEXEC_HPP + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index 9f996075403f7cdd06fddfcb60d829dfab64bf0a..b45c7114a37db8a869feeb025d388cb62cad86b7 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -44,6 +44,9 @@ #ifndef KOKKOS_QTHREADS_PARALLEL_HPP #define KOKKOS_QTHREADS_PARALLEL_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) + #include <vector> #include <Kokkos_Parallel.hpp> @@ -724,4 +727,6 @@ void single(const 
Impl::ThreadSingleStruct<Impl::QthreadsTeamPolicyMember>& sing } // namespace Kokkos +#endif #endif /* #define KOKKOS_QTHREADS_PARALLEL_HPP */ + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp index 614a2c03f03e8c9cfbd15653295a254a350fb25a..3a1493a848c96dc5bfce9e87672dc14773928683 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp @@ -41,10 +41,10 @@ //@HEADER */ -#include <Kokkos_Core.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) +#include <Kokkos_Core.hpp> #include <impl/Kokkos_TaskQueue_impl.hpp> //---------------------------------------------------------------------------- @@ -314,7 +314,7 @@ void TaskQueueSpecialization< Kokkos::Qthreads >:: }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- - +#else +void KOKKOS_SRC_QTHREADS_TASK_PREVENT_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ - diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp index 836452dde93767f172e47d2c19f74498e4dde246..4d393a6ed3bf921ec7d81abb8a84d61c2381befd 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp @@ -44,7 +44,8 @@ #ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP #define KOKKOS_IMPL_QTHREADS_TASK_HPP -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old 
b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old index aa159cff6a5211d721a7b6beb31a5969851d080d..a59afb2881e9f6771f8b9808af07ed6f54e25496 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,20 +36,22 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ // Experimental unified task-data parallel manycore LDRD. 
-#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_QTHREADS ) -#include <stdio.h> +#include <Kokkos_Core_fwd.hpp> + +#include <cstdio> +#include <cstdlib> -#include <stdlib.h> #include <stdexcept> #include <iostream> #include <sstream> @@ -448,7 +450,7 @@ TaskPolicy< Kokkos::Qthreads >:: TaskPolicy ( const unsigned /* arg_task_max_count */ , const unsigned /* arg_task_max_size */ - , const unsigned arg_task_default_dependence_capacity + , const unsigned arg_task_default_dependence_capacity , const unsigned arg_task_team_size ) : m_default_dependence_capacity( arg_task_default_dependence_capacity ) @@ -484,5 +486,8 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads > & policy ) } // namespace Experimental } // namespace Kokkos +#else +void KOKKOS_CORE_SRC_QTHREADS_KOKKOS_QTHREADS_TASKPOLICY_PREVENT_LINK_ERROR() {} #endif // #if defined( KOKKOS_ENABLE_TASKDAG ) #endif // #if defined( KOKKOS_ENABLE_QTHREADS ) + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old index 1e5a4dc593cc6de9fff9d2a762b4f864c6c12e9c..adb6859763d39fbded63fdf476a6b04f639241cf 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -46,6 +46,9 @@ #ifndef KOKKOS_QTHREADS_TASKSCHEDULER_HPP #define KOKKOS_QTHREADS_TASKSCHEDULER_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_TASKDAG ) + #include <string> #include <typeinfo> #include <stdexcept> @@ -69,7 +72,6 @@ #include <impl/Kokkos_FunctorAdapter.hpp> -#if defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- @@ -505,7 +507,7 @@ public: template< class ValueType > const Future< ValueType , execution_space > & - spawn( const Future< ValueType , execution_space > & f + spawn( const Future< ValueType , execution_space > & f , const bool priority = false ) const { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) @@ -632,7 +634,7 @@ public: } template< class FunctorType > - void respawn( FunctorType * task_functor + void respawn( FunctorType * task_functor , const bool priority = false ) const { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp index 55235cd6d27a9df0e40bd28dff8caa13df94073e..f832e708794e64815b3eff2928d8a340e0991d45 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp @@ -41,7 +41,11 @@ //@HEADER */ -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#ifndef KOKKOS_QTHREADS_TASKQUEUE_HPP +#define KOKKOS_QTHREADS_TASKQUEUE_HPP + +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -60,7 +64,7 @@ private: using execution_space = Kokkos::Qthread ; using memory_space = Kokkos::HostSpace using 
device_type = Kokkos::Device< execution_space, memory_space > ; - using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ; + using memory_pool = Kokkos::MemoryPool< device_type > ; using task_root_type = Kokkos::Impl::TaskBase< execution_space, void, void > ; friend class Kokkos::TaskScheduler< execution_space > ; @@ -317,3 +321,5 @@ public: //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif // KOKKOS_QTHREADS_TASKQUEUE_HPP + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp index 4a9190c731c6034724b63094c55967de78caab64..33411775443682a41df3e93e2b64836d95801609 100644 --- a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp @@ -41,7 +41,11 @@ //@HEADER */ -#if defined( KOKKOS_ENABLE_TASKPOLICY ) +#ifndef KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP +#define KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP + +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) namespace Kokkos { namespace Impl { @@ -433,4 +437,5 @@ fflush(stdout); #endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif // KOKKOS_QTHREADS_TASKQUEUE_IMPL_HPP diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index b1f53489f432ba093ea2222b16c88ee68e005374..4c805310cc38ca8cabcadb431939150354018427 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -41,11 +41,13 @@ //@HEADER */ -#include <Kokkos_Core_fwd.hpp> -#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) + +#include <Kokkos_Core_fwd.hpp> -#include <stdint.h> +#include <cstdint> #include <limits> #include 
<utility> #include <iostream> @@ -512,8 +514,8 @@ void ThreadsExec::print_configuration( std::ostream & s , const bool detail ) s << "Kokkos::Threads" ; -#if defined( KOKKOS_ENABLE_PTHREAD ) - s << " KOKKOS_ENABLE_PTHREAD" ; +#if defined( KOKKOS_ENABLE_THREADS ) + s << " KOKKOS_ENABLE_THREADS" ; #endif #if defined( KOKKOS_ENABLE_HWLOC ) s << " hwloc[" << numa_count << "x" << cores_per_numa << "x" << threads_per_core << "]" ; @@ -817,10 +819,12 @@ int Threads::thread_pool_rank() } #endif +const char* Threads::name() { return "Threads"; } } /* namespace Kokkos */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) */ +#else +void KOKKOS_CORE_SRC_THREADS_EXEC_PREVENT_LINK_ERROR() {} +#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index a6db02ebac84b96a736519a22a537bdc53ea6b1a..74de3a2596f26bd399eb3eb9c4be2caa46362160 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -44,7 +44,10 @@ #ifndef KOKKOS_THREADSEXEC_HPP #define KOKKOS_THREADSEXEC_HPP -#include <stdio.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) + +#include <cstdio> #include <utility> #include <impl/Kokkos_spinwait.hpp> @@ -626,6 +629,6 @@ inline void Threads::fence() //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- - +#endif #endif /* #define KOKKOS_THREADSEXEC_HPP */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp index 41493a5a2049b97a315baf5b64c94e0949e3fa71..288dab2bacace135bf87579082cfe7aa66640d17 100644 --- 
a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,18 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <Kokkos_Core_fwd.hpp> - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_PTHREAD ) +#include <Kokkos_Core_fwd.hpp> /* Standard 'C' Linux libraries */ #include <pthread.h> @@ -154,6 +152,8 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) #elif defined( KOKKOS_ENABLE_WINTHREAD ) +#include <Kokkos_Core_fwd.hpp> + /* Windows libraries */ #include <winsock2.h> #include <windows.h> @@ -247,9 +247,7 @@ void ThreadsExec::wait_yield( volatile int & flag , const int value ) {} } // namespace Impl } // namespace Kokkos +#else +void KOKKOS_CORE_SRC_THREADS_EXEC_BASE_PREVENT_LINK_ERROR() {} #endif /* end #elif defined( KOKKOS_ENABLE_WINTHREAD ) */ -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - - diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index 
701495428193148f0efaf8dbf1cdededabd66460..c12019413b084cf92a043bd794f5be0ac209c77b 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -44,7 +44,10 @@ #ifndef KOKKOS_THREADSTEAM_HPP #define KOKKOS_THREADSTEAM_HPP -#include <stdio.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) + +#include <cstdio> #include <utility> #include <impl/Kokkos_spinwait.hpp> @@ -173,7 +176,9 @@ public: } template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !Kokkos::is_reducer< Type >::value , Type>::type + team_reduce( const Type & value ) const #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) { return Type(); } #else @@ -202,6 +207,55 @@ public: } #endif + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type + #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + team_reduce( const ReducerType & ) const + {} + #else + team_reduce( const ReducerType & reducer ) const + { + typedef typename ReducerType::value_type value_type; + // Make sure there is enough scratch space: + typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE + , value_type , void >::type type ; + + if ( 0 == m_exec ) return ; + + type * const local_value = ((type*) m_exec->scratch_memory()); + + // Set this thread's contribution + *local_value = reducer.reference() ; + + // Fence to make sure the base team member has access: + memory_fence(); + + if ( team_fan_in() ) { + // The last thread to synchronize returns true, all other threads wait for team_fan_out() + type * const team_value = ((type*) m_team_base[0]->scratch_memory()); + + // Join to the team value: + for ( int i = 1 ; i < m_team_size ; ++i ) { + reducer.join( *team_value , *((type*) m_team_base[i]->scratch_memory()) ); + } + + // Team base thread may "lap" member threads 
so copy out to their local value. + for ( int i = 1 ; i < m_team_size ; ++i ) { + *((type*) m_team_base[i]->scratch_memory()) = *team_value ; + } + + // Fence to make sure all team members have access + memory_fence(); + } + + team_fan_out(); + + // Value was changed by the team base + reducer.reference() = *((type volatile const *) local_value); + } + #endif + template< class ValueType, class JoinOp > KOKKOS_INLINE_FUNCTION ValueType team_reduce( const ValueType & value @@ -760,7 +814,8 @@ void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thread * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, +typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); @@ -774,6 +829,21 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Thr result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); } +template< typename iType, class Lambda, typename ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember>& loop_boundaries, + const Lambda & lambda, const ReducerType& reducer) { + + reducer.init(reducer.reference()); + + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,reducer.reference()); + } + + loop_boundaries.thread.team_reduce(reducer); +} + /** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
* * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of @@ -822,17 +892,26 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Thre * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& +typename std::enable_if< !Kokkos::is_reducer< ValueType >::value >::type +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i,result); } } +template< typename iType, class Lambda, typename ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& + loop_boundaries, const Lambda & lambda, const ReducerType& reducer) { + reducer.init(reducer.reference()); + for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { + lambda(i,reducer.reference()); + } +} + /** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
* * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of @@ -915,6 +994,6 @@ void single(const Impl::ThreadSingleStruct<Impl::ThreadsExecTeamMember>& single_ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- - +#endif #endif /* #define KOKKOS_THREADSTEAM_HPP */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp index 69350f285d9b781e623d432536ee01a39d38828f..0ee0cd3280a549447bc3cfa1b55be607eaafc3df 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,8 +44,11 @@ #ifndef KOKKOS_THREADS_PARALLEL_HPP #define KOKKOS_THREADS_PARALLEL_HPP +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_THREADS ) + #include <vector> -#include <iostream> +#include <iostream> #include <Kokkos_Parallel.hpp> @@ -83,7 +86,7 @@ private: exec_range( const FunctorType & functor , const Member ibeg , const Member iend ) { - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -99,7 +102,7 @@ private: , const Member ibeg , const Member iend ) { const TagType t{} ; - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -308,7 +311,7 @@ private: , const Member & ibeg , const Member & iend , reference_type update ) { - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -325,7 +328,7 @@ private: , reference_type update ) { const TagType t{} ; - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -348,7 +351,7 @@ private: const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); ParallelReduce::template exec_range< WorkTag > - ( self.m_functor , range.begin() , range.end() + ( self.m_functor , range.begin() , range.end() , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); @@ -428,7 +431,7 @@ 
public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -543,7 +546,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) + , m_result_ptr( reducer.view().data() ) , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) { /*static_assert( std::is_same< typename ViewType::memory_space @@ -584,7 +587,7 @@ private: , const Member & ibeg , const Member & iend , reference_type update , const bool final ) { - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -601,7 +604,7 @@ private: , reference_type update , const bool final ) { const TagType t{} ; - #if defined( KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION ) && \ + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) #pragma ivdep #endif @@ -654,5 +657,6 @@ public: //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- +#endif #endif /* #define KOKKOS_THREADS_PARALLEL_HPP */ diff --git a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp index c4db3e15ef4593422eca54ab5d295f5469d3a5ad..77a1e8754da77dd2146635c285b6162f8aca3e5f 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp +++ b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -44,24 +44,20 @@ #ifndef KOKKOS_HOST_EXP_ITERATE_TILE_HPP #define KOKKOS_HOST_EXP_ITERATE_TILE_HPP -#include <iostream> -#include 
<algorithm> -#include <stdio.h> - #include <Kokkos_Macros.hpp> - -#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) +#if defined(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) #define KOKKOS_MDRANGE_IVDEP #endif - #ifdef KOKKOS_MDRANGE_IVDEP #define KOKKOS_ENABLE_IVDEP_MDRANGE _Pragma("ivdep") #else #define KOKKOS_ENABLE_IVDEP_MDRANGE #endif - +#include <iostream> +#include <algorithm> +#include <cstdio> namespace Kokkos { namespace Experimental { namespace Impl { @@ -243,7 +239,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { KOKKOS_ENABLE_IVDEP_MDRANGE \ for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ APPLY( func, i0 + m_offset[0] ) \ - } + } #define LOOP_LAYOUT_2( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -255,7 +251,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ LOOP_R_1( func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_3( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -267,7 +263,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ LOOP_R_2( func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_4( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -279,7 +275,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ LOOP_R_3( func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_5( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -291,7 +287,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ 
LOOP_R_4( func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_6( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -303,7 +299,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ LOOP_R_5( func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_7( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -315,7 +311,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ LOOP_R_6( func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_8( func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -327,7 +323,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ LOOP_R_7( func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ } \ - } + } // Partial vs Full Tile #define TILE_LOOP_1( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ @@ -459,7 +455,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { KOKKOS_ENABLE_IVDEP_MDRANGE \ for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ APPLY_REDUX( val, func, i0 + m_offset[0] ) \ - } + } #define LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -471,7 +467,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ LOOP_R_1_REDUX( val, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -483,7 +479,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ LOOP_R_2_REDUX( val, func, type, 
m_offset, extent, 1 , i2 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -495,7 +491,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ LOOP_R_3_REDUX( val, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -507,7 +503,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ LOOP_R_4_REDUX( val, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -519,7 +515,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ LOOP_R_5_REDUX( val, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -531,7 +527,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ LOOP_R_6_REDUX( val, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ } \ - } + } #define LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -543,7 +539,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ LOOP_R_7_REDUX( val, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ } \ - } + } // Partial vs Full Tile #define TILE_LOOP_1_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ @@ -676,7 +672,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { KOKKOS_ENABLE_IVDEP_MDRANGE \ for( type i0 = 
(type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ TAGGED_APPLY( tag, func, i0 + m_offset[0] ) \ - } + } #define TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -688,7 +684,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -700,7 +696,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -712,7 +708,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -724,7 +720,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -736,7 +732,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -748,7 +744,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i6 = 
(type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -760,7 +756,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ } \ - } + } // Partial vs Full Tile #define TAGGED_TILE_LOOP_1( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ @@ -892,7 +888,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { KOKKOS_ENABLE_IVDEP_MDRANGE \ for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ TAGGED_APPLY_REDUX( val, tag, func, i0 + m_offset[0] ) \ - } + } #define TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -904,7 +900,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -916,7 +912,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -928,7 +924,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, 
is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -940,7 +936,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -952,7 +948,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -964,7 +960,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ } \ - } + } #define TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ if (is_left) { \ @@ -976,7 +972,7 @@ namespace Kokkos { namespace Experimental { namespace Impl { for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ } \ - } + } // Partial vs Full Tile #define TAGGED_TILE_LOOP_1_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ @@ -1321,8 +1317,8 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } else { is_full_tile = false ; - partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 - : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? 
(m_rp.m_upper[i] - offset[i]) : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range } } @@ -1332,7 +1328,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i template <int Rank> - struct RankTag + struct RankTag { typedef RankTag type; enum { value = (int)Rank }; @@ -1343,13 +1339,13 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i inline void operator()(IType tile_idx) const - { + { point_type m_offset; point_type m_tiledims; if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1361,13 +1357,13 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims - const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); } -#else +#else template <typename IType> inline void @@ -1384,7 +1380,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1396,7 +1392,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims - const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + const 
bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; if (RP::inner_direction == RP::Left) { if ( full_tile ) { @@ -1437,7 +1433,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1490,7 +1486,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1543,7 +1539,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1596,7 +1592,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1649,7 +1645,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; 
tile_idx /= m_rp.m_tile_end[i]; } } @@ -1702,7 +1698,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1804,8 +1800,8 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } else { is_full_tile = false ; - partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 - : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range } } @@ -1815,7 +1811,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i template <int Rank> - struct RankTag + struct RankTag { typedef RankTag type; enum { value = (int)Rank }; @@ -1827,13 +1823,13 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i inline void operator()(IType tile_idx) const - { + { point_type m_offset; point_type m_tiledims; if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1845,13 +1841,13 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims - const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; Tile_Loop_Type< RP::rank, (RP::inner_direction == 
RP::Left), index_type, Tag >::apply( m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); } -#else +#else template <typename IType> inline void @@ -1869,7 +1865,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1881,7 +1877,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i } //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims - const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; if (RP::inner_direction == RP::Left) { if ( full_tile ) { @@ -1922,7 +1918,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -1975,7 +1971,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -2028,7 +2024,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % 
m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -2081,7 +2077,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -2134,7 +2130,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -2187,7 +2183,7 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i if (RP::outer_direction == RP::Left) { for (int i=0; i<RP::rank; ++i) { - m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; tile_idx /= m_rp.m_tile_end[i]; } } @@ -2352,5 +2348,5 @@ struct MDFunctor< MDRange, Functor, void > } } } //end namespace Kokkos::Experimental::Impl - #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index 0246a7b9af8f968fe6295bfad20f765265049906..c5685c5b624c6da721b3f91877e655c582f36d2d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -195,3 +195,4 @@ struct PolicyTraits #endif //KOKKOS_IMPL_ANALYZE_POLICY_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp index f89af396309f8a51e63694be860740a421a719b4..ea2c90a9e3fc3cb33976ab217ca461d3beca1a69 100644 --- 
a/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,10 +36,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_ASSEMBLY_HPP ) #define KOKKOS_ATOMIC_ASSEMBLY_HPP namespace Kokkos { @@ -110,3 +112,4 @@ namespace Impl { } #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index da31e0c2cc040ff087f7b63b416640d4bff1a26b..010b15064ee3ebed2cf92ac3e25ab312a3c279bc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP ) #define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp index 962a5a17a33b8460d3fca657219b7786762cf0e8..127de528f5303a56da4c0eb25d1012181aa0e598 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP ) #define KOKKOS_ATOMIC_DECREMENT_HPP @@ -117,3 +118,4 @@ void atomic_decrement(volatile T* a) { } // End of namespace Kokkos #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index 428b792853ab4d1570d8cbcafee764e15698b1f6..a1ff47abce66737b6f3875a9e87019d428ab377d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_EXCHANGE_HPP ) #define KOKKOS_ATOMIC_EXCHANGE_HPP @@ -356,5 +357,3 @@ void atomic_assign( volatile T * const dest , const T val ) #endif -//---------------------------------------------------------------------------- - diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index d6bc953ab29507583acf41bef65b5202ce99937c..860c8e0e4347d66f9fd2a9714c0effe0322e14f4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_ADD_HPP ) #define KOKKOS_ATOMIC_FETCH_ADD_HPP @@ -279,6 +280,7 @@ T atomic_fetch_add( volatile T * const dest , { while( !Impl::lock_address_host_space( (void*) dest ) ); T return_val = *dest; + // Don't use the following line of code here: // //const T tmp = *dest = return_val + val; @@ -292,6 +294,7 @@ T atomic_fetch_add( volatile T * const dest , const T tmp = *dest; (void) tmp; Impl::unlock_address_host_space( (void*) dest ); + return return_val; } //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp index 334272bc7cf04873d58270ffa5ea0cabfbbe5202..83f5b2a5aadb001115748209d9c098429fb1afff 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_AND_HPP ) #define KOKKOS_ATOMIC_FETCH_AND_HPP @@ -124,4 +125,3 @@ void atomic_and(volatile T * const dest, const T src) { #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp index 75931d826d2a61dfc56b517be3fdd43d59e9f2f2..8c73b4c3ef3ec55c12efe1653385ddd47f1bbbdc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_OR_HPP ) #define KOKKOS_ATOMIC_FETCH_OR_HPP @@ -124,4 +125,3 @@ void atomic_or(volatile T * const dest, const T src) { #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 7ce4112705b2b66aced2023062b2cc3b842bd7b8..504731d3a2f19529ec9be97b7709638ed38a9bb8 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_SUB_HPP ) #define KOKKOS_ATOMIC_FETCH_SUB_HPP @@ -238,4 +239,3 @@ void atomic_sub(volatile T * const dest, const T src) { #include<impl/Kokkos_Atomic_Assembly.hpp> #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp index 527e1bb4e334e7a9e83a0021061f07fd5900db18..65578156d5be52232785d93744c9025c814bd6a8 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,10 +36,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_GENERIC_HPP ) #define KOKKOS_ATOMIC_GENERIC_HPP #include <Kokkos_Macros.hpp> @@ -424,6 +426,6 @@ T atomic_rshift_fetch(volatile T * const dest, const unsigned int val) { return Impl::atomic_oper_fetch(Impl::RShiftOper<T,const unsigned int>(),dest,val); } - -} +} // namespace Kokkos #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp index 2894a45f0b2e30ab78e229c16f37409b38fd4179..2985fad95ebb29d434095d94cbf41fc2e7c3d3c2 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP ) #define KOKKOS_ATOMIC_INCREMENT_HPP @@ -115,3 +116,4 @@ void atomic_increment(volatile T* a) { } // End of namespace Kokkos #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp index 6e48faa6948e808c3460b4408ebb85a75617d035..5b555e51a3dc10faf9537aa63033a5aa2e7762cc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -428,3 +428,4 @@ struct Kokkos_Atomic_is_only_allowed_with_32bit_and_64bit_scalars<8> { }} // namespace Kokkos::Impl #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp index 14066e8be25f42e7a33b2f9261d90769dff6060d..ea6807a2e97f8f90c1c5df53ec12bc9fc0072563 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,13 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #ifndef KOKKOS_ATOMIC_WINDOWS_HPP #define KOKKOS_ATOMIC_WINDOWS_HPP + #ifdef _WIN32 #define NOMINMAX diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp index 7d7fd3d1334901f1cc57e554f6c46f7f17ca09c4..df16b3738b7abfea350ca4375c4b0e84973877b4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -45,12 +45,38 @@ #define KOKKOS_BITOPS_HPP #include <Kokkos_Macros.hpp> -#include <stdint.h> +#include <cstdint> #include <climits> namespace Kokkos { namespace Impl { +/**\brief Find first zero bit. 
+ * + * If none then return -1 ; + */ +KOKKOS_FORCEINLINE_FUNCTION +int bit_first_zero( unsigned i ) noexcept +{ + enum : unsigned { full = ~0u }; + +#if defined( __CUDA_ARCH__ ) + return full != i ? __ffs( ~i ) - 1 : -1 ; +#elif defined( KOKKOS_COMPILER_INTEL ) + return full != i ? _bit_scan_forward( ~i ) : -1 ; +#elif defined( KOKKOS_COMPILER_IBM ) + return full != i ? __cnttz4( ~i ) : -1 ; +#elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ ) + return full != i ? __builtin_ffs( ~i ) - 1 : -1 ; +#else + int offset = -1 ; + if ( full != i ) { + for ( offset = 0 ; i & ( 1 << offset ) ; ++offset ); + } + return offset ; +#endif +} + KOKKOS_FORCEINLINE_FUNCTION int bit_scan_forward( unsigned i ) { @@ -121,7 +147,16 @@ int bit_count( unsigned i ) #endif } +KOKKOS_INLINE_FUNCTION +unsigned integral_power_of_two_that_contains( const unsigned N ) +{ + const unsigned i = Kokkos::Impl::bit_scan_reverse( N ); + return ( (1u << i) < N ) ? i + 1 : i ; +} + + } // namespace Impl } // namespace Kokkos #endif // KOKKOS_BITOPS_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp index 8ee094675cb861f9daf2c8b054b6dbf7517b401d..79c348515be78d38315e64c3bbf69c115b05836c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp @@ -122,3 +122,4 @@ int mpi_local_rank_on_node() { } } + diff --git a/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp b/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp new file mode 100644 index 0000000000000000000000000000000000000000..92111c3c59263eeeeccdb59982febfa7755d7d62 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp @@ -0,0 +1,106 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CLOCKTIC_HPP +#define KOKKOS_CLOCKTIC_HPP + +#include <Kokkos_Macros.hpp> +#include <stdint.h> +#include <chrono> + +namespace Kokkos { +namespace Impl { + +/**\brief Quick query of clock register tics + * + * Primary use case is to, with low overhead, + * obtain an integral value that consistently varies + * across concurrent threads of execution within + * a parallel algorithm. + * This value is often used to "randomly" seed an + * attempt to acquire an indexed resource (e.g., bit) + * from an array of resources (e.g., bitset) such that + * concurrent threads will have high likelihood of + * having different index-seed values. + */ +KOKKOS_FORCEINLINE_FUNCTION +uint64_t clock_tic(void) noexcept +{ +#if defined( __CUDA_ARCH__ ) + + // Return value of 64-bit hi-res clock register. + + return clock64(); + +#elif defined( __i386__ ) || defined( __x86_64 ) + + // Return value of 64-bit hi-res clock register.
+ + unsigned a = 0, d = 0; + + __asm__ volatile( "rdtsc" : "=a" (a), "=d" (d) ); + + return ( (uint64_t) a ) | ( ( (uint64_t) d ) << 32 ); + +#elif defined( __powerpc ) || defined( __powerpc__ ) || \ + defined( __powerpc64__ ) || defined( __POWERPC__ ) || \ + defined( __ppc__ ) || defined( __ppc64__ ) + + unsigned int cycles = 0; + + asm volatile( "mftb %0" : "=r" (cycles) ); + + return (uint64_t) cycles; + +#else + + return (uint64_t) + std::chrono::high_resolution_clock::now().time_since_epoch().count(); + +#endif +} + +} // namespace Impl +} // namespace Kokkos + +#endif // KOKKOS_CLOCKTIC_HPP diff --git a/lib/kokkos/core/src/impl/Kokkos_ConcurrentBitset.hpp b/lib/kokkos/core/src/impl/Kokkos_ConcurrentBitset.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8f52087caa8a6b165a43d99fbaeb9f169c7632a5 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_ConcurrentBitset.hpp @@ -0,0 +1,357 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CONCURRENTBITSET_HPP +#define KOKKOS_CONCURRENTBITSET_HPP + +#include <stdint.h> +#include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_BitOps.hpp> +#include <impl/Kokkos_ClockTic.hpp> + +namespace Kokkos { +namespace Impl { + +struct concurrent_bitset { +public: + + // 32 bits per integer value + + enum : uint32_t { bits_per_int_lg2 = 5 }; + enum : uint32_t { bits_per_int_mask = ( 1 << bits_per_int_lg2 ) - 1 }; + + // Buffer is uint32_t[ buffer_bound ] + // [ uint32_t { state_header | used_count } , uint32_t bits[*] ] + // + // Maximum bit count is 33 million (1u<<25): + // + // - Maximum bit set size occupies 1 Mbyte + // + // - State header can occupy bits [30-26] + // which can be the bit_count_lg2 + // + // - Accept at least 33 million concurrent calls to 'acquire' + // before risking an overflow race condition on a full bitset. 
+
+  enum : uint32_t { max_bit_count_lg2 = 25 };
+  enum : uint32_t { max_bit_count     = 1u << max_bit_count_lg2 };
+  enum : uint32_t { state_shift       = 26 };
+  enum : uint32_t { state_used_mask   = ( 1 << state_shift ) - 1 };
+  enum : uint32_t { state_header_mask = uint32_t(0x001f) << state_shift };
+  /**\brief Buffer length in uint32_t words for (1u << bit_bound_lg2) bits: one state word plus the bit words; 0 if above max_bit_count_lg2 */
+  KOKKOS_INLINE_FUNCTION static constexpr
+  uint32_t buffer_bound_lg2( uint32_t const bit_bound_lg2 ) noexcept
+    {
+      return bit_bound_lg2 <= max_bit_count_lg2
+        ? 1 + ( 1u << ( bit_bound_lg2 > bits_per_int_lg2
+                      ? bit_bound_lg2 - bits_per_int_lg2 : 0 ) )
+        : 0 ;
+    }
+
+  /**\brief Buffer length in uint32_t words for bit_bound bits: one state word plus the bit words (rounded up); 0 if above max_bit_count */
+  KOKKOS_INLINE_FUNCTION static constexpr
+  uint32_t buffer_bound( uint32_t const bit_bound ) noexcept
+    {
+      return bit_bound <= max_bit_count
+        ? 1 + ( bit_bound >> bits_per_int_lg2 ) +
+              ( bit_bound & bits_per_int_mask ? 1 : 0 )
+        : 0 ;
+    }
+
+  /**\brief  Claim any bit within the bitset bound.
+   *
+   *  Return : ( which_bit , bit_count )
+   *
+   *  if success then
+   *    bit_count is the atomic-count of claimed > 0
+   *    which_bit is the claimed bit >= 0
+   *  else if attempt failed due to filled buffer
+   *    bit_count == which_bit == -1
+   *  else if attempt failed due to non-matching state_header
+   *    bit_count == which_bit == -2
+   *  else if attempt failed due to max_bit_count_lg2 < bit_bound_lg2
+   *                             or invalid state_header
+   *                             or (1u << bit_bound_lg2) <= bit
+   *    bit_count == which_bit == -3
+   *  endif
+   *
+   *  Recommended to have hint
+   *    bit = Kokkos::Impl::clock_tic() & ((1u<<bit_bound_lg2) - 1)
+   */
+  KOKKOS_INLINE_FUNCTION static
+  Kokkos::pair<int,int>
+  acquire_bounded_lg2( uint32_t volatile * const buffer
+                     , uint32_t const bit_bound_lg2
+                     , uint32_t bit = 0                /* optional hint */
+                     , uint32_t const state_header = 0 /* optional header */
+                     ) noexcept
+    {
+      typedef Kokkos::pair<int,int> type ;
+
+      const uint32_t bit_bound  = bit_bound_lg2 <= max_bit_count_lg2 ? 1u << bit_bound_lg2 : 0 ; // never shift by an unvalidated amount
+      const uint32_t word_count = bit_bound >> bits_per_int_lg2 ;
+
+      if ( ( max_bit_count_lg2 < bit_bound_lg2 ) ||
+           ( state_header & ~state_header_mask ) ||
+           ( bit_bound <= bit ) ) { // hint must name a bit in [0,bit_bound); bit == bit_bound would index past the last word
+        return type(-3,-3);
+      }
+
+      // Use potentially two fetch_add to avoid CAS loop.
+      // Could generate "racing" failure-to-acquire
+      // when is full at the atomic_fetch_add(+1)
+      // then a release occurs before the atomic_fetch_add(-1).
+
+      const uint32_t state = (uint32_t)
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , 1 );
+
+      const uint32_t state_error =
+        state_header != ( state & state_header_mask );
+
+      const uint32_t state_bit_used = state & state_used_mask ;
+
+      if ( state_error || ( bit_bound <= state_bit_used ) ) {
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , -1 );
+        return state_error ? type(-2,-2) : type(-1,-1);
+      }
+
+      // Do not update bit until count is visible:
+
+      Kokkos::memory_fence();
+
+      // There is a zero bit available somewhere,
+      // now find the (first) available bit and set it.
+
+      while(1) {
+
+        const uint32_t word = bit >> bits_per_int_lg2 ;
+        const uint32_t mask = 1u << ( bit & bits_per_int_mask );
+        const uint32_t prev = Kokkos::atomic_fetch_or(buffer + word + 1, mask);
+
+        if ( ! ( prev & mask ) ) {
+          // Successfully claimed 'result.first' by
+          // atomically setting that bit.
+          return type( bit , state_bit_used + 1 );
+        }
+
+        // Failed race to set the selected bit
+        // Find a new bit to try.
+
+        const int j = Kokkos::Impl::bit_first_zero( prev );
+
+        if ( 0 <= j ) {
+          bit = ( word << bits_per_int_lg2 ) | uint32_t(j);
+        }
+        else {
+          bit =
+            ( (word+1) < word_count ? ((word+1) << bits_per_int_lg2) : 0 )
+            | ( bit & bits_per_int_mask );
+        }
+      }
+    }
+
+  /**\brief  Claim any bit within the bitset bound.
+   *
+   *  Return : ( which_bit , bit_count )
+   *
+   *  if success then
+   *    bit_count is the atomic-count of claimed > 0
+   *    which_bit is the claimed bit >= 0
+   *  else if attempt failed due to filled buffer
+   *    bit_count == which_bit == -1
+   *  else if attempt failed due to non-matching state_header
+   *    bit_count == which_bit == -2
+   *  else if attempt failed due to max_bit_count < bit_bound
+   *                             or invalid state_header
+   *                             or bit_bound <= bit
+   *    bit_count == which_bit == -3
+   *  endif
+   *
+   *  Recommended to have hint
+   *    bit = Kokkos::Impl::clock_tic() % bit_bound
+   */
+  KOKKOS_INLINE_FUNCTION static
+  Kokkos::pair<int,int>
+  acquire_bounded( uint32_t volatile * const buffer
+                 , uint32_t const bit_bound
+                 , uint32_t bit = 0                /* optional hint */
+                 , uint32_t const state_header = 0 /* optional header */
+                 ) noexcept
+    {
+      typedef Kokkos::pair<int,int> type ;
+
+      if ( ( max_bit_count < bit_bound ) ||
+           ( state_header & ~state_header_mask ) ||
+           ( bit_bound <= bit ) ) {
+        return type(-3,-3);
+      }
+
+      const uint32_t word_count = ( bit_bound + bits_per_int_mask ) >> bits_per_int_lg2 ; // round up so a partial last word is searched too
+
+      // Use potentially two fetch_add to avoid CAS loop.
+      // Could generate "racing" failure-to-acquire
+      // when is full at the atomic_fetch_add(+1)
+      // then a release occurs before the atomic_fetch_add(-1).
+
+      const uint32_t state = (uint32_t)
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , 1 );
+
+      const uint32_t state_error =
+        state_header != ( state & state_header_mask );
+
+      const uint32_t state_bit_used = state & state_used_mask ;
+
+      if ( state_error || ( bit_bound <= state_bit_used ) ) {
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , -1 );
+        return state_error ? type(-2,-2) : type(-1,-1);
+      }
+
+      // Do not update bit until count is visible:
+
+      Kokkos::memory_fence();
+
+      // There is a zero bit available somewhere,
+      // now find the (first) available bit and set it.
+
+      while(1) {
+
+        const uint32_t word = bit >> bits_per_int_lg2 ;
+        const uint32_t mask = 1u << ( bit & bits_per_int_mask );
+        const uint32_t prev = Kokkos::atomic_fetch_or(buffer + word + 1, mask);
+
+        if ( ! ( prev & mask ) ) {
+          // Successfully claimed 'result.first' by
+          // atomically setting that bit.
+          return type( bit , state_bit_used + 1 );
+        }
+
+        // Failed race to set the selected bit
+        // Find a new bit to try.
+
+        const int j = Kokkos::Impl::bit_first_zero( prev );
+
+        if ( 0 <= j ) {
+          bit = (word << bits_per_int_lg2 ) | uint32_t(j);
+        }
+
+        if ( ( j < 0 ) || ( bit_bound <= bit ) ) {
+          const uint32_t next = (word+1) < word_count ? ((word+1) << bits_per_int_lg2) : 0 ; // first bit of next word, wrapping to word 0
+          const uint32_t same = next | ( bit & bits_per_int_mask ); // same in-word offset as before
+          bit = same < bit_bound ? same : next ; // never probe a bit at or beyond bit_bound
+        }
+      }
+    }
+
+  /**\brief
+   *
+   *  Requires: 'bit' previously acquired and has not yet been released.
+   *
+   *  Returns:
+   *    0 <= used count after successful release
+   *    -1 bit was already released
+   *    -2 state_header error
+   */
+  KOKKOS_INLINE_FUNCTION static
+  int release( uint32_t volatile * const buffer
+             , uint32_t const bit
+             , uint32_t const state_header = 0 /* optional header */
+             ) noexcept
+    {
+      if ( state_header != ( state_header_mask & *buffer ) ) { return -2 ; }
+
+      const uint32_t mask = 1u << ( bit & bits_per_int_mask );
+      const uint32_t prev =
+        Kokkos::atomic_fetch_and( buffer + ( bit >> bits_per_int_lg2 ) + 1
+                                , ~mask
+                                );
+
+      if ( ! ( prev & mask ) ) { return -1 ; }
+
+      // Do not update count until bit clear is visible
+      Kokkos::memory_fence();
+
+      const int count =
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , -1 );
+
+      return ( count & state_used_mask ) - 1 ;
+    }
+
+  /**\brief
+   *
+   *  Requires: Bit within bounds and not already set.
+   *
+   *  Returns:
+   *    0 <  used count after successfully setting the bit
+   *    -1 bit was already set
+   *    -2 bit or state_header error
+   */
+  KOKKOS_INLINE_FUNCTION static
+  int set( uint32_t volatile * const buffer
+         , uint32_t const bit
+         , uint32_t const state_header = 0 /* optional header */
+         ) noexcept
+    {
+      if ( state_header != ( state_header_mask & *buffer ) ) { return -2 ; }
+
+      const uint32_t mask = 1u << ( bit & bits_per_int_mask );
+      const uint32_t prev =
+        Kokkos::atomic_fetch_or( buffer + ( bit >> bits_per_int_lg2 ) + 1
+                               , mask
+                               );
+
+      if ( prev & mask ) { return -1 ; } // bit was already set: nothing new to count
+
+      // Do not update count until bit set is visible
+      Kokkos::memory_fence();
+
+      const int count =
+        Kokkos::atomic_fetch_add( (volatile int *) buffer , 1 ); // one more bit in use
+
+      return ( count & state_used_mask ) + 1 ;
+    }
+};
+
+}} // namespace Kokkos::Impl
+
+#endif /* #ifndef KOKKOS_CONCURRENTBITSET_HPP */
+
diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
index 7c38430c44986d5dcffad9c03c9f587ffdc91863..f0ff6d78ec21134292d85c8fd230c549bb1a961c 100644
--- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp
@@ -46,6 +46,7 @@
 #include <cctype>
 #include <cstring>
 #include <iostream>
+#include <sstream>
 #include <cstdlib>
 
 //----------------------------------------------------------------------------
@@ -75,10 +76,10 @@ setenv("MEMKIND_HBW_NODES", "1", 0);
 #endif
 
   // Protect declarations, to prevent "unused variable" warnings.
-#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMPTARGET ) const int num_threads = args.num_threads; const int use_numa = args.num_numa; -#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_PTHREAD ) +#endif // defined( KOKKOS_ENABLE_OPENMP ) || defined( KOKKOS_ENABLE_THREADS ) #if defined( KOKKOS_ENABLE_CUDA ) const int use_gpu = args.device_id; #endif // defined( KOKKOS_ENABLE_CUDA ) @@ -103,7 +104,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) { if(num_threads>0) { @@ -135,6 +136,25 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) + if( Impl::is_same< Kokkos::Experimental::OpenMPTarget , Kokkos::DefaultExecutionSpace >::value ) { + if(num_threads>0) { + if(use_numa>0) { + Kokkos::Experimental::OpenMPTarget::initialize(num_threads,use_numa); + } + else { + Kokkos::Experimental::OpenMPTarget::initialize(num_threads); + } + } else { + Kokkos::Experimental::OpenMPTarget::initialize(); + } + //std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ; + } + else { + //std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ; + } +#endif + #if defined( KOKKOS_ENABLE_CUDA ) if( std::is_same< Kokkos::Cuda , Kokkos::DefaultExecutionSpace >::value || 0 < use_gpu ) { if (use_gpu > -1) { @@ -166,6 +186,13 @@ void finalize_internal( const bool all_spaces = false ) } #endif +#if defined( KOKKOS_ENABLE_OPENMPTARGET ) + if( std::is_same< Kokkos::Experimental::OpenMPTarget , Kokkos::DefaultExecutionSpace >::value || all_spaces ) { + if(Kokkos::Experimental::OpenMPTarget::is_initialized()) + 
Kokkos::Experimental::OpenMPTarget::finalize();
+  }
+#endif
+
 #if defined( KOKKOS_ENABLE_OPENMP )
   if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value ||
       std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ||
@@ -175,7 +202,7 @@ void finalize_internal( const bool all_spaces = false )
   }
 #endif
 
-#if defined( KOKKOS_ENABLE_PTHREAD )
+#if defined( KOKKOS_ENABLE_THREADS )
   if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
       std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ||
       all_spaces ) {
@@ -210,7 +237,7 @@ void fence_internal()
   }
 #endif
 
-#if defined( KOKKOS_ENABLE_PTHREAD )
+#if defined( KOKKOS_ENABLE_THREADS )
   if( std::is_same< Kokkos::Threads , Kokkos::DefaultExecutionSpace >::value ||
       std::is_same< Kokkos::Threads , Kokkos::HostSpace::execution_space >::value ) {
     Kokkos::Threads::fence();
@@ -226,6 +253,39 @@ void fence_internal()
 
 }
 
+bool check_arg(char const* arg, char const* expected) {
+  std::size_t arg_len = std::strlen(arg);
+  std::size_t exp_len = std::strlen(expected);
+  if (arg_len < exp_len) return false;
+  if (std::strncmp(arg, expected, exp_len) != 0) return false;
+  if (arg_len == exp_len) return true;
+  /* if expected is "--threads", ignore "--threads-for-application"
+     by checking this character          ---------^
+     to see if it continues to make a longer name */
+  if (std::isalnum(arg[exp_len]) || arg[exp_len] == '-' || arg[exp_len] == '_') {
+    return false;
+  }
+  return true;
+}
+// Returns false when 'arg' is not the option 'expected'; otherwise stores the integer following "<expected>=" in *value, reporting a missing or malformed "=INT" through Impl::throw_runtime_exception.
+bool check_int_arg(char const* arg, char const* expected, int* value) {
+  if (!check_arg(arg, expected)) return false;
+  std::size_t arg_len = std::strlen(arg);
+  std::size_t exp_len = std::strlen(expected);
+  bool okay = true;
+  if (arg_len == exp_len || arg[exp_len] != '=') okay = false;
+  char const* number = okay ? arg + exp_len + 1 : ""; // for a bare "--name", arg + exp_len + 1 would point past the terminating NUL
+  if (!Impl::is_unsigned_int(number) || std::strlen(number) == 0) okay = false;
+  *value = std::atoi(number);
+  if (!okay) {
+    std::ostringstream ss;
+    ss << 
"Error: expecting an '=INT' after command line argument '" << expected << "'"; + ss << ". Raised by Kokkos::initialize(int narg, char* argc[])."; + Impl::throw_runtime_exception( ss.str() ); + } + return true; +} + } // namespace } // namespace Impl } // namespace Kokkos @@ -248,76 +308,31 @@ void initialize(int& narg, char* arg[]) int iarg = 0; while (iarg < narg) { - if ((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || (strncmp(arg[iarg],"--threads",9) == 0)) { - //Find the number of threads (expecting --threads=XX) - if (!((strncmp(arg[iarg],"--kokkos-threads=",17) == 0) || (strncmp(arg[iarg],"--threads=",10) == 0))) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[])."); - - char* number = strchr(arg[iarg],'=')+1; - - if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--threads/--kokkos-threads'. Raised by Kokkos::initialize(int narg, char* argc[])."); - - if((strncmp(arg[iarg],"--kokkos-threads",16) == 0) || !kokkos_threads_found) - num_threads = atoi(number); - - //Remove the --kokkos-threads argument from the list but leave --threads - if(strncmp(arg[iarg],"--kokkos-threads",16) == 0) { - for(int k=iarg;k<narg-1;k++) { - arg[k] = arg[k+1]; - } - kokkos_threads_found=1; - narg--; - } else { - iarg++; + if (Impl::check_int_arg(arg[iarg], "--kokkos-threads", &num_threads)) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; } - } else if ((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || (strncmp(arg[iarg],"--numa",6) == 0)) { - //Find the number of numa (expecting --numa=XX) - if (!((strncmp(arg[iarg],"--kokkos-numa=",14) == 0) || (strncmp(arg[iarg],"--numa=",7) == 0))) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. 
Raised by Kokkos::initialize(int narg, char* argc[])."); - - char* number = strchr(arg[iarg],'=')+1; - - if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--numa/--kokkos-numa'. Raised by Kokkos::initialize(int narg, char* argc[])."); - - if((strncmp(arg[iarg],"--kokkos-numa",13) == 0) || !kokkos_numa_found) - numa = atoi(number); - - //Remove the --kokkos-numa argument from the list but leave --numa - if(strncmp(arg[iarg],"--kokkos-numa",13) == 0) { - for(int k=iarg;k<narg-1;k++) { - arg[k] = arg[k+1]; - } - kokkos_numa_found=1; - narg--; - } else { - iarg++; + kokkos_threads_found=1; + narg--; + } else if (!kokkos_threads_found && Impl::check_int_arg(arg[iarg], "--threads", &num_threads)) { + iarg++; + } else if (Impl::check_int_arg(arg[iarg], "--kokkos-numa", &numa)) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; } - } else if ((strncmp(arg[iarg],"--kokkos-device",15) == 0) || (strncmp(arg[iarg],"--device",8) == 0)) { - //Find the number of device (expecting --device=XX) - if (!((strncmp(arg[iarg],"--kokkos-device=",16) == 0) || (strncmp(arg[iarg],"--device=",9) == 0))) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. Raised by Kokkos::initialize(int narg, char* argc[])."); - - char* number = strchr(arg[iarg],'=')+1; - - if(!Impl::is_unsigned_int(number) || (strlen(number)==0)) - Impl::throw_runtime_exception("Error: expecting an '=INT' after command line argument '--device/--kokkos-device'. 
Raised by Kokkos::initialize(int narg, char* argc[])."); - - if((strncmp(arg[iarg],"--kokkos-device",15) == 0) || !kokkos_device_found) - device = atoi(number); - - //Remove the --kokkos-device argument from the list but leave --device - if(strncmp(arg[iarg],"--kokkos-device",15) == 0) { - for(int k=iarg;k<narg-1;k++) { - arg[k] = arg[k+1]; - } - kokkos_device_found=1; - narg--; - } else { - iarg++; + kokkos_numa_found=1; + narg--; + } else if (!kokkos_numa_found && Impl::check_int_arg(arg[iarg], "--numa", &numa)) { + iarg++; + } else if (Impl::check_int_arg(arg[iarg], "--kokkos-device", &device)) { + for(int k=iarg;k<narg-1;k++) { + arg[k] = arg[k+1]; } - } else if ((strncmp(arg[iarg],"--kokkos-ndevices",17) == 0) || (strncmp(arg[iarg],"--ndevices",10) == 0)) { + kokkos_device_found=1; + narg--; + } else if (!kokkos_device_found && Impl::check_int_arg(arg[iarg], "--device", &device)) { + iarg++; + } else if (Impl::check_arg(arg[iarg], "--kokkos-ndevices") || Impl::check_arg(arg[iarg], "--ndevices")) { //Find the number of device (expecting --device=XX) if (!((strncmp(arg[iarg],"--kokkos-ndevices=",18) == 0) || (strncmp(arg[iarg],"--ndevices=",11) == 0))) @@ -511,20 +526,8 @@ void print_configuration( std::ostream & out , const bool detail ) #else msg << "no" << std::endl; #endif - msg << " KOKKOS_ENABLE_PTHREAD: "; -#ifdef KOKKOS_ENABLE_PTHREAD - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_STDTHREAD: "; -#ifdef KOKKOS_ENABLE_STDTHREAD - msg << "yes" << std::endl; -#else - msg << "no" << std::endl; -#endif - msg << " KOKKOS_ENABLE_WINTHREAD: "; -#ifdef KOKKOS_ENABLE_WINTHREAD + msg << " KOKKOS_ENABLE_THREADS: "; +#ifdef KOKKOS_ENABLE_THREADS msg << "yes" << std::endl; #else msg << "no" << std::endl; @@ -754,7 +757,7 @@ void print_configuration( std::ostream & out , const bool detail ) #ifdef KOKKOS_ENABLE_OPENMP OpenMP::print_configuration(msg, detail); #endif -#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( 
WINTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) Threads::print_configuration(msg, detail); #endif #ifdef KOKKOS_ENABLE_QTHREADS diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.cpp b/lib/kokkos/core/src/impl/Kokkos_Error.cpp index 36224990d048c2e0394889390cfe78cf826a5fdc..3e9eebbc4cec1cdb830d529886e32d34efd7ba14 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,14 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> +#include <cstdio> +#include <cstring> +#include <cstdlib> #include <ostream> #include <sstream> @@ -127,7 +127,8 @@ std::string human_memory_size(size_t arg_bytes) #include <execinfo.h> #include <cxxabi.h> #include <dlfcn.h> -#include <stdlib.h> + +#include <cstdlib> namespace Kokkos { namespace Impl { diff --git a/lib/kokkos/core/src/impl/Kokkos_Error.hpp b/lib/kokkos/core/src/impl/Kokkos_Error.hpp index 7736307dce8b08f4bcccd2aab3c1ccf2ee061067..4bc2637c57980502c4fe7b8a5e1c4f86db43bb86 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Error.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Error.hpp @@ -75,7 +75,9 @@ void abort( const char * const message ) { #ifdef __CUDA_ARCH__ Kokkos::Impl::cuda_abort(message); #else - Kokkos::Impl::host_abort(message); + #ifndef KOKKOS_ENABLE_OPENMPTARGET + Kokkos::Impl::host_abort(message); + #endif #endif } diff --git a/lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp b/lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp index cabf5a3caccb0bd0beca292f5dcc895867bb1a2e..5a004193d09d0fc94719e55f4c3440ca9bfd3de3 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp @@ -1,3 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + #include <Kokkos_Core.hpp> namespace Kokkos { namespace Impl { @@ -17,3 +60,4 @@ Impl::PerThreadValue PerThread(const int& arg) } } + diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 66c3157c3aba4f6ae4b187d859790986c2458316..dc75fb072f9d1c074824331b02af6b7f0fae3706 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -55,6 +55,46 @@ namespace Kokkos { namespace Impl { +template< class FunctorType, class Enable = void> +struct ReduceFunctorHasInit { + enum {value = false}; +}; + +template< class FunctorType> +struct ReduceFunctorHasInit<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::init ) >::type > { + enum {value = true}; +}; + +template< class FunctorType, class Enable = void> +struct ReduceFunctorHasJoin { + enum {value = false}; +}; + +template< class FunctorType> +struct ReduceFunctorHasJoin<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::join ) >::type > { + enum {value = true}; +}; + +template< class FunctorType, class Enable = void> +struct ReduceFunctorHasFinal { + enum {value = false}; +}; + +template< class FunctorType> +struct ReduceFunctorHasFinal<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::final ) >::type > { + enum {value = true}; +}; + +template< class FunctorType, class Enable = void> + struct ReduceFunctorHasShmemSize { + enum {value = false}; +}; + +template< class FunctorType> +struct ReduceFunctorHasShmemSize<FunctorType, typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type > { + enum {value = true}; +}; + template< class FunctorType , class ArgTag , class Enable = void > struct FunctorDeclaresValueType : public Impl::false_type {}; @@ -63,6 +103,21 @@ struct FunctorDeclaresValueType< 
FunctorType , ArgTag , typename Impl::enable_if_type< typename FunctorType::value_type >::type > : public Impl::true_type {}; +template< class FunctorType, bool Enable = + ( FunctorDeclaresValueType<FunctorType,void>::value) || + ( ReduceFunctorHasInit<FunctorType>::value ) || + ( ReduceFunctorHasJoin<FunctorType>::value ) || + ( ReduceFunctorHasFinal<FunctorType>::value ) || + ( ReduceFunctorHasShmemSize<FunctorType>::value ) + > +struct IsNonTrivialReduceFunctor { + enum {value = false}; +}; + +template< class FunctorType> +struct IsNonTrivialReduceFunctor<FunctorType, true> { + enum {value = true}; +}; /** \brief Query Functor and execution policy argument tag for value type. * diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp index b425b3f19fa159925364d20ac6d5bc85b45bebae..aabc291a98d704fc9a0631aa593f4550dc57f628 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp @@ -48,7 +48,6 @@ #include <Kokkos_Core_fwd.hpp> #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_Tags.hpp> -#include <impl/Kokkos_Reducer.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -101,6 +100,29 @@ private: using Tag = typename has_work_tag<>::type ; using WTag = typename has_work_tag<>::wtag ; + //---------------------------------------- + // Check for T::execution_space + + template< typename T , typename = std::false_type > + struct has_execution_space { using type = void ; enum { value = false }; }; + + template< typename T > + struct has_execution_space + < T , typename std::is_same< typename T::execution_space , void >::type > + { + using type = typename T::execution_space ; + enum { value = true }; + }; + + using policy_has_space = has_execution_space< Policy > ; + using functor_has_space = has_execution_space< Functor > ; + + 
static_assert( ! policy_has_space::value || + ! functor_has_space::value || + std::is_same< typename policy_has_space::type + , typename functor_has_space::type >::value + , "Execution Policy and Functor execution space must match" ); + //---------------------------------------- // Check for Functor::value_type, which is either a simple type T or T[] @@ -190,6 +212,16 @@ private: public: + using execution_space = typename std::conditional + < functor_has_space::value + , typename functor_has_space::type + , typename std::conditional + < policy_has_space::value + , typename policy_has_space::type + , Kokkos::DefaultExecutionSpace + >::type + >::type ; + using value_type = typename std::remove_extent< candidate_type >::type ; static_assert( ! std::is_const< value_type >::value @@ -214,14 +246,14 @@ public: private: template< bool IsArray , class FF > - KOKKOS_INLINE_FUNCTION static + KOKKOS_INLINE_FUNCTION static constexpr typename std::enable_if< IsArray , unsigned >::type get_length( FF const & f ) { return f.value_count ; } template< bool IsArray , class FF > - KOKKOS_INLINE_FUNCTION static + KOKKOS_INLINE_FUNCTION static constexpr typename std::enable_if< ! IsArray , unsigned >::type - get_length( FF const & ) { return 1 ; } + get_length( FF const & ) { return candidate_is_void ? 0 : 1 ; } public: @@ -229,25 +261,25 @@ public: ! candidate_is_array ? 
sizeof(ValueType) : 0 }; - KOKKOS_FORCEINLINE_FUNCTION static + KOKKOS_FORCEINLINE_FUNCTION static constexpr unsigned value_count( const Functor & f ) { return FunctorAnalysis::template get_length< candidate_is_array >(f); } - KOKKOS_FORCEINLINE_FUNCTION static + KOKKOS_FORCEINLINE_FUNCTION static constexpr unsigned value_size( const Functor & f ) { return FunctorAnalysis::template get_length< candidate_is_array >(f) * sizeof(ValueType); } //---------------------------------------- template< class Unknown > - KOKKOS_FORCEINLINE_FUNCTION static + KOKKOS_FORCEINLINE_FUNCTION static constexpr unsigned value_count( const Unknown & ) - { return 1 ; } + { return candidate_is_void ? 0 : 1 ; } template< class Unknown > - KOKKOS_FORCEINLINE_FUNCTION static + KOKKOS_FORCEINLINE_FUNCTION static constexpr unsigned value_size( const Unknown & ) - { return sizeof(ValueType); } + { return candidate_is_void ? 0 : sizeof(ValueType); } private: @@ -277,16 +309,16 @@ private: typedef volatile const ValueType & cvref_type ; KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( vref_type , cvref_type ) ); + void enable_if( void (F::*)( vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void join( F const & f + void join( F const * const f , ValueType volatile * dst , ValueType volatile const * src ) - { f.join( *dst , *src ); } + { f->join( *dst , *src ); } }; template< class F > @@ -296,16 +328,16 @@ private: typedef volatile const ValueType * cvref_type ; KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( vref_type , cvref_type ) ); + void enable_if( void (F::*)( vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void join( F const & f + void join( F const * const f , ValueType volatile * dst , ValueType volatile const * src ) - { f.join( dst , src ); } + { f->join( dst , 
src ); } }; template< class F > @@ -315,22 +347,22 @@ private: typedef volatile const ValueType & cvref_type ; KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag const & , vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void join( F const & f + void join( F const * const f , ValueType volatile * dst , ValueType volatile const * src ) - { f.join( WTag() , *dst , *src ); } + { f->join( WTag() , *dst , *src ); } }; template< class F > @@ -340,22 +372,22 @@ private: typedef volatile const ValueType * cvref_type ; KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag const & , vref_type , cvref_type ) ); KOKKOS_INLINE_FUNCTION static - void join( F const & f + void join( F const * const f , ValueType volatile * dst , ValueType volatile const * src ) - { f.join( WTag() , dst , src ); } + { f->join( WTag() , dst , src ); } }; @@ -364,12 +396,14 @@ private: , typename = void > struct DeduceJoin { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static - void join( F const & f + void join( F const * const f , ValueType volatile * dst , ValueType volatile const * src ) { - 
const int n = FunctorAnalysis::value_count( f ); + const int n = FunctorAnalysis::value_count( *f ); for ( int i = 0 ; i < n ; ++i ) dst[i] += src[i]; } }; @@ -377,8 +411,10 @@ private: template< class F > struct DeduceJoin< F , DISABLE , void > { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static - void join( F const & + void join( F const * const , ValueType volatile * , ValueType volatile const * ) {} }; @@ -386,7 +422,8 @@ private: template< class F , INTERFACE I > struct DeduceJoin< F , I , decltype( has_join_function<F,I>::enable_if( & F::join ) ) > - : public has_join_function<F,I> {}; + : public has_join_function<F,I> + { enum { value = true }; }; //---------------------------------------- @@ -397,38 +434,38 @@ private: struct has_init_function< F , NO_TAG_NOT_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( ValueType & ) ); + void enable_if( void (F::*)( ValueType & ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( ValueType & ) ); KOKKOS_INLINE_FUNCTION static - void init( F const & f , ValueType * dst ) - { f.init( *dst ); } + void init( F const * const f , ValueType * dst ) + { f->init( *dst ); } }; template< class F > struct has_init_function< F , NO_TAG_IS_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( ValueType * ) ); + void enable_if( void (F::*)( ValueType * ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( ValueType * ) ); KOKKOS_INLINE_FUNCTION static - void init( F const & f , ValueType * dst ) - { f.init( dst ); } + void init( F const * const f , ValueType * dst ) + { f->init( dst ); } }; template< class F > struct has_init_function< F , HAS_TAG_NOT_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , ValueType & ) ); + void enable_if( void (F::*)( WTag , ValueType & ) const ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , ValueType & ) ); + void enable_if( void (F::*)( WTag const & , ValueType & ) const 
); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , ValueType & ) ); @@ -437,18 +474,18 @@ private: void enable_if( void (*)( WTag const & , ValueType & ) ); KOKKOS_INLINE_FUNCTION static - void init( F const & f , ValueType * dst ) - { f.init( WTag(), *dst ); } + void init( F const * const f , ValueType * dst ) + { f->init( WTag(), *dst ); } }; template< class F > struct has_init_function< F , HAS_TAG_IS_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , ValueType * ) ); + void enable_if( void (F::*)( WTag , ValueType * ) const ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , ValueType * ) ); + void enable_if( void (F::*)( WTag const & , ValueType * ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , ValueType * ) ); @@ -457,8 +494,8 @@ private: void enable_if( void (*)( WTag const & , ValueType * ) ); KOKKOS_INLINE_FUNCTION static - void init( F const & f , ValueType * dst ) - { f.init( WTag(), dst ); } + void init( F const * const f , ValueType * dst ) + { f->init( WTag(), dst ); } }; template< class F = Functor @@ -466,87 +503,29 @@ private: , typename = void > struct DeduceInit { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static - void init( F const & , ValueType * dst ) { new(dst) ValueType(); } + void init( F const * const , ValueType * dst ) { new(dst) ValueType(); } }; template< class F > struct DeduceInit< F , DISABLE , void > { + enum { value = false }; + KOKKOS_INLINE_FUNCTION static - void init( F const & , ValueType * ) {} + void init( F const * const , ValueType * ) {} }; template< class F , INTERFACE I > struct DeduceInit< F , I , decltype( has_init_function<F,I>::enable_if( & F::init ) ) > - : public has_init_function<F,I> {}; - - //---------------------------------------- - -public: - - struct Reducer - { - private: - - Functor const & m_functor ; - ValueType * const m_result ; - int const m_length ; - - public: - - using reducer = Reducer ; - 
using value_type = FunctorAnalysis::value_type ; - using memory_space = void ; - using reference_type = FunctorAnalysis::reference_type ; - - KOKKOS_INLINE_FUNCTION - void join( ValueType volatile * dst - , ValueType volatile const * src ) const noexcept - { DeduceJoin<>::join( m_functor , dst , src ); } - - KOKKOS_INLINE_FUNCTION - void init( ValueType * dst ) const noexcept - { DeduceInit<>::init( m_functor , dst ); } - - KOKKOS_INLINE_FUNCTION explicit - constexpr Reducer( Functor const & arg_functor - , ValueType * arg_value = 0 - , int arg_length = 0 ) noexcept - : m_functor( arg_functor ), m_result(arg_value), m_length(arg_length) {} - - KOKKOS_INLINE_FUNCTION - constexpr int length() const noexcept { return m_length ; } - - KOKKOS_INLINE_FUNCTION - ValueType & operator[]( int i ) const noexcept - { return m_result[i]; } - - private: - - template< bool IsArray > - constexpr - typename std::enable_if< IsArray , ValueType * >::type - ref() const noexcept { return m_result ; } - - template< bool IsArray > - constexpr - typename std::enable_if< ! 
IsArray , ValueType & >::type - ref() const noexcept { return *m_result ; } - - public: - - KOKKOS_INLINE_FUNCTION - auto result() const noexcept - -> decltype( Reducer::template ref< candidate_is_array >() ) - { return Reducer::template ref< candidate_is_array >(); } - }; + : public has_init_function<F,I> + { enum { value = true }; }; //---------------------------------------- -private: - template< class , INTERFACE > struct has_final_function ; @@ -555,14 +534,14 @@ private: struct has_final_function< F , NO_TAG_NOT_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( ValueType & ) ); + void enable_if( void (F::*)( ValueType & ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( ValueType & ) ); KOKKOS_INLINE_FUNCTION static - void final( F const & f , ValueType * dst ) - { f.final( *dst ); } + void final( F const * const f , ValueType * dst ) + { f->final( *dst ); } }; // No tag, is array @@ -570,14 +549,14 @@ private: struct has_final_function< F , NO_TAG_IS_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( ValueType * ) ); + void enable_if( void (F::*)( ValueType * ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( ValueType * ) ); KOKKOS_INLINE_FUNCTION static - void final( F const & f , ValueType * dst ) - { f.final( dst ); } + void final( F const * const f , ValueType * dst ) + { f->final( dst ); } }; // Has tag, not array @@ -585,10 +564,10 @@ private: struct has_final_function< F , HAS_TAG_NOT_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , ValueType & ) ); + void enable_if( void (F::*)( WTag , ValueType & ) const ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , ValueType & ) ); + void enable_if( void (F::*)( WTag const & , ValueType & ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , ValueType & ) ); @@ -597,8 +576,8 @@ private: void enable_if( void (*)( WTag const & , ValueType & ) ); 
KOKKOS_INLINE_FUNCTION static - void final( F const & f , ValueType * dst ) - { f.final( WTag(), *dst ); } + void final( F const * const f , ValueType * dst ) + { f->final( WTag(), *dst ); } }; // Has tag, is array @@ -606,10 +585,10 @@ private: struct has_final_function< F , HAS_TAG_IS_ARRAY > { KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag , ValueType * ) ); + void enable_if( void (F::*)( WTag , ValueType * ) const ); KOKKOS_INLINE_FUNCTION static - void enable_if( void (F::*)( WTag const & , ValueType * ) ); + void enable_if( void (F::*)( WTag const & , ValueType * ) const ); KOKKOS_INLINE_FUNCTION static void enable_if( void (*)( WTag , ValueType * ) ); @@ -618,8 +597,8 @@ private: void enable_if( void (*)( WTag const & , ValueType * ) ); KOKKOS_INLINE_FUNCTION static - void final( F const & f , ValueType * dst ) - { f.final( WTag(), dst ); } + void final( F const * const f , ValueType * dst ) + { f->final( WTag(), dst ); } }; template< class F = Functor @@ -627,20 +606,139 @@ private: , typename = void > struct DeduceFinal { + enum { value = false }; + KOKKOS_INLINE_FUNCTION - static void final( F const & , ValueType * ) {} + static void final( F const * const , ValueType * ) {} }; template< class F , INTERFACE I > struct DeduceFinal< F , I , decltype( has_final_function<F,I>::enable_if( & F::final ) ) > - : public has_init_function<F,I> {}; + : public has_final_function<F,I> + { enum { value = true }; }; + + //---------------------------------------- + + template< class F = Functor , typename = void > + struct DeduceTeamShmem + { + enum { value = false }; + + static size_t team_shmem_size( F const & , int ) { return 0 ; } + }; + + template< class F > + struct DeduceTeamShmem< F , typename std::enable_if< 0 < sizeof( & F::team_shmem_size ) >::type > + { + enum { value = true }; + + static size_t team_shmem_size( F const * const f , int team_size ) + { return f->team_shmem_size( team_size ); } + }; + + template< class F > + struct 
DeduceTeamShmem< F , typename std::enable_if< 0 < sizeof( & F::shmem_size ) >::type > + { + enum { value = true }; + + static size_t team_shmem_size( F const * const f , int team_size ) + { return f->shmem_size( team_size ); } + }; + + //---------------------------------------- public: - static void final( Functor const & f , ValueType * result ) - { DeduceFinal<>::final( f , result ); } + inline static + size_t team_shmem_size( Functor const & f ) + { return DeduceTeamShmem<>::team_shmem_size( f ); } + + //---------------------------------------- + + enum { has_join_member_function = DeduceJoin<>::value }; + enum { has_init_member_function = DeduceInit<>::value }; + enum { has_final_member_function = DeduceFinal<>::value }; + + + template< class MemorySpace = typename execution_space::memory_space > + struct Reducer + { + private: + + Functor const * const m_functor ; + ValueType * const m_result ; + + template< bool IsArray > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< IsArray , FunctorAnalysis::ValueType * >::type + ref() const noexcept { return m_result ; } + + template< bool IsArray > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< ! IsArray , FunctorAnalysis::ValueType & >::type + ref() const noexcept { return *m_result ; } + + template< bool IsArray > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< IsArray , int >::type + len() const noexcept { return m_functor->value_count ; } + + template< bool IsArray > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< ! IsArray , int >::type + len() const noexcept { return candidate_is_void ? 
0 : 1 ; } + + public: + + using reducer = Reducer ; + using value_type = FunctorAnalysis::value_type ; + using memory_space = MemorySpace ; + using reference_type = FunctorAnalysis::reference_type ; + using functor_type = Functor ; // Adapts a functor + + KOKKOS_INLINE_FUNCTION constexpr + value_type * data() const noexcept { return m_result ; } + + KOKKOS_INLINE_FUNCTION constexpr + reference_type reference() const noexcept + { return Reducer::template ref< candidate_is_array >(); } + + KOKKOS_INLINE_FUNCTION constexpr + int length() const noexcept + { return Reducer::template len< candidate_is_array >(); } + KOKKOS_INLINE_FUNCTION + void copy( ValueType * const dst + , ValueType const * const src ) const noexcept + { for ( int i = 0 ; i < Reducer::template len< candidate_is_array >() ; ++i ) dst[i] = src[i] ; } + + KOKKOS_INLINE_FUNCTION + void join( ValueType volatile * dst + , ValueType volatile const * src ) const noexcept + { DeduceJoin<>::join( m_functor , dst , src ); } + + KOKKOS_INLINE_FUNCTION + void init( ValueType * dst ) const noexcept + { DeduceInit<>::init( m_functor , dst ); } + + KOKKOS_INLINE_FUNCTION + void final( ValueType * dst ) const noexcept + { DeduceFinal<>::final( m_functor , dst ); } + + Reducer( Reducer const & ) = default ; + Reducer( Reducer && ) = default ; + Reducer & operator = ( Reducer const & ) = delete ; + Reducer & operator = ( Reducer && ) = delete ; + + template< class S > + using rebind = Reducer< S > ; + + KOKKOS_INLINE_FUNCTION explicit constexpr + Reducer( Functor const * arg_functor = 0 + , ValueType * arg_value = 0 ) noexcept + : m_functor(arg_functor), m_result(arg_value) {} + }; }; } // namespace Impl diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index eb1f5ce96c28fa05d70dd2bf840133688d82b247..8cb7430035885a0c779630c212516f4c39256de2 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -44,11 +44,10 
@@ #include <Kokkos_Macros.hpp> - -#include <stddef.h> -#include <stdlib.h> -#include <stdint.h> -#include <memory.h> +#include <cstddef> +#include <cstdlib> +#include <cstdint> +#include <cstring> #include <iostream> #include <sstream> @@ -397,3 +396,4 @@ void unlock_address_hbw_space(void* ptr) { } } #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 67be86c9a3ed8595a35915f06a4b8e4ea5ded0b3..2a5c34c375c5b0c83f9684cf14e3512a8e1864b8 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -46,6 +46,7 @@ #if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #endif + /*--------------------------------------------------------------------------*/ #if defined( __INTEL_COMPILER ) && ! defined ( KOKKOS_ENABLE_CUDA ) @@ -85,10 +86,10 @@ /*--------------------------------------------------------------------------*/ -#include <stddef.h> -#include <stdlib.h> -#include <stdint.h> -#include <memory.h> +#include <cstddef> +#include <cstdlib> +#include <cstdint> +#include <cstring> #include <iostream> #include <sstream> @@ -98,6 +99,10 @@ #include <impl/Kokkos_Error.hpp> #include <Kokkos_Atomic.hpp> +#if ( defined( KOKKOS_ENABLE_ASM ) || defined ( KOKKOS_ENABLE_TM ) ) && defined ( KOKKOS_ENABLE_ISA_X86_64 ) +#include <immintrin.h> +#endif + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -490,16 +495,50 @@ void init_lock_array_host_space() { } bool lock_address_host_space(void* ptr) { +#if defined( KOKKOS_ENABLE_ISA_X86_64 ) && defined ( KOKKOS_ENABLE_TM ) + const unsigned status = _xbegin(); + + if( _XBEGIN_STARTED == status ) { + const int val = HOST_SPACE_ATOMIC_LOCKS[(( size_t(ptr) >> 2 ) & + HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK]; + + if( 0 == val ) { + HOST_SPACE_ATOMIC_LOCKS[(( size_t(ptr) 
>> 2 ) & + HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] = 1; + } else { + _xabort( 1 ); + } + + _xend(); + + return 1; + } else { +#endif return 0 == atomic_compare_exchange( &HOST_SPACE_ATOMIC_LOCKS[ (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , 0 , 1); +#if defined( KOKKOS_ENABLE_ISA_X86_64 ) && defined ( KOKKOS_ENABLE_TM ) + } +#endif } void unlock_address_host_space(void* ptr) { +#if defined( KOKKOS_ENABLE_ISA_X86_64 ) && defined ( KOKKOS_ENABLE_TM ) + const unsigned status = _xbegin(); + + if( _XBEGIN_STARTED == status ) { + HOST_SPACE_ATOMIC_LOCKS[(( size_t(ptr) >> 2 ) & + HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] = 0; + } else { +#endif atomic_exchange( &HOST_SPACE_ATOMIC_LOCKS[ (( size_t(ptr) >> 2 ) & HOST_SPACE_ATOMIC_MASK) ^ HOST_SPACE_ATOMIC_XOR_MASK] , 0); +#if defined( KOKKOS_ENABLE_ISA_X86_64 ) && defined ( KOKKOS_ENABLE_TM ) + } +#endif } } } + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index 6b5918eaefc2ee74e951b8caabdeb0d4e8c488c0..c050a16eaee005f618b252f458732dc24cad1f64 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -49,7 +49,6 @@ #include <Kokkos_Atomic.hpp> #include <Kokkos_ExecPolicy.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> -#include <impl/Kokkos_Reducer.hpp> #include <impl/Kokkos_FunctorAnalysis.hpp> //---------------------------------------------------------------------------- @@ -507,8 +506,9 @@ public: const scratch_memory_space & thread_scratch(int) const { return m_scratch.set_team_thread_mode(0,m_data.m_team_size,m_data.m_team_rank); } - //---------------------------------------- + //-------------------------------------------------------------------------- // Team collectives + //-------------------------------------------------------------------------- KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept #if defined( 
KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) @@ -519,22 +519,6 @@ public: {} #endif - template< class Closure > - KOKKOS_INLINE_FUNCTION - void team_barrier( Closure const & f ) const noexcept - { - if ( m_data.team_rendezvous() ) { - - // All threads have entered 'team_rendezvous' - // only this thread returned from 'team_rendezvous' - // with a return value of 'true' - - f(); - - m_data.team_rendezvous_release(); - } - } - //-------------------------------------------------------------------------- template< typename T > @@ -613,8 +597,9 @@ public: if ( 0 != m_data.m_team_rank ) { // Non-root copies to their local buffer: - reducer.copy( (value_type*) m_data.team_reduce_local() - , reducer.data() ); + /*reducer.copy( (value_type*) m_data.team_reduce_local() + , reducer.data() );*/ + *((value_type*) m_data.team_reduce_local()) = reducer.reference(); } // Root does not overwrite shared memory until all threads arrive @@ -630,19 +615,19 @@ public: value_type * const src = (value_type*) m_data.team_member(i)->team_reduce_local(); - reducer.join( reducer.data() , src ); + reducer.join( reducer.reference(), *src); } // Copy result to root member's buffer: - reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); - + // reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); + *((value_type*) m_data.team_reduce()) = reducer.reference(); m_data.team_rendezvous_release(); // This thread released all other threads from 'team_rendezvous' // with a return value of 'false' } else { // Copy from root member's buffer: - reducer.copy( reducer.data() , (value_type*) m_data.team_reduce() ); + reducer.reference() = *((value_type*) m_data.team_reduce()); } } } @@ -652,7 +637,7 @@ public: //-------------------------------------------------------------------------- - template< typename ValueType , class JoinOp > + /*template< typename ValueType , class JoinOp > KOKKOS_INLINE_FUNCTION ValueType team_reduce( ValueType const & value @@ -696,7 +681,7 @@ public: } 
#else { Kokkos::abort("HostThreadTeamMember team_reduce\n"); return ValueType(); } -#endif +#endif*/ template< typename T > @@ -854,7 +839,7 @@ parallel_reduce , Reducer const & reducer ) { - reducer.init( reducer.data() ); + reducer.init( reducer.reference() ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end @@ -875,9 +860,9 @@ parallel_reduce , ValueType & result ) { - Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > reducer( & result ); + Kokkos::Experimental::Sum<ValueType> reducer( result ); - reducer.init( reducer.data() ); + reducer.init( result ); for( iType i = loop_boundaries.start ; i < loop_boundaries.end @@ -888,7 +873,7 @@ parallel_reduce loop_boundaries.thread.team_reduce( reducer ); } -template< typename iType, class Space +/*template< typename iType, class Space , class Closure, class Joiner , typename ValueType > KOKKOS_INLINE_FUNCTION void parallel_reduce @@ -910,7 +895,7 @@ void parallel_reduce } loop_boundaries.thread.team_reduce( reducer ); -} +}*/ //---------------------------------------------------------------------------- /** \brief Inter-thread vector parallel_reduce. @@ -923,15 +908,13 @@ void parallel_reduce */ template< typename iType, class Space , class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce +typename std::enable_if< ! 
Kokkos::is_reducer<ValueType>::value >::type +parallel_reduce (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif for( iType i = loop_boundaries.start ; i < loop_boundaries.end ; i += loop_boundaries.increment) { @@ -939,6 +922,22 @@ void parallel_reduce } } +template< typename iType, class Space , class Lambda, typename ReducerType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< ReducerType >::value >::type +parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries, + const Lambda & lambda, + const ReducerType& reducer) +{ + reducer.init(reducer.reference()); + for( iType i = loop_boundaries.start ; + i < loop_boundaries.end ; + i += loop_boundaries.increment) { + lambda(i,reducer.reference()); + } +} + /** \brief Intra-thread vector parallel_reduce. * * Executes lambda(iType i, ValueType & val) for each i=[0..N) @@ -961,9 +960,6 @@ void parallel_reduce const JoinType & join, ValueType& result) { -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif for( iType i = loop_boundaries.start ; i < loop_boundaries.end ; i += loop_boundaries.increment ) { @@ -1055,9 +1051,8 @@ template< class Space , class FunctorType > KOKKOS_INLINE_FUNCTION void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor ) { - if ( single.team_member.team_rank() == 0 ) functor(); // 'single' does not perform a barrier. 
- // single.team_member.team_barrier( functor ); + if ( single.team_member.team_rank() == 0 ) functor(); } template< class Space , class FunctorType , typename ValueType > diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index 7489018ac641b70e97b6eba879d4c08aa0776fb9..7a887a9e29f55372a65c5c2653a14d2f0d4c5ec5 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_MEMORY_FENCE_HPP ) #define KOKKOS_MEMORY_FENCE_HPP namespace Kokkos { @@ -108,4 +109,3 @@ void load_fence() #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp index 5852efb011f357ace9df66c5d330f9e2a3f39dd1..15ce6964a0369980df0124021062b08caa5f9e39 100644 --- a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp @@ -212,6 +212,12 @@ #endif #endif +#ifdef KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION +#ifndef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION +#define KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION +#endif +#endif + #ifdef KOKKOS_HAVE_PRAGMA_LOOPCOUNT #ifndef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT #define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT KOKKOS_HAVE_PRAGMA_LOOPCOUNT @@ -423,6 +429,12 @@ #endif #endif +#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_WINTHREAD ) +#ifndef KOKKOS_ENABLE_THREADS +#define KOKKOS_ENABLE_THREADS +#endif +#endif + //------------------------------------------------------------------------------ // Deprecated macros //------------------------------------------------------------------------------ diff --git a/lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp b/lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp index 556c96d863a2b3d19a5f8c7941f3257dbca34f85..37503ec38d5277fd7b4c2227701ebbff182567a4 
100644 --- a/lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,13 +44,11 @@ #ifndef KOKKOS_PHYSICAL_LAYOUT_HPP #define KOKKOS_PHYSICAL_LAYOUT_HPP - #include <Kokkos_View.hpp> + namespace Kokkos { namespace Impl { - - struct PhysicalLayout { enum LayoutType {Left,Right,Scalar,Error}; LayoutType layout_type; @@ -71,3 +69,4 @@ struct PhysicalLayout { } } #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp index 8ea1e816cdab4fc29679ee8df8800cf2a59f026e..20eaf99136e33baaf915a2b90ce2924e846d34ec 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp @@ -44,12 +44,14 @@ #ifndef KOKKOSP_DEVICE_INFO_HPP #define KOKKOSP_DEVICE_INFO_HPP +#include <cstdint> + namespace Kokkos { namespace Profiling { - struct KokkosPDeviceInfo { - uint32_t deviceID; - }; +struct KokkosPDeviceInfo { + uint32_t deviceID; +}; } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 0c006a8c008390e330f35d849f9b93facfeb1879..98482cfab6d8a4139c9162ab48c2993021c8e141 100644 --- 
a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -41,197 +41,203 @@ //@HEADER */ -#include <impl/Kokkos_Profiling_Interface.hpp> - +#include <Kokkos_Macros.hpp> #if defined(KOKKOS_ENABLE_PROFILING) -#include <string.h> + +#include <impl/Kokkos_Profiling_Interface.hpp> +#include <cstring> namespace Kokkos { - namespace Profiling { +namespace Profiling { - SpaceHandle::SpaceHandle(const char* space_name) { - strncpy(name,space_name,64); - } +SpaceHandle::SpaceHandle(const char* space_name) { + strncpy(name,space_name,64); +} - bool profileLibraryLoaded() { - return (NULL != initProfileLibrary); - } +bool profileLibraryLoaded() { + return (NULL != initProfileLibrary); +} - void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginForCallee) { - Kokkos::fence(); - (*beginForCallee)(kernelPrefix.c_str(), devID, kernelID); - } - } +void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginForCallee) { + Kokkos::fence(); + (*beginForCallee)(kernelPrefix.c_str(), devID, kernelID); + } +} - void endParallelFor(const uint64_t kernelID) { - if(NULL != endForCallee) { - Kokkos::fence(); - (*endForCallee)(kernelID); - } - } +void endParallelFor(const uint64_t kernelID) { + if(NULL != endForCallee) { + Kokkos::fence(); + (*endForCallee)(kernelID); + } +} - void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginScanCallee) { - Kokkos::fence(); - (*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID); - } - } +void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginScanCallee) { + Kokkos::fence(); + (*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID); + } +} - void endParallelScan(const uint64_t kernelID) { - if(NULL != endScanCallee) { - Kokkos::fence(); - 
(*endScanCallee)(kernelID); - } - } +void endParallelScan(const uint64_t kernelID) { + if(NULL != endScanCallee) { + Kokkos::fence(); + (*endScanCallee)(kernelID); + } +} - void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginReduceCallee) { - Kokkos::fence(); - (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); - } - } +void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { + if(NULL != beginReduceCallee) { + Kokkos::fence(); + (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); + } +} - void endParallelReduce(const uint64_t kernelID) { - if(NULL != endReduceCallee) { - Kokkos::fence(); - (*endReduceCallee)(kernelID); - } - } +void endParallelReduce(const uint64_t kernelID) { + if(NULL != endReduceCallee) { + Kokkos::fence(); + (*endReduceCallee)(kernelID); + } +} - void pushRegion(const std::string& kName) { - if( NULL != pushRegionCallee ) { - Kokkos::fence(); - (*pushRegionCallee)(kName.c_str()); - } - } +void pushRegion(const std::string& kName) { + if( NULL != pushRegionCallee ) { + Kokkos::fence(); + (*pushRegionCallee)(kName.c_str()); + } +} - void popRegion() { - if( NULL != popRegionCallee ) { - Kokkos::fence(); - (*popRegionCallee)(); - } - } +void popRegion() { + if( NULL != popRegionCallee ) { + Kokkos::fence(); + (*popRegionCallee)(); + } +} - void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if(NULL != allocateDataCallee) { - (*allocateDataCallee)(space,label.c_str(),ptr,size); - } - } +void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { + if(NULL != allocateDataCallee) { + (*allocateDataCallee)(space,label.c_str(),ptr,size); + } +} - void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if(NULL != allocateDataCallee) { - 
(*deallocateDataCallee)(space,label.c_str(),ptr,size); - } - } +void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { + if(NULL != allocateDataCallee) { + (*deallocateDataCallee)(space,label.c_str(),ptr,size); + } +} - void initialize() { - - // Make sure initialize calls happens only once - static int is_initialized = 0; - if(is_initialized) return; - is_initialized = 1; - - void* firstProfileLibrary; - - char* envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); - - // If we do not find a profiling library in the environment then exit - // early. - if( NULL == envProfileLibrary ) { - return ; - } - - char* envProfileCopy = (char*) malloc(sizeof(char) * (strlen(envProfileLibrary) + 1)); - sprintf(envProfileCopy, "%s", envProfileLibrary); - - char* profileLibraryName = strtok(envProfileCopy, ";"); - - if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) { - firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL); - - if(NULL == firstProfileLibrary) { - std::cerr << "Error: Unable to load KokkosP library: " << - profileLibraryName << std::endl; - } else { - std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl; - - // dlsym returns a pointer to an object, while we want to assign to pointer to function - // A direct cast will give warnings hence, we have to workaround the issue by casting pointer to pointers. 
- auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for"); - beginForCallee = *((beginFunction*) &p1); - auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan"); - beginScanCallee = *((beginFunction*) &p2); - auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce"); - beginReduceCallee = *((beginFunction*) &p3); - - auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan"); - endScanCallee = *((endFunction*) &p4); - auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for"); - endForCallee = *((endFunction*) &p5); - auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce"); - endReduceCallee = *((endFunction*) &p6); - - auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library"); - initProfileLibrary = *((initFunction*) &p7); - auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library"); - finalizeProfileLibrary = *((finalizeFunction*) &p8); - - auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region"); - pushRegionCallee = *((pushFunction*) &p9); - auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region"); - popRegionCallee = *((popFunction*) &p10); - - auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data"); - allocateDataCallee = *((allocateDataFunction*) &p11); - auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data"); - deallocateDataCallee = *((deallocateDataFunction*) &p12); - - } - } - - if(NULL != initProfileLibrary) { - (*initProfileLibrary)(0, - (uint64_t) KOKKOSP_INTERFACE_VERSION, - (uint32_t) 0, - NULL); - } - - free(envProfileCopy); - } +void initialize() { + + // Make sure initialize calls happens only once + static int is_initialized = 0; + if(is_initialized) return; + is_initialized = 1; - void finalize() { - // Make sure finalize calls happens only once - static int is_finalized = 0; - if(is_finalized) return; - is_finalized = 1; + void* firstProfileLibrary; - if(NULL != finalizeProfileLibrary) { - (*finalizeProfileLibrary)(); + char* 
envProfileLibrary = getenv("KOKKOS_PROFILE_LIBRARY"); - // Set all profile hooks to NULL to prevent - // any additional calls. Once we are told to - // finalize, we mean it - initProfileLibrary = NULL; - finalizeProfileLibrary = NULL; + // If we do not find a profiling library in the environment then exit + // early. + if( NULL == envProfileLibrary ) { + return ; + } - beginForCallee = NULL; - beginScanCallee = NULL; - beginReduceCallee = NULL; - endScanCallee = NULL; - endForCallee = NULL; - endReduceCallee = NULL; + char* envProfileCopy = (char*) malloc(sizeof(char) * (strlen(envProfileLibrary) + 1)); + sprintf(envProfileCopy, "%s", envProfileLibrary); - pushRegionCallee = NULL; - popRegionCallee = NULL; + char* profileLibraryName = strtok(envProfileCopy, ";"); - allocateDataCallee = NULL; - deallocateDataCallee = NULL; + if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) { + firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL); + + if(NULL == firstProfileLibrary) { + std::cerr << "Error: Unable to load KokkosP library: " << + profileLibraryName << std::endl; + } else { +#ifdef KOKKOS_ENABLE_PROFILING_LOAD_PRINT + std::cout << "KokkosP: Library Loaded: " << profileLibraryName << std::endl; +#endif + + // dlsym returns a pointer to an object, while we want to assign to pointer to function + // A direct cast will give warnings hence, we have to workaround the issue by casting pointer to pointers. 
+ auto p1 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_for"); + beginForCallee = *((beginFunction*) &p1); + auto p2 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_scan"); + beginScanCallee = *((beginFunction*) &p2); + auto p3 = dlsym(firstProfileLibrary, "kokkosp_begin_parallel_reduce"); + beginReduceCallee = *((beginFunction*) &p3); + + auto p4 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_scan"); + endScanCallee = *((endFunction*) &p4); + auto p5 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_for"); + endForCallee = *((endFunction*) &p5); + auto p6 = dlsym(firstProfileLibrary, "kokkosp_end_parallel_reduce"); + endReduceCallee = *((endFunction*) &p6); + + auto p7 = dlsym(firstProfileLibrary, "kokkosp_init_library"); + initProfileLibrary = *((initFunction*) &p7); + auto p8 = dlsym(firstProfileLibrary, "kokkosp_finalize_library"); + finalizeProfileLibrary = *((finalizeFunction*) &p8); + + auto p9 = dlsym(firstProfileLibrary, "kokkosp_push_profile_region"); + pushRegionCallee = *((pushFunction*) &p9); + auto p10 = dlsym(firstProfileLibrary, "kokkosp_pop_profile_region"); + popRegionCallee = *((popFunction*) &p10); + + auto p11 = dlsym(firstProfileLibrary, "kokkosp_allocate_data"); + allocateDataCallee = *((allocateDataFunction*) &p11); + auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data"); + deallocateDataCallee = *((deallocateDataFunction*) &p12); - } } } + + if(NULL != initProfileLibrary) { + (*initProfileLibrary)(0, + (uint64_t) KOKKOSP_INTERFACE_VERSION, + (uint32_t) 0, + NULL); + } + + free(envProfileCopy); +} + +void finalize() { + // Make sure finalize calls happens only once + static int is_finalized = 0; + if(is_finalized) return; + is_finalized = 1; + + if(NULL != finalizeProfileLibrary) { + (*finalizeProfileLibrary)(); + + // Set all profile hooks to NULL to prevent + // any additional calls. 
Once we are told to + // finalize, we mean it + initProfileLibrary = NULL; + finalizeProfileLibrary = NULL; + + beginForCallee = NULL; + beginScanCallee = NULL; + beginReduceCallee = NULL; + endScanCallee = NULL; + endForCallee = NULL; + endReduceCallee = NULL; + + pushRegionCallee = NULL; + popRegionCallee = NULL; + + allocateDataCallee = NULL; + deallocateDataCallee = NULL; + + } +} +} } +#else +void KOKKOS_CORE_SRC_IMPL_PROFILING_INTERFACE_PREVENT_LINK_ERROR() {} #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 139a20d8f9ea99b88d21436726fa9c55fe063622..f76e5dfa04826f057345fd919f9861c78916a7df 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -44,108 +44,108 @@ #ifndef KOKKOSP_INTERFACE_HPP #define KOKKOSP_INTERFACE_HPP +#include <Kokkos_Macros.hpp> +#if defined(KOKKOS_ENABLE_PROFILING) + #include <cstddef> #include <Kokkos_Core_fwd.hpp> -#include <Kokkos_Macros.hpp> #include <string> #include <cinttypes> -#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_DeviceInfo.hpp> #include <dlfcn.h> #include <iostream> -#include <stdlib.h> -#endif +#include <cstdlib> #define KOKKOSP_INTERFACE_VERSION 20150628 -#if defined(KOKKOS_ENABLE_PROFILING) namespace Kokkos { - namespace Profiling { +namespace Profiling { - struct SpaceHandle { - SpaceHandle(const char* space_name); - char name[64]; - }; +struct SpaceHandle { + SpaceHandle(const char* space_name); + char name[64]; +}; - typedef void (*initFunction)(const int, - const uint64_t, - const uint32_t, - KokkosPDeviceInfo*); - typedef void (*finalizeFunction)(); - typedef void (*beginFunction)(const char*, const uint32_t, uint64_t*); - typedef void (*endFunction)(uint64_t); +typedef void (*initFunction)(const int, + const uint64_t, + const uint32_t, + KokkosPDeviceInfo*); +typedef void (*finalizeFunction)(); +typedef void 
(*beginFunction)(const char*, const uint32_t, uint64_t*); +typedef void (*endFunction)(uint64_t); - typedef void (*pushFunction)(const char*); - typedef void (*popFunction)(); +typedef void (*pushFunction)(const char*); +typedef void (*popFunction)(); - typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); - typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); +typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); +typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); - static initFunction initProfileLibrary = NULL; - static finalizeFunction finalizeProfileLibrary = NULL; +static initFunction initProfileLibrary = NULL; +static finalizeFunction finalizeProfileLibrary = NULL; - static beginFunction beginForCallee = NULL; - static beginFunction beginScanCallee = NULL; - static beginFunction beginReduceCallee = NULL; - static endFunction endForCallee = NULL; - static endFunction endScanCallee = NULL; - static endFunction endReduceCallee = NULL; +static beginFunction beginForCallee = NULL; +static beginFunction beginScanCallee = NULL; +static beginFunction beginReduceCallee = NULL; +static endFunction endForCallee = NULL; +static endFunction endScanCallee = NULL; +static endFunction endReduceCallee = NULL; - static pushFunction pushRegionCallee = NULL; - static popFunction popRegionCallee = NULL; +static pushFunction pushRegionCallee = NULL; +static popFunction popRegionCallee = NULL; - static allocateDataFunction allocateDataCallee = NULL; - static deallocateDataFunction deallocateDataCallee = NULL; +static allocateDataFunction allocateDataCallee = NULL; +static deallocateDataFunction deallocateDataCallee = NULL; - bool profileLibraryLoaded(); +bool profileLibraryLoaded(); - void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); - void 
endParallelFor(const uint64_t kernelID); - void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); - void endParallelScan(const uint64_t kernelID); - void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); - void endParallelReduce(const uint64_t kernelID); +void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); +void endParallelFor(const uint64_t kernelID); +void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); +void endParallelScan(const uint64_t kernelID); +void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID); +void endParallelReduce(const uint64_t kernelID); - void pushRegion(const std::string& kName); - void popRegion(); +void pushRegion(const std::string& kName); +void popRegion(); - void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); - void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); +void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); +void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); - void initialize(); - void finalize(); +void initialize(); +void finalize(); - //Define finalize_fake inline to get rid of warnings for unused static variables - inline void finalize_fake() { - if(NULL != finalizeProfileLibrary) { - (*finalizeProfileLibrary)(); +//Define finalize_fake inline to get rid of warnings for unused static variables +inline void finalize_fake() { + if(NULL != finalizeProfileLibrary) { + (*finalizeProfileLibrary)(); - // Set all profile hooks to NULL to prevent - // any additional calls. 
Once we are told to - // finalize, we mean it - beginForCallee = NULL; - beginScanCallee = NULL; - beginReduceCallee = NULL; - endScanCallee = NULL; - endForCallee = NULL; - endReduceCallee = NULL; + // Set all profile hooks to NULL to prevent + // any additional calls. Once we are told to + // finalize, we mean it + beginForCallee = NULL; + beginScanCallee = NULL; + beginReduceCallee = NULL; + endScanCallee = NULL; + endForCallee = NULL; + endReduceCallee = NULL; - allocateDataCallee = NULL; - deallocateDataCallee = NULL; + allocateDataCallee = NULL; + deallocateDataCallee = NULL; - initProfileLibrary = NULL; - finalizeProfileLibrary = NULL; - pushRegionCallee = NULL; - popRegionCallee = NULL; - } - } + initProfileLibrary = NULL; + finalizeProfileLibrary = NULL; + pushRegionCallee = NULL; + popRegionCallee = NULL; + } +} - } +} } #endif #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp b/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp deleted file mode 100644 index b3ed5f151439c659305773f1cd997376300ccf3e..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp +++ /dev/null @@ -1,317 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. 
Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_IMPL_REDUCER_HPP -#define KOKKOS_IMPL_REDUCER_HPP - -#include <impl/Kokkos_Traits.hpp> - -//---------------------------------------------------------------------------- -/* Reducer abstraction: - * 1) Provides 'join' operation - * 2) Provides 'init' operation - * 3) Provides 'copy' operation - * 4) Optionally provides result value in a memory space - * - * Created from: - * 1) Functor::operator()( destination , source ) - * 2) Functor::{ join , init ) - */ -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< typename value_type > -struct ReduceSum -{ - KOKKOS_INLINE_FUNCTION static - void copy( value_type & dest - , value_type const & src ) noexcept - { dest = src ; } - - KOKKOS_INLINE_FUNCTION static - void init( value_type & dest ) noexcept - { new( &dest ) value_type(); } - - KOKKOS_INLINE_FUNCTION static - void join( value_type volatile & dest - , value_type const volatile & src ) noexcept - { dest += src ; } - - KOKKOS_INLINE_FUNCTION static - void join( value_type & dest - , value_type const & src ) noexcept - { dest += src ; } -}; - -template< typename T - , class ReduceOp = ReduceSum< T > - , typename MemorySpace = void > -struct Reducer - : private ReduceOp - , private integral_nonzero_constant - < int , ( std::rank<T>::value == 1 ? std::extent<T>::value : 1 )> -{ -private: - - // Determine if T is simple array - - enum : int { rank = std::rank<T>::value }; - - static_assert( rank <= 1 , "Kokkos::Impl::Reducer type is at most rank-one" ); - - using length_t = - integral_nonzero_constant<int,( rank == 1 ? 
std::extent<T>::value : 1 )> ; - -public: - - using reducer = Reducer ; - using memory_space = MemorySpace ; - using value_type = typename std::remove_extent<T>::type ; - using reference_type = - typename std::conditional< ( rank != 0 ) - , value_type * - , value_type & - >::type ; -private: - - //-------------------------------------------------------------------------- - // Determine what functions 'ReduceOp' provides: - // copy( destination , source ) - // init( destination ) - // - // operator()( destination , source ) - // join( destination , source ) - // - // Provide defaults for missing optional operations - - template< class R , typename = void> - struct COPY { - KOKKOS_INLINE_FUNCTION static - void copy( R const & - , value_type * dst - , value_type const * src ) { *dst = *src ; } - }; - - template< class R > - struct COPY< R , decltype( ((R*)0)->copy( *((value_type*)0) - , *((value_type const *)0) ) ) > - { - KOKKOS_INLINE_FUNCTION static - void copy( R const & r - , value_type * dst - , value_type const * src ) { r.copy( *dst , *src ); } - }; - - template< class R , typename = void > - struct INIT { - KOKKOS_INLINE_FUNCTION static - void init( R const & , value_type * dst ) { new(dst) value_type(); } - }; - - template< class R > - struct INIT< R , decltype( ((R*)0)->init( *((value_type*)0 ) ) ) > - { - KOKKOS_INLINE_FUNCTION static - void init( R const & r , value_type * dst ) { r.init( *dst ); } - }; - - template< class R , typename V , typename = void > struct JOIN - { - // If no join function then try operator() - KOKKOS_INLINE_FUNCTION static - void join( R const & r , V * dst , V const * src ) - { r.operator()(*dst,*src); } - }; - - template< class R , typename V > - struct JOIN< R , V , decltype( ((R*)0)->join ( *((V *)0) , *((V const *)0) ) ) > - { - // If has join function use it - KOKKOS_INLINE_FUNCTION static - void join( R const & r , V * dst , V const * src ) - { r.join(*dst,*src); } - }; - - 
//-------------------------------------------------------------------------- - - value_type * const m_result ; - - template< int Rank > - KOKKOS_INLINE_FUNCTION - static constexpr - typename std::enable_if< ( 0 != Rank ) , reference_type >::type - ref( value_type * p ) noexcept { return p ; } - - template< int Rank > - KOKKOS_INLINE_FUNCTION - static constexpr - typename std::enable_if< ( 0 == Rank ) , reference_type >::type - ref( value_type * p ) noexcept { return *p ; } - -public: - - //-------------------------------------------------------------------------- - - KOKKOS_INLINE_FUNCTION - constexpr int length() const noexcept - { return length_t::value ; } - - KOKKOS_INLINE_FUNCTION - value_type * data() const noexcept - { return m_result ; } - - KOKKOS_INLINE_FUNCTION - reference_type reference() const noexcept - { return Reducer::template ref< rank >( m_result ); } - - //-------------------------------------------------------------------------- - - KOKKOS_INLINE_FUNCTION - void copy( value_type * const dest - , value_type const * const src ) const noexcept - { - for ( int i = 0 ; i < length() ; ++i ) { - Reducer::template COPY<ReduceOp>::copy( (ReduceOp &) *this , dest + i , src + i ); - } - } - - KOKKOS_INLINE_FUNCTION - void init( value_type * dest ) const noexcept - { - for ( int i = 0 ; i < length() ; ++i ) { - Reducer::template INIT<ReduceOp>::init( (ReduceOp &) *this , dest + i ); - } - } - - KOKKOS_INLINE_FUNCTION - void join( value_type * const dest - , value_type const * const src ) const noexcept - { - for ( int i = 0 ; i < length() ; ++i ) { - Reducer::template JOIN<ReduceOp,value_type>::join( (ReduceOp &) *this , dest + i , src + i ); - } - } - - KOKKOS_INLINE_FUNCTION - void join( value_type volatile * const dest - , value_type volatile const * const src ) const noexcept - { - for ( int i = 0 ; i < length() ; ++i ) { - Reducer::template JOIN<ReduceOp,value_type volatile>::join( (ReduceOp &) *this , dest + i , src + i ); - } - } - - 
//-------------------------------------------------------------------------- - - template< typename ArgT > - KOKKOS_INLINE_FUNCTION explicit - constexpr Reducer - ( ArgT * arg_value - , typename std::enable_if - < std::is_same<ArgT,value_type>::value && - std::is_default_constructible< ReduceOp >::value - , int >::type arg_length = 1 - ) noexcept - : ReduceOp(), length_t( arg_length ), m_result( arg_value ) {} - - KOKKOS_INLINE_FUNCTION explicit - constexpr Reducer( ReduceOp const & arg_op - , value_type * arg_value = 0 - , int arg_length = 1 ) noexcept - : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} - - KOKKOS_INLINE_FUNCTION explicit - constexpr Reducer( ReduceOp && arg_op - , value_type * arg_value = 0 - , int arg_length = 1 ) noexcept - : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} - - Reducer( Reducer const & ) = default ; - Reducer( Reducer && ) = default ; - Reducer & operator = ( Reducer const & ) = default ; - Reducer & operator = ( Reducer && ) = default ; -}; - -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { - -template< typename ValueType > -constexpr -Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > -Sum( ValueType & arg_value ) -{ - static_assert( std::is_trivial<ValueType>::value - , "Kokkos reducer requires trivial value type" ); - return Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & arg_value ); -} - -template< typename ValueType > -constexpr -Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > > -Sum( ValueType * arg_value , int arg_length ) -{ - static_assert( std::is_trivial<ValueType>::value - , "Kokkos reducer requires trivial value type" ); - return Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > >( arg_value , arg_length ); -} - 
-//---------------------------------------------------------------------------- - -template< typename ValueType , class JoinType > -Impl::Reducer< ValueType , JoinType > -reducer( ValueType & value , JoinType const & lambda ) -{ - return Impl::Reducer< ValueType , JoinType >( lambda , & value ); -} - -} // namespace Kokkos - -#endif /* #ifndef KOKKOS_IMPL_REDUCER_HPP */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp index 79496133061145aee8786aecb21aa86117b1dbc4..755271c07e4e473448b0800d4395582a85ca49fa 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -41,13 +41,15 @@ //@HEADER */ -#include <stdlib.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_SERIAL ) + +#include <cstdlib> #include <sstream> #include <Kokkos_Serial.hpp> #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_Error.hpp> -#if defined( KOKKOS_ENABLE_SERIAL ) /*--------------------------------------------------------------------------*/ @@ -175,8 +177,11 @@ void Serial::finalize() #endif } +const char* Serial::name() { return "Serial"; } + } // namespace Kokkos +#else +void KOKKOS_CORE_SRC_IMPL_SERIAL_PREVENT_LINK_ERROR() {} #endif // defined( KOKKOS_ENABLE_SERIAL ) - diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index d22d604fbc2f02e2f18c6c24d69840e7f33e7e98..76297161b175c705f986e70708ec56279eb28f02 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <Kokkos_Core.hpp> - +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) +#include <Kokkos_Core.hpp> + #include <impl/Kokkos_Serial_Task.hpp> #include <impl/Kokkos_TaskQueue_impl.hpp> @@ -66,6 +67,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + // Set default buffers + serial_resize_thread_team_data( 0 /* global reduce buffer */ + , 512 /* team reduce buffer */ + , 0 /* team shared buffer */ + , 0 /* thread local buffer */ + ); + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); Member exec( *data ); @@ -126,7 +134,7 @@ void TaskQueueSpecialization< Kokkos::Serial > :: // Loop until no runnable task task_root_type * task = end ; - + do { task = end ; @@ -148,5 +156,7 @@ void TaskQueueSpecialization< Kokkos::Serial > :: }} /* namespace Kokkos::Impl */ +#else +void KOKKOS_CORE_SRC_IMPL_SERIAL_TASK_PREVENT_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_SERIAL ) && defined( KOKKOS_ENABLE_TASKDAG ) */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index ac7f17c0ea9e314137560626e0b0467faf5ff90d..2eb2b5cf529958b2fd10a8b0677e0cb40da40676 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -44,6 +44,7 @@ #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP #define KOKKOS_IMPL_SERIAL_TASK_HPP +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) #include 
<impl/Kokkos_TaskQueue.hpp> diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index 1ae51742e0a2cbe437abf17c7dedb8658c3e3e94..e28c1194a7984b73da38fd7fc952e63e0c7574c9 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -93,9 +93,9 @@ is_sane( SharedAllocationRecord< void , void > * arg_record ) ok = ok_root && ok_prev_next && ok_next_prev && ok_count ; if ( ! ok ) { - //Formatting dependent on sizeof(uintptr_t) + //Formatting dependent on sizeof(uintptr_t) const char * format_string; - + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "Kokkos::Impl::SharedAllocationRecord failed is_sane: rec(0x%.12lx){ m_count(%d) m_root(0x%.12lx) m_next(0x%.12lx) m_prev(0x%.12lx) m_next->m_prev(0x%.12lx) m_prev->m_next(0x%.12lx) }\n"; } @@ -104,7 +104,7 @@ if ( ! ok ) { } fprintf(stderr - , format_string + , format_string , reinterpret_cast< uintptr_t >( rec ) , rec->use_count() , reinterpret_cast< uintptr_t >( rec->m_root ) @@ -122,7 +122,7 @@ if ( ! 
ok ) { } } - return ok ; + return ok ; } SharedAllocationRecord<void,void> * @@ -245,9 +245,11 @@ decrement( SharedAllocationRecord< void , void > * arg_record ) else { // before: arg_record->m_root == arg_record->m_prev // after: arg_record->m_root == arg_record->m_next - root_next = arg_record->m_next ; + root_next = arg_record->m_next ; } + Kokkos::memory_fence(); + // Unlock the list: if ( zero != Kokkos::atomic_exchange( & arg_record->m_root->m_next , root_next ) ) { Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord failed decrement unlocking"); @@ -282,7 +284,7 @@ print_host_accessible_records( std::ostream & s if ( detail ) { do { - //Formatting dependent on sizeof(uintptr_t) + //Formatting dependent on sizeof(uintptr_t) const char * format_string; if (sizeof(uintptr_t) == sizeof(unsigned long)) { @@ -311,13 +313,13 @@ print_host_accessible_records( std::ostream & s else { do { if ( r->m_alloc_ptr ) { - //Formatting dependent on sizeof(uintptr_t) + //Formatting dependent on sizeof(uintptr_t) const char * format_string; - if (sizeof(uintptr_t) == sizeof(unsigned long)) { + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "%s [ 0x%.12lx + %ld ] %s\n"; } - else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { format_string = "%s [ 0x%.12llx + %ld ] %s\n"; } @@ -341,4 +343,3 @@ print_host_accessible_records( std::ostream & s } /* namespace Impl */ } /* namespace Kokkos */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 24061d574badd856c9128c9e1761e67465852c85..4dc61bb02e027d82fd94022e636778c2a17e24ed 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#ifndef KOKKOS_SHARED_ALLOC_HPP_ -#define KOKKOS_SHARED_ALLOC_HPP_ +#ifndef KOKKOS_SHARED_ALLOC_HPP +#define KOKKOS_SHARED_ALLOC_HPP -#include <stdint.h> +#include <cstdint> #include <string> namespace Kokkos { @@ -112,12 +112,12 @@ public: static int tracking_enabled() { return s_tracking_enabled ; } - /**\brief A host process thread claims and disables the + /**\brief A host process thread claims and disables the * shared allocation tracking flag. */ static void tracking_claim_and_disable(); - /**\brief A host process thread releases and enables the + /**\brief A host process thread releases and enables the * shared allocation tracking flag. */ static void tracking_release_and_enable(); @@ -225,7 +225,7 @@ public: // inserts the record into the tracking list. Decrementing the count from one to zero // removes from the trakcing list and deallocates. 
KOKKOS_INLINE_FUNCTION static - SharedAllocationRecord * allocate( const MemorySpace & arg_space + SharedAllocationRecord * allocate( const MemorySpace & arg_space , const std::string & arg_label , const size_t arg_alloc ) @@ -400,3 +400,4 @@ public: } /* namespace Kokkos */ #endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp b/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp index 25e2ec9dc1849db862d9cb0d01bfd817c584b3b8..7bbe0fea9595f8fb3dbd9eb7c1cb9c8533a2b791 100644 --- a/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -76,4 +76,3 @@ struct StaticAssertAssignable< const A , A > { typedef const A type ; }; #endif /* KOKKOS_STATICASSERT_HPP */ - diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index b514df351725ac55e88ea1c2e92eec4b1711e6b4..bee98e6745e0be66c3afe6b7d308abd08b42bef9 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -46,6 +46,7 @@ #ifndef KOKKOS_IMPL_TASKQUEUE_HPP #define KOKKOS_IMPL_TASKQUEUE_HPP +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) #include <string> @@ -104,7 +105,7 @@ private: using specialization = TaskQueueSpecialization< execution_space > ; using memory_space = typename specialization::memory_space ; using device_type = Kokkos::Device< execution_space , memory_space > ; - using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ; + using memory_pool = Kokkos::MemoryPool< device_type > ; using task_root_type = Kokkos::Impl::TaskBase<execution_space,void,void> ; struct Destroy { @@ -134,11 +135,7 @@ private: TaskQueue & operator = ( TaskQueue && ) = delete ; TaskQueue & operator = ( TaskQueue const & ) = delete ; - TaskQueue - ( const memory_space & arg_space - , unsigned const arg_memory_pool_capacity - , unsigned const arg_memory_pool_superblock_capacity_log2 - ); + TaskQueue( const memory_pool & arg_memory_pool ); // Schedule a task // Precondition: @@ -487,7 +484,7 @@ public: , int arg_task_type , int arg_priority ) - : root_type( arg_apply + : root_type( arg_apply , arg_queue , arg_dependence , arg_ref_count @@ -589,7 +586,7 @@ public: , int arg_priority , FunctorType && arg_functor ) - : base_type( arg_apply + : base_type( arg_apply , arg_queue , arg_dependence , arg_ref_count @@ -612,3 +609,4 @@ public: #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* 
#ifndef KOKKOS_IMPL_TASKQUEUE_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index 23f5d3cd30dbbf87c024af935356961c1642a022..aee381afad69cfbdf9e2590b601bc188484d2215 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -41,6 +41,7 @@ //@HEADER */ +#include <Kokkos_Macros.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) namespace Kokkos { @@ -58,13 +59,8 @@ void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() template< typename ExecSpace > TaskQueue< ExecSpace >::TaskQueue - ( const TaskQueue< ExecSpace >::memory_space & arg_space - , unsigned const arg_memory_pool_capacity - , unsigned const arg_memory_pool_superblock_capacity_log2 - ) - : m_memory( arg_space - , arg_memory_pool_capacity - , arg_memory_pool_superblock_capacity_log2 ) + ( typename TaskQueue< ExecSpace >::memory_pool const & arg_memory_pool ) + : m_memory( arg_memory_pool ) , m_ready() , m_accum_alloc(0) , m_count_alloc(0) @@ -379,7 +375,7 @@ void TaskQueue< ExecSpace >::schedule_runnable // task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); task_root_type * dep = task->m_next ; task->m_next = zero ; - const bool is_ready = + const bool is_ready = ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); if ( ( 0 != dep ) && respawn ) { @@ -659,3 +655,4 @@ void TaskQueue< ExecSpace >::complete } /* namespace Kokkos */ #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp index 7f1400156fe3706f8327720cddfae6b0ff012ece..630041757606ba85ed6dd20ab0ec33172e74794b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Traits.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Traits.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,8 +44,8 @@ #ifndef KOKKOSTRAITS_HPP #define KOKKOSTRAITS_HPP -#include <stddef.h> -#include <stdint.h> +#include <cstddef> +#include <cstdint> #include <Kokkos_Macros.hpp> #include <string> #include <type_traits> @@ -93,7 +93,7 @@ public: template< typename DefaultType , template< typename > class Condition , typename ... Pack > -struct has_condition +struct has_condition { enum { value = false }; typedef DefaultType type ; @@ -355,7 +355,7 @@ struct is_integral : public integral_constant< bool , std::is_same< T , uint8_t >::value || std::is_same< T , uint16_t >::value || std::is_same< T , uint32_t >::value || - std::is_same< T , uint64_t >::value + std::is_same< T , uint64_t >::value )> {}; //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp index d72cde03fd2bb1ae40559c80d007f7a8836636c0..580d152dc79a1db00d17bcf2e31e97c5bb2c0907 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -45,7 +45,7 @@ #define KOKKOS_CORE_IMPL_UTILITIES_HPP #include <Kokkos_Macros.hpp> -#include <stdint.h> +#include <cstdint> #include <type_traits> //---------------------------------------------------------------------------- @@ -413,3 +413,4 @@ struct inclusive_scan_integer_sequence #endif 
//KOKKOS_CORE_IMPL_UTILITIES_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index f76ddfb3a796dd9ef77e85edbe4c813f40e18454..900bd88f1c164891e8f14310da62fb6bbe9ef683 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -54,6 +54,9 @@ #include <impl/Kokkos_Traits.hpp> #include <impl/Kokkos_ViewCtor.hpp> #include <impl/Kokkos_Atomic_View.hpp> +#if defined(KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#endif //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -2478,10 +2481,21 @@ struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ > { destroy = arg ; if ( ! space.in_parallel() ) { +#if defined(KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID); + } +#endif const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > closure( *this , PolicyType( 0 , n ) ); closure.execute(); space.fence(); +#if defined(KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif } else { for ( size_t i = 0 ; i < n ; ++i ) operator()(i); @@ -2524,10 +2538,21 @@ struct ViewValueFunctor< ExecSpace , ValueType , true /* is_scalar */ > void construct_shared_allocation() { if ( ! 
space.in_parallel() ) { +#if defined(KOKKOS_ENABLE_PROFILING) + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::beginParallelFor("Kokkos::View::initialization", 0, &kpID); + } +#endif const Kokkos::Impl::ParallelFor< ViewValueFunctor , PolicyType > closure( *this , PolicyType( 0 , n ) ); closure.execute(); space.fence(); +#if defined(KOKKOS_ENABLE_PROFILING) + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } +#endif } else { for ( size_t i = 0 ; i < n ; ++i ) operator()(i); @@ -3126,16 +3151,18 @@ void view_error_operator_bounds view_error_operator_bounds<R+1>(buf+n,len-n,map,args...); } -template< class MapType , class ... Args > +template< class MemorySpace , class MapType , class ... Args > KOKKOS_INLINE_FUNCTION void view_verify_operator_bounds - ( const char* label , const MapType & map , Args ... args ) + ( Kokkos::Impl::SharedAllocationTracker const & tracker + , const MapType & map , Args ... args ) { if ( ! view_verify_operator_bounds<0>( map , args ... ) ) { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) enum { LEN = 1024 }; char buffer[ LEN ]; - int n = snprintf(buffer,LEN,"View bounds error of view %s (", label); + const std::string label = tracker.template get_label<MemorySpace>(); + int n = snprintf(buffer,LEN,"View bounds error of view %s (",label.c_str()); view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... 
); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else diff --git a/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp b/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp index 9c770c68d0734c7a62a1b1280b9d01cc981da37e..8a17623c3ec299165e8ed1608b7dbd75b8c3c748 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp @@ -238,5 +238,3 @@ T safe_load(T const * const ptr) #endif - - diff --git a/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp index c3c427bcced5e2c94be8c1bdb2675b76778662af..55d7651eca57c5d11bcf5018c72e04ff9a183ba4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_hwloc.cpp @@ -723,4 +723,3 @@ std::pair<unsigned,unsigned> get_this_thread_coordinate() #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp index 93ff6c48a77d00e45e3028413d5c02f4020d65bc..101b714fcd7007486b9b6aef659108dd87643818 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp @@ -42,6 +42,7 @@ */ #include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) #include <impl/Kokkos_spinwait.hpp> @@ -128,7 +129,6 @@ namespace Kokkos { namespace Impl { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) void spinwait_while_equal( volatile int32_t & flag , const int32_t value ) { @@ -174,8 +174,10 @@ void spinwait_until_equal( volatile int64_t & flag , const int64_t value ) Kokkos::load_fence(); } -#endif - } /* namespace Impl */ } /* namespace Kokkos */ +#else +void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index caf6c50129f090cd13cd92e67a79880949e821a1..5d6f25ac958e9abac8e87127fb815871b11692b9 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ 
b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -23,10 +23,18 @@ IF(Kokkos_ENABLE_Serial) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Serial SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp + serial/TestSerial_AtomicOperations.cpp + serial/TestSerial_AtomicViews.cpp serial/TestSerial_Atomics.cpp + serial/TestSerial_Complex.cpp + serial/TestSerial_Init.cpp + serial/TestSerial_MDRange.cpp serial/TestSerial_Other.cpp + serial/TestSerial_RangePolicy.cpp serial/TestSerial_Reductions.cpp + serial/TestSerial_Scan.cpp + serial/TestSerial_SharedAlloc.cpp serial/TestSerial_SubView_a.cpp serial/TestSerial_SubView_b.cpp serial/TestSerial_SubView_c01.cpp @@ -42,8 +50,13 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_SubView_c11.cpp serial/TestSerial_SubView_c12.cpp serial/TestSerial_Team.cpp - serial/TestSerial_ViewAPI_a.cpp + serial/TestSerial_TeamReductionScan.cpp + serial/TestSerial_TeamScratch.cpp serial/TestSerial_ViewAPI_b.cpp + serial/TestSerial_ViewMapping_a.cpp + serial/TestSerial_ViewMapping_b.cpp + serial/TestSerial_ViewMapping_subview.cpp + serial/TestSerial_ViewOfClass.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -55,10 +68,18 @@ IF(Kokkos_ENABLE_Pthread) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Threads SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp + threads/TestThreads_AtomicOperations.cpp + threads/TestThreads_AtomicViews.cpp threads/TestThreads_Atomics.cpp + threads/TestThreads_Complex.cpp + threads/TestThreads_Init.cpp + threads/TestThreads_MDRange.cpp threads/TestThreads_Other.cpp + threads/TestThreads_RangePolicy.cpp threads/TestThreads_Reductions.cpp + threads/TestThreads_Scan.cpp + threads/TestThreads_SharedAlloc.cpp threads/TestThreads_SubView_a.cpp threads/TestThreads_SubView_b.cpp threads/TestThreads_SubView_c01.cpp @@ -74,8 +95,13 @@ IF(Kokkos_ENABLE_Pthread) threads/TestThreads_SubView_c11.cpp threads/TestThreads_SubView_c12.cpp threads/TestThreads_Team.cpp - threads/TestThreads_ViewAPI_a.cpp + 
threads/TestThreads_TeamReductionScan.cpp + threads/TestThreads_TeamScratch.cpp threads/TestThreads_ViewAPI_b.cpp + threads/TestThreads_ViewMapping_a.cpp + threads/TestThreads_ViewMapping_b.cpp + threads/TestThreads_ViewMapping_subview.cpp + threads/TestThreads_ViewOfClass.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -87,10 +113,18 @@ IF(Kokkos_ENABLE_OpenMP) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_OpenMP SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp + openmp/TestOpenMP_AtomicOperations.cpp + openmp/TestOpenMP_AtomicViews.cpp openmp/TestOpenMP_Atomics.cpp + openmp/TestOpenMP_Complex.cpp + openmp/TestOpenMP_Init.cpp + openmp/TestOpenMP_MDRange.cpp openmp/TestOpenMP_Other.cpp + openmp/TestOpenMP_RangePolicy.cpp openmp/TestOpenMP_Reductions.cpp + openmp/TestOpenMP_Scan.cpp + openmp/TestOpenMP_SharedAlloc.cpp openmp/TestOpenMP_SubView_a.cpp openmp/TestOpenMP_SubView_b.cpp openmp/TestOpenMP_SubView_c01.cpp @@ -105,9 +139,14 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_SubView_c10.cpp openmp/TestOpenMP_SubView_c11.cpp openmp/TestOpenMP_SubView_c12.cpp + openmp/TestOpenMP_Task.cpp openmp/TestOpenMP_Team.cpp - openmp/TestOpenMP_ViewAPI_a.cpp + openmp/TestOpenMP_TeamReductionScan.cpp openmp/TestOpenMP_ViewAPI_b.cpp + openmp/TestOpenMP_ViewMapping_a.cpp + openmp/TestOpenMP_ViewMapping_b.cpp + openmp/TestOpenMP_ViewMapping_subview.cpp + openmp/TestOpenMP_ViewOfClass.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -119,8 +158,9 @@ IF(Kokkos_ENABLE_Qthreads) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Qthreads SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp qthreads/TestQthreads_Atomics.cpp + qthreads/TestQthreads_Complex.cpp qthreads/TestQthreads_Other.cpp qthreads/TestQthreads_Reductions.cpp qthreads/TestQthreads_SubView_a.cpp @@ -151,11 +191,28 @@ IF(Kokkos_ENABLE_Cuda) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Cuda SOURCES - UnitTestMain.cpp + UnitTestMainInit.cpp + cuda/TestCudaHostPinned_SharedAlloc.cpp + 
cuda/TestCudaHostPinned_ViewAPI.cpp + cuda/TestCudaHostPinned_ViewMapping_a.cpp + cuda/TestCudaHostPinned_ViewMapping_b.cpp + cuda/TestCudaHostPinned_ViewMapping_subview.cpp + cuda/TestCudaUVM_SharedAlloc.cpp + cuda/TestCudaUVM_ViewAPI.cpp + cuda/TestCudaUVM_ViewMapping_a.cpp + cuda/TestCudaUVM_ViewMapping_b.cpp + cuda/TestCudaUVM_ViewMapping_subview.cpp + cuda/TestCuda_AtomicOperations.cpp + cuda/TestCuda_AtomicViews.cpp cuda/TestCuda_Atomics.cpp + cuda/TestCuda_Complex.cpp + cuda/TestCuda_Init.cpp + cuda/TestCuda_MDRange.cpp cuda/TestCuda_Other.cpp - cuda/TestCuda_Reductions_a.cpp - cuda/TestCuda_Reductions_b.cpp + cuda/TestCuda_RangePolicy.cpp + cuda/TestCuda_Reductions.cpp + cuda/TestCuda_Scan.cpp + cuda/TestCuda_SharedAlloc.cpp cuda/TestCuda_Spaces.cpp cuda/TestCuda_SubView_a.cpp cuda/TestCuda_SubView_b.cpp @@ -171,15 +228,15 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_SubView_c10.cpp cuda/TestCuda_SubView_c11.cpp cuda/TestCuda_SubView_c12.cpp + cuda/TestCuda_Task.cpp cuda/TestCuda_Team.cpp - cuda/TestCuda_ViewAPI_a.cpp + cuda/TestCuda_TeamReductionScan.cpp + cuda/TestCuda_TeamScratch.cpp cuda/TestCuda_ViewAPI_b.cpp - cuda/TestCuda_ViewAPI_c.cpp - cuda/TestCuda_ViewAPI_d.cpp - cuda/TestCuda_ViewAPI_e.cpp - cuda/TestCuda_ViewAPI_f.cpp - cuda/TestCuda_ViewAPI_g.cpp - cuda/TestCuda_ViewAPI_h.cpp + cuda/TestCuda_ViewMapping_a.cpp + cuda/TestCuda_ViewMapping_b.cpp + cuda/TestCuda_ViewMapping_subview.cpp + cuda/TestCuda_ViewOfClass.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -189,7 +246,13 @@ ENDIF() TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_Default - SOURCES UnitTestMain.cpp TestDefaultDeviceType.cpp TestDefaultDeviceType_a.cpp + SOURCES + UnitTestMainInit.cpp + default/TestDefaultDeviceType.cpp + default/TestDefaultDeviceType_a.cpp + default/TestDefaultDeviceType_b.cpp + default/TestDefaultDeviceType_c.cpp + default/TestDefaultDeviceType_d.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -199,7 +262,7 @@ 
TRIBITS_ADD_EXECUTABLE_AND_TEST( foreach(INITTESTS_NUM RANGE 1 16) TRIBITS_ADD_EXECUTABLE_AND_TEST( UnitTest_DefaultInit_${INITTESTS_NUM} - SOURCES UnitTestMain.cpp TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp + SOURCES UnitTestMain.cpp default/TestDefaultDeviceTypeInit_${INITTESTS_NUM}.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index d93830a28d9db5ae50306c70ae5187062a07c594..41f192a4866ceaebd18211448daa5abe01ff16f4 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -3,9 +3,11 @@ KOKKOS_PATH = ../.. GTEST_PATH = ../../tpls/gtest vpath %.cpp ${KOKKOS_PATH}/core/unit_test +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/default vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmptarget vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda @@ -16,7 +18,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -33,52 +35,134 @@ TEST_TARGETS = TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - OBJ_CUDA = TestCuda_Other.o TestCuda_Reductions_a.o TestCuda_Reductions_b.o TestCuda_Atomics.o TestCuda_Team.o TestCuda_Spaces.o - OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o + OBJ_CUDA = UnitTestMainInit.o gtest-all.o + OBJ_CUDA += TestCuda_Init.o + OBJ_CUDA += TestCuda_SharedAlloc.o TestCudaUVM_SharedAlloc.o TestCudaHostPinned_SharedAlloc.o + OBJ_CUDA += TestCuda_RangePolicy.o + OBJ_CUDA += TestCuda_ViewAPI_b.o + OBJ_CUDA += TestCuda_ViewMapping_a.o TestCuda_ViewMapping_b.o TestCuda_ViewMapping_subview.o + OBJ_CUDA += TestCudaUVM_ViewAPI.o + OBJ_CUDA += TestCudaUVM_ViewMapping_a.o 
TestCudaUVM_ViewMapping_b.o TestCudaUVM_ViewMapping_subview.o + OBJ_CUDA += TestCudaHostPinned_ViewAPI.o + OBJ_CUDA += TestCudaHostPinned_ViewMapping_a.o TestCudaHostPinned_ViewMapping_b.o TestCudaHostPinned_ViewMapping_subview.o + OBJ_CUDA += TestCuda_ViewOfClass.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestCuda_SubView_c_all.o + OBJ_CUDA += TestCuda_SubView_c_all.o else + OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o endif - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o - OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o - OBJ_CUDA += TestCuda_ViewAPI_s.o - OBJ_CUDA += UnitTestMain.o gtest-all.o + OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o + OBJ_CUDA += TestCuda_Complex.o + OBJ_CUDA += TestCuda_AtomicOperations.o TestCuda_AtomicViews.o TestCuda_Atomics.o + OBJ_CUDA += TestCuda_Team.o TestCuda_TeamScratch.o + OBJ_CUDA += TestCuda_TeamReductionScan.o + OBJ_CUDA += TestCuda_Other.o + OBJ_CUDA += TestCuda_MDRange.o + OBJ_CUDA += TestCuda_Task.o + OBJ_CUDA += TestCuda_Spaces.o + TARGETS += KokkosCore_UnitTest_Cuda + TEST_TARGETS += test-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o + + OBJ_THREADS = UnitTestMainInit.o gtest-all.o + OBJ_THREADS += TestThreads_Init.o + OBJ_THREADS += TestThreads_SharedAlloc.o + OBJ_THREADS += TestThreads_RangePolicy.o + OBJ_THREADS += TestThreads_ViewAPI_b.o + OBJ_THREADS += TestThreads_ViewMapping_a.o TestThreads_ViewMapping_b.o TestThreads_ViewMapping_subview.o + OBJ_THREADS += 
TestThreads_ViewOfClass.o OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o - OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o + OBJ_THREADS += TestThreads_Complex.o + OBJ_THREADS += TestThreads_AtomicOperations.o TestThreads_AtomicViews.o TestThreads_Atomics.o + OBJ_THREADS += TestThreads_Team.o TestThreads_TeamScratch.o + OBJ_THREADS += TestThreads_TeamReductionScan.o + OBJ_THREADS += TestThreads_Other.o + OBJ_THREADS += TestThreads_MDRange.o + TARGETS += KokkosCore_UnitTest_Threads + TEST_TARGETS += test-threads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - OBJ_OPENMP = TestOpenMP_Other.o TestOpenMP_Reductions.o TestOpenMP_Atomics.o TestOpenMP_Team.o - OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o + OBJ_OPENMP = UnitTestMainInit.o gtest-all.o + OBJ_OPENMP += TestOpenMP_Init.o + OBJ_OPENMP += TestOpenMP_SharedAlloc.o + OBJ_OPENMP += TestOpenMP_RangePolicy.o + OBJ_OPENMP += TestOpenMP_ViewAPI_b.o + OBJ_OPENMP += TestOpenMP_ViewMapping_a.o TestOpenMP_ViewMapping_b.o TestOpenMP_ViewMapping_subview.o + OBJ_OPENMP += TestOpenMP_ViewOfClass.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) OBJ_OPENMP += TestOpenMP_SubView_c_all.o else + OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o OBJ_OPENMP += 
TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o endif - OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o + OBJ_OPENMP += TestOpenMP_Complex.o + OBJ_OPENMP += TestOpenMP_AtomicOperations.o TestOpenMP_AtomicViews.o TestOpenMP_Atomics.o + OBJ_OPENMP += TestOpenMP_Team.o TestOpenMP_TeamScratch.o + OBJ_OPENMP += TestOpenMP_TeamReductionScan.o + OBJ_OPENMP += TestOpenMP_Other.o + OBJ_OPENMP += TestOpenMP_MDRange.o + OBJ_OPENMP += TestOpenMP_Task.o + TARGETS += KokkosCore_UnitTest_OpenMP + TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + OBJ_OPENMPTARGET = UnitTestMainInit.o gtest-all.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Init.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SharedAlloc.o + OBJ_OPENMPTARGET += TestOpenMPTarget_RangePolicy.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewAPI_b.o #Some commented out code + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_a.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_b.o + OBJ_OPENMPTARGET += TestOpenMPTarget_ViewMapping_subview.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_ViewOfClass.o + OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_a.o TestOpenMPTarget_SubView_b.o + #The following subview tests need something like UVM: + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c01.o TestOpenMPTarget_SubView_c02.o TestOpenMPTarget_SubView_c03.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c04.o TestOpenMPTarget_SubView_c05.o TestOpenMPTarget_SubView_c06.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c07.o TestOpenMPTarget_SubView_c08.o TestOpenMPTarget_SubView_c09.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions + #OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o + OBJ_OPENMPTARGET += 
TestOpenMPTarget_AtomicOperations.o + OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o + OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics + #OBJ_OPENMPTARGET += TestOpenMPTarget_Team.o # There is still a static function in this + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamScratch.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_TeamReductionScan.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Other.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_MDRange.o + #OBJ_OPENMPTARGET += TestOpenMPTarget_Task.o + + TARGETS += KokkosCore_UnitTest_OpenMPTarget + + TEST_TARGETS += test-openmptarget + +endif + ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o @@ -92,22 +176,42 @@ else endif OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Qthreads + + OBJ_QTHREADS2 = UnitTestMainInit.o gtest-all.o + OBJ_QTHREADS2 += TestQthreads_Complex.o + TARGETS += KokkosCore_UnitTest_Qthreads2 + TEST_TARGETS += test-qthreads endif ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o - OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o + OBJ_SERIAL = UnitTestMainInit.o gtest-all.o + OBJ_SERIAL += TestSerial_Init.o + OBJ_SERIAL += TestSerial_SharedAlloc.o + OBJ_SERIAL += TestSerial_RangePolicy.o + OBJ_SERIAL += TestSerial_ViewAPI_b.o + OBJ_SERIAL += TestSerial_ViewMapping_a.o TestSerial_ViewMapping_b.o TestSerial_ViewMapping_subview.o + OBJ_SERIAL += TestSerial_ViewOfClass.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestSerial_SubView_c_all.o + OBJ_SERIAL += TestSerial_SubView_c_all.o else - OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o - OBJ_SERIAL += TestSerial_SubView_c04.o 
TestSerial_SubView_c05.o TestSerial_SubView_c06.o - OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o - OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o + OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o endif - OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o + OBJ_SERIAL += TestSerial_Complex.o + OBJ_SERIAL += TestSerial_AtomicOperations.o TestSerial_AtomicViews.o TestSerial_Atomics.o + OBJ_SERIAL += TestSerial_Team.o TestSerial_TeamScratch.o + OBJ_SERIAL += TestSerial_TeamReductionScan.o + OBJ_SERIAL += TestSerial_Other.o + OBJ_SERIAL += TestSerial_MDRange.o + OBJ_SERIAL += TestSerial_Task.o + TARGETS += KokkosCore_UnitTest_Serial + TEST_TARGETS += test-serial endif @@ -115,7 +219,11 @@ OBJ_HWLOC = TestHWLOC.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_HWLOC TEST_TARGETS += test-hwloc -OBJ_DEFAULT = TestDefaultDeviceType.o TestDefaultDeviceType_a.o TestDefaultDeviceType_b.o TestDefaultDeviceType_c.o TestDefaultDeviceType_d.o UnitTestMain.o gtest-all.o +OBJ_DEFAULT = UnitTestMainInit.o gtest-all.o +ifneq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + OBJ_DEFAULT += TestDefaultDeviceType.o TestDefaultDeviceType_a.o TestDefaultDeviceType_b.o TestDefaultDeviceType_c.o TestDefaultDeviceType_d.o +endif + TARGETS += KokkosCore_UnitTest_Default TEST_TARGETS += test-default @@ -127,31 +235,37 @@ INITTESTS_TEST_TARGETS := $(addprefix test-default-init-,${INITTESTS_NUMBERS}) TEST_TARGETS += ${INITTESTS_TEST_TARGETS} 
KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Cuda + $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Cuda KokkosCore_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Threads + $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Threads KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_OpenMP + $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_OpenMP + +KokkosCore_UnitTest_OpenMPTarget: $(OBJ_OPENMPTARGET) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMPTARGET) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_OpenMPTarget KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Serial + $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Serial KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthreads + $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads + +KokkosCore_UnitTest_Qthreads2: $(OBJ_QTHREADS2) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(EXTRA_PATH) $(OBJ_QTHREADS2) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Qthreads2 KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) - 
$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_HWLOC + $(LINK) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_HWLOC KokkosCore_UnitTest_AllocationTracker: $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_AllocationTracker + $(LINK) $(EXTRA_PATH) $(OBJ_ALLOCATIONTRACKER) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_AllocationTracker KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Default + $(LINK) $(EXTRA_PATH) $(OBJ_DEFAULT) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_Default ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDeviceTypeInit_%.o UnitTestMain.o gtest-all.o $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) TestDefaultDeviceTypeInit_$*.o UnitTestMain.o gtest-all.o $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_DefaultDeviceTypeInit_$* + $(LINK) $(EXTRA_PATH) TestDefaultDeviceTypeInit_$*.o UnitTestMain.o gtest-all.o $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosCore_UnitTest_DefaultDeviceTypeInit_$* test-cuda: KokkosCore_UnitTest_Cuda ./KokkosCore_UnitTest_Cuda @@ -162,11 +276,15 @@ test-threads: KokkosCore_UnitTest_Threads test-openmp: KokkosCore_UnitTest_OpenMP ./KokkosCore_UnitTest_OpenMP +test-openmptarget: KokkosCore_UnitTest_OpenMPTarget + ./KokkosCore_UnitTest_OpenMPTarget + test-serial: KokkosCore_UnitTest_Serial ./KokkosCore_UnitTest_Serial -test-qthreads: KokkosCore_UnitTest_Qthreads +test-qthreads: KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_Qthreads2 ./KokkosCore_UnitTest_Qthreads + ./KokkosCore_UnitTest_Qthreads2 test-hwloc: KokkosCore_UnitTest_HWLOC 
./KokkosCore_UnitTest_HWLOC diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp index f09cc5018cb698ec033639a326a29d8fffacec3f..6896a27bfb027f676381d445f2140921f0a289da 100644 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -119,6 +119,11 @@ void TestViewAggregate() // Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; } +TEST_F( TEST_CATEGORY, view_aggregate ) +{ + TestViewAggregate< TEST_EXECSPACE >(); +} + } // namespace Test #endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp index ff77b8dca6f0437393bacca9d42ed73d359e44d5..e61d5e730c47e51a752eacc0316f6c28bf94fcc3 100644 --- a/lib/kokkos/core/unit_test/TestAtomic.hpp +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -431,3 +431,52 @@ bool Loop( int loop, int test ) } } // namespace TestAtomic + +namespace Test { + +TEST_F( TEST_CATEGORY, atomics ) +{ + const int loop_count = 1e4; + + ASSERT_TRUE( ( TestAtomic::Loop< int, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, TEST_EXECSPACE >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, TEST_EXECSPACE >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long int, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, TEST_EXECSPACE >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, TEST_EXECSPACE >( loop_count, 2 ) 
) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long long int, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, TEST_EXECSPACE >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< double, TEST_EXECSPACE >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, TEST_EXECSPACE >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, TEST_EXECSPACE >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, TEST_EXECSPACE >( 100, 3 ) ) ); + +#ifndef KOKKOS_ENABLE_OPENMPTARGET + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, TEST_EXECSPACE >( 100, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, TEST_EXECSPACE >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, TEST_EXECSPACE >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, TEST_EXECSPACE >( 100, 3 ) ) ); +#endif +} + + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index e3ceca404ff12c1c9e5da04bf70d183fee87dfdd..89b2ee7047ad733110ba09bc331935f96c8d012c 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -1056,4 +1056,85 @@ bool AtomicOperationsTestNonIntegralType( int i0, int i1, int test ) return 0; } -} // namespace TestAtomicOperations +} + +namespace Test { + +TEST_F( TEST_CATEGORY , 
atomic_operations ) +{ + const int start = 1; // Avoid zero for division. + const int end = 11; + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, TEST_EXECSPACE >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, TEST_EXECSPACE >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long 
int, TEST_EXECSPACE >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, TEST_EXECSPACE >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, TEST_EXECSPACE >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< 
long long int, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, TEST_EXECSPACE >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, TEST_EXECSPACE >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, TEST_EXECSPACE >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, TEST_EXECSPACE >( start, end - i, 2 ) ) 
); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, TEST_EXECSPACE >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, TEST_EXECSPACE >( start, end - i, 4 ) ) ); + } +} +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestAtomicViews.hpp b/lib/kokkos/core/unit_test/TestAtomicViews.hpp index 71080e5c8216aecd01985139c37bb68931139929..1ebb3fd43fae962ec1c73782d74bfec422fada6c 100644 --- a/lib/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicViews.hpp @@ -1437,3 +1437,39 @@ bool AtomicViewsTestNonIntegralType( const int length, int test ) } } // namespace TestAtomicViews + +namespace Test { + +TEST_F( TEST_CATEGORY, atomic_views_integral ) +{ + const long length = 1000000; + { + // Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, TEST_EXECSPACE >( length, 8 ) ) ); + } +} + +TEST_F( TEST_CATEGORY, atomic_views_nonintegral ) +{ + const long length = 1000000; + { + // Non-Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, TEST_EXECSPACE >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, TEST_EXECSPACE >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, TEST_EXECSPACE >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, TEST_EXECSPACE >( length, 4 ) ) ); + } +} + +TEST_F( TEST_CATEGORY, atomic_view_api ) +{ + TestAtomicViews::TestAtomicViewAPI< int, TEST_EXECSPACE >(); +} +} diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp index e2ad623d9c89cef44c4e55a9096d3dba6796adf6..7e6a0b79d7992f7b6ea5e02190e1339a76d39047 100644 --- a/lib/kokkos/core/unit_test/TestCXX11.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -343,3 +343,17 @@ bool Test( int test ) { } } // namespace TestCXX11 + +namespace Test { +TEST_F( TEST_CATEGORY, cxx11 ) +{ + if ( std::is_same< Kokkos::DefaultExecutionSpace, TEST_EXECSPACE >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< TEST_EXECSPACE >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< TEST_EXECSPACE >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< TEST_EXECSPACE >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< TEST_EXECSPACE >( 4 ) ) ); + } +} + +} + diff --git a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp index b53b42b8e05bc906c17f2ad59bdf1ebb9fd62ef7..3ee1c02c5e6e77ae47074949c66d3378c30a53cd 100644 --- a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp @@ -89,4 +89,11 @@ void test_reduction_deduction() } // namespace TestCXX11 +namespace Test { + +TEST_F( TEST_CATEGORY, reduction_deduction ) +{ + TestCXX11::test_reduction_deduction< TEST_EXECSPACE >(); +} +} #endif diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index 
45554383446ec13794f9e22bb0819477a7bdb278..fddcc4a2e6ce76925b68caf7908e26f0efffa45d 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -95,3 +95,10 @@ bool Test() { } } // namespace TestCompilerMacros + +namespace Test { +TEST_F( TEST_CATEGORY, compiler_macros ) +{ + ASSERT_TRUE( ( TestCompilerMacros::Test< TEST_EXECSPACE >() ) ); +} +} diff --git a/lib/kokkos/core/unit_test/TestComplex.hpp b/lib/kokkos/core/unit_test/TestComplex.hpp new file mode 100644 index 0000000000000000000000000000000000000000..36f05612e0972a043af9622801bc1855c58cc62b --- /dev/null +++ b/lib/kokkos/core/unit_test/TestComplex.hpp @@ -0,0 +1,243 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<Kokkos_Core.hpp> +#include<cstdio> + +namespace Test { + +// Test construction and assignment + +template<class ExecSpace> +struct TestComplexConstruction { + Kokkos::View<Kokkos::complex<double>*,ExecSpace> d_results; + typename Kokkos::View<Kokkos::complex<double>*,ExecSpace>::HostMirror h_results; + + void testit () { + d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexConstruction",10); + h_results = Kokkos::create_mirror_view(d_results); + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0,1), *this); + Kokkos::fence(); + Kokkos::deep_copy(h_results,d_results); + + ASSERT_FLOAT_EQ(h_results(0).real(),1.5); ASSERT_FLOAT_EQ(h_results(0).imag(),2.5); + ASSERT_FLOAT_EQ(h_results(1).real(),1.5); ASSERT_FLOAT_EQ(h_results(1).imag(),2.5); + ASSERT_FLOAT_EQ(h_results(2).real(),0.0); ASSERT_FLOAT_EQ(h_results(2).imag(),0.0); + ASSERT_FLOAT_EQ(h_results(3).real(),3.5); ASSERT_FLOAT_EQ(h_results(3).imag(),0.0); + ASSERT_FLOAT_EQ(h_results(4).real(),4.5); ASSERT_FLOAT_EQ(h_results(4).imag(),5.5); + ASSERT_FLOAT_EQ(h_results(5).real(),1.5); ASSERT_FLOAT_EQ(h_results(5).imag(),2.5); + ASSERT_FLOAT_EQ(h_results(6).real(),4.5); ASSERT_FLOAT_EQ(h_results(6).imag(),5.5); + ASSERT_FLOAT_EQ(h_results(7).real(),7.5); 
ASSERT_FLOAT_EQ(h_results(7).imag(),0.0); + ASSERT_FLOAT_EQ(h_results(8).real(),double(8)); ASSERT_FLOAT_EQ(h_results(8).imag(),0.0); + + Kokkos::complex<double> a(1.5,2.5),b(3.25,5.25),r_kk; + std::complex<double> sa(a),sb(3.25,5.25),r; + r = a; r_kk = a; ASSERT_FLOAT_EQ(r.real(),r_kk.real()); ASSERT_FLOAT_EQ(r.imag(),r_kk.imag()); + r = sb*a; r_kk = b*a; ASSERT_FLOAT_EQ(r.real(),r_kk.real()); ASSERT_FLOAT_EQ(r.imag(),r_kk.imag()); + r = sa; r_kk = a; ASSERT_FLOAT_EQ(r.real(),r_kk.real()); ASSERT_FLOAT_EQ(r.imag(),r_kk.imag()); + + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &i ) const { + Kokkos::complex<double> a(1.5,2.5); + d_results(0) = a; + Kokkos::complex<double> b(a); + d_results(1) = b; + Kokkos::complex<double> c = Kokkos::complex<double>(); + d_results(2) = c; + Kokkos::complex<double> d(3.5); + d_results(3) = d; + volatile Kokkos::complex<double> a_v(4.5,5.5); + d_results(4) = a_v; + volatile Kokkos::complex<double> b_v(a); + d_results(5) = b_v; + Kokkos::complex<double> e(a_v); + d_results(6) = e; + + d_results(7) = double(7.5); + d_results(8) = int(8); + } +}; + +TEST_F(TEST_CATEGORY, complex_construction) { + TestComplexConstruction<TEST_EXECSPACE> test; + test.testit(); +} + +// Test Math FUnction + +template<class ExecSpace> +struct TestComplexBasicMath { + Kokkos::View<Kokkos::complex<double>*,ExecSpace> d_results; + typename Kokkos::View<Kokkos::complex<double>*,ExecSpace>::HostMirror h_results; + + void testit () { + d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexBasicMath",20); + h_results = Kokkos::create_mirror_view(d_results); + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0,1), *this); + Kokkos::fence(); + Kokkos::deep_copy(h_results,d_results); + + std::complex<double> a(1.5,2.5); + std::complex<double> b(3.25,5.75); + std::complex<double> d(1.0,2.0); + double c = 9.3; + + std::complex<double> r; + r = a+b; ASSERT_FLOAT_EQ(h_results(0).real(), r.real()); ASSERT_FLOAT_EQ(h_results(0).imag(), 
r.imag()); + r = a-b; ASSERT_FLOAT_EQ(h_results(1).real(), r.real()); ASSERT_FLOAT_EQ(h_results(1).imag(), r.imag()); + r = a*b; ASSERT_FLOAT_EQ(h_results(2).real(), r.real()); ASSERT_FLOAT_EQ(h_results(2).imag(), r.imag()); + r = a/b; ASSERT_FLOAT_EQ(h_results(3).real(), r.real()); ASSERT_FLOAT_EQ(h_results(3).imag(), r.imag()); + r = d+a; ASSERT_FLOAT_EQ(h_results(4).real(), r.real()); ASSERT_FLOAT_EQ(h_results(4).imag(), r.imag()); + r = d-a; ASSERT_FLOAT_EQ(h_results(5).real(), r.real()); ASSERT_FLOAT_EQ(h_results(5).imag(), r.imag()); + r = d*a; ASSERT_FLOAT_EQ(h_results(6).real(), r.real()); ASSERT_FLOAT_EQ(h_results(6).imag(), r.imag()); + r = d/a; ASSERT_FLOAT_EQ(h_results(7).real(), r.real()); ASSERT_FLOAT_EQ(h_results(7).imag(), r.imag()); + r = a+c; ASSERT_FLOAT_EQ(h_results(8).real(), r.real()); ASSERT_FLOAT_EQ(h_results(8).imag(), r.imag()); + r = a-c; ASSERT_FLOAT_EQ(h_results(9).real(), r.real()); ASSERT_FLOAT_EQ(h_results(9).imag(), r.imag()); + r = a*c; ASSERT_FLOAT_EQ(h_results(10).real(), r.real()); ASSERT_FLOAT_EQ(h_results(10).imag(), r.imag()); + r = a/c; ASSERT_FLOAT_EQ(h_results(11).real(), r.real()); ASSERT_FLOAT_EQ(h_results(11).imag(), r.imag()); + r = d+c; ASSERT_FLOAT_EQ(h_results(12).real(), r.real()); ASSERT_FLOAT_EQ(h_results(12).imag(), r.imag()); + r = d-c; ASSERT_FLOAT_EQ(h_results(13).real(), r.real()); ASSERT_FLOAT_EQ(h_results(13).imag(), r.imag()); + r = d*c; ASSERT_FLOAT_EQ(h_results(14).real(), r.real()); ASSERT_FLOAT_EQ(h_results(14).imag(), r.imag()); + r = d/c; ASSERT_FLOAT_EQ(h_results(15).real(), r.real()); ASSERT_FLOAT_EQ(h_results(15).imag(), r.imag()); + r = c+a; ASSERT_FLOAT_EQ(h_results(16).real(), r.real()); ASSERT_FLOAT_EQ(h_results(16).imag(), r.imag()); + r = c-a; ASSERT_FLOAT_EQ(h_results(17).real(), r.real()); ASSERT_FLOAT_EQ(h_results(17).imag(), r.imag()); + r = c*a; ASSERT_FLOAT_EQ(h_results(18).real(), r.real()); ASSERT_FLOAT_EQ(h_results(18).imag(), r.imag()); + r = c/a; 
ASSERT_FLOAT_EQ(h_results(19).real(), r.real()); ASSERT_FLOAT_EQ(h_results(19).imag(), r.imag()); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &i ) const { + Kokkos::complex<double> a(1.5,2.5); + Kokkos::complex<double> b(3.25,5.75); + // Basic math complex / complex + d_results(0) = a+b; + d_results(1) = a-b; + d_results(2) = a*b; + d_results(3) = a/b; + d_results(4) = Kokkos::complex<double>(1.0,2.0); + d_results(4) += a; + d_results(5) = Kokkos::complex<double>(1.0,2.0); + d_results(5) -= a; + d_results(6) = Kokkos::complex<double>(1.0,2.0); + d_results(6) *= a; + d_results(7) = Kokkos::complex<double>(1.0,2.0); + d_results(7) /= a; + + // Basic math complex / scalar + double c = 9.3; + d_results(8) = a+c; + d_results(9) = a-c; + d_results(10) = a*c; + d_results(11) = a/c; + d_results(12) = Kokkos::complex<double>(1.0,2.0); + d_results(12) += c; + d_results(13) = Kokkos::complex<double>(1.0,2.0); + d_results(13) -= c; + d_results(14) = Kokkos::complex<double>(1.0,2.0); + d_results(14) *= c; + d_results(15) = Kokkos::complex<double>(1.0,2.0); + d_results(15) /= c; + + + // Basic math scalar / complex + d_results(16) = c+a; + d_results(17) = c-a; + d_results(18) = c*a; + d_results(19) = c/a; + } +}; + +TEST_F(TEST_CATEGORY, complex_basic_math) { + TestComplexBasicMath<TEST_EXECSPACE> test; + test.testit(); +} + + +template<class ExecSpace> +struct TestComplexSpecialFunctions { + Kokkos::View<Kokkos::complex<double>*,ExecSpace> d_results; + typename Kokkos::View<Kokkos::complex<double>*,ExecSpace>::HostMirror h_results; + + void testit () { + d_results = Kokkos::View<Kokkos::complex<double>*,ExecSpace>("TestComplexSpecialFunctions",20); + h_results = Kokkos::create_mirror_view(d_results); + + Kokkos::parallel_for(Kokkos::RangePolicy<ExecSpace>(0,1), *this); + Kokkos::fence(); + Kokkos::deep_copy(h_results,d_results); + + std::complex<double> a(1.5,2.5); + double c = 9.3; + + std::complex<double> r; + r = a; ASSERT_FLOAT_EQ(h_results(0).real(), 
r.real()); ASSERT_FLOAT_EQ(h_results(0).imag(), r.imag()); + r = std::sqrt(a); ASSERT_FLOAT_EQ(h_results(1).real(), r.real()); ASSERT_FLOAT_EQ(h_results(1).imag(), r.imag()); + r = std::pow(a,c); ASSERT_FLOAT_EQ(h_results(2).real(), r.real()); ASSERT_FLOAT_EQ(h_results(2).imag(), r.imag()); + r = std::abs(a); ASSERT_FLOAT_EQ(h_results(3).real(), r.real()); ASSERT_FLOAT_EQ(h_results(3).imag(), r.imag()); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const int &i ) const { + Kokkos::complex<double> a(1.5,2.5); + Kokkos::complex<double> b(3.25,5.75); + double c = 9.3; + + d_results(0) = Kokkos::complex<double>(Kokkos::real(a),Kokkos::imag(a)); + d_results(1) = Kokkos::sqrt(a); + d_results(2) = Kokkos::pow(a,c); + d_results(3) = Kokkos::abs(a); + + } +}; + +TEST_F(TEST_CATEGORY, complex_special_funtions) { + TestComplexSpecialFunctions<TEST_EXECSPACE> test; + test.testit(); +} +} // namespace Test + + diff --git a/lib/kokkos/core/unit_test/TestConcurrentBitset.hpp b/lib/kokkos/core/unit_test/TestConcurrentBitset.hpp new file mode 100644 index 0000000000000000000000000000000000000000..404566cc6f78153d37303eaf978b7fa1763c44da --- /dev/null +++ b/lib/kokkos/core/unit_test/TestConcurrentBitset.hpp @@ -0,0 +1,177 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef TEST_CONCURRENTBITSET_HPP +#define TEST_CONCURRENTBITSET_HPP + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <impl/Kokkos_ConcurrentBitset.hpp> + +namespace Test { + +template< class DeviceType > +struct ConcurrentBitset { + + typedef Kokkos::View<uint32_t*,DeviceType> view_unsigned_type ; + typedef Kokkos::View<int*,DeviceType> view_int_type ; + + view_unsigned_type bitset ; + view_int_type acquired ; + uint32_t bitset_count_lg2 ; + uint32_t bitset_count_mask ; + + ConcurrentBitset( const uint32_t arg_bitset_count_lg2 + , const view_unsigned_type & arg_bitset + , const view_int_type & arg_acquired ) + : bitset( arg_bitset ), acquired( arg_acquired ) + , bitset_count_lg2( arg_bitset_count_lg2 ) + , bitset_count_mask( uint32_t( 1u << arg_bitset_count_lg2 ) - 1 ) + {} + + struct TagAcquire {}; + struct TagRelease {}; + struct TagReacquire {}; + + KOKKOS_INLINE_FUNCTION + void operator()( TagAcquire , int i , long & update ) const + { + unsigned hint = Kokkos::Impl::clock_tic() & bitset_count_mask ; + + Kokkos::pair<int,int> result = + Kokkos::Impl::concurrent_bitset::acquire_bounded_lg2 + ( bitset.data() , bitset_count_lg2 , hint ); + + acquired(i) = result.first ; + + if ( 0 <= result.first ) ++update ; + } + + KOKKOS_INLINE_FUNCTION + void operator()( TagRelease , int i , long & update ) const + { + if ( 0 == ( i % 3 ) && 0 <= acquired(i) ) { + Kokkos::Impl::concurrent_bitset::release( bitset.data() , acquired(i) ); + acquired(i) = -1 ; + ++update ; + } + } + + KOKKOS_INLINE_FUNCTION + void operator()( TagReacquire , int i , long & update ) const + { + if ( acquired(i) < 0 ) { + + unsigned hint = Kokkos::Impl::clock_tic() & bitset_count_mask ; + + Kokkos::pair<int,int> result = Kokkos::Impl::concurrent_bitset::acquire_bounded_lg2 + ( bitset.data() , bitset_count_lg2 , 
hint ); + + acquired(i) = result.first ; + + if ( 0 <= result.first ) ++update ; + } + } +}; + +template< class DeviceType > +void test_concurrent_bitset( int bit_count ) +{ + typedef ConcurrentBitset< DeviceType > Functor ; + typedef typename Functor::view_unsigned_type view_unsigned_type ; + typedef typename Functor::view_int_type view_int_type ; + + int bit_count_lg2 = 1 ; + + while ( ( 1 << bit_count_lg2 ) < bit_count ) ++bit_count_lg2 ; + + bit_count = 1 << bit_count_lg2 ; + + const int buffer_length = + Kokkos::Impl::concurrent_bitset::buffer_bound_lg2(bit_count_lg2); + + view_unsigned_type bitset("bitset",buffer_length); + + // Try to acquire more than available: + + const size_t n = ( bit_count * 3 ) / 2 ; + + view_int_type acquired("acquired", n ); + + typename view_unsigned_type::HostMirror bitset_host = + Kokkos::create_mirror_view( bitset ); + + Kokkos::deep_copy( bitset , 0u ); + + long total = 0 ; + long total_release = 0 ; + long total_reacquire = 0 ; + + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< DeviceType , typename Functor::TagAcquire >(0,n) + , Functor( bit_count_lg2 , bitset , acquired ) + , total ); + + ASSERT_EQ( bit_count , total ); + + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< DeviceType , typename Functor::TagRelease >(0,n) + , Functor( bit_count_lg2 , bitset , acquired ) + , total_release ); + + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< DeviceType , typename Functor::TagReacquire >(0,n) + , Functor( bit_count_lg2 , bitset , acquired ) + , total_reacquire ); + + ASSERT_EQ( total_release , total_reacquire ); + +} + +} // namespace Test + +#endif /* #ifndef TEST_CONCURRENTBITSET_HPP */ diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp deleted file mode 100644 index 5d3665b905434d1310dc51e430940b17690baac1..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp +++ /dev/null @@ -1,237 +0,0 @@ -/* 
-//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <Kokkos_Core.hpp> - -#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) - -#include <TestAtomic.hpp> -#include <TestViewAPI.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestCXX11.hpp> -#include <TestTeamVector.hpp> -#include <TestUtilities.hpp> - -namespace Test { - -class defaultdevicetype : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::initialize(); - } - - static void TearDownTestCase() - { - Kokkos::finalize(); - } -}; - -TEST_F( defaultdevicetype, test_utilities ) -{ - test_utilities(); -} - -TEST_F( defaultdevicetype, long_reduce ) -{ - TestReduce< long, Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce ) -{ - TestReduce< double, Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic ) -{ - TestReduceDynamic< long, Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, double_reduce_dynamic ) -{ - TestReduceDynamic< double, Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, long_reduce_dynamic_view ) -{ - TestReduceDynamicView< long, Kokkos::DefaultExecutionSpace >( 100000 ); -} - -TEST_F( defaultdevicetype, atomics ) -{ - const int loop_count = 1e4; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( 
loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 3 ) ) ); -} - -/*TEST_F( defaultdevicetype, view_remap ) -{ - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::DefaultExecutionSpace > output_type; - - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::DefaultExecutionSpace > input_type; - - typedef Kokkos::View< int*[N0][N2][N3], - 
Kokkos::LayoutLeft, - Kokkos::DefaultExecutionSpace > diff_type; - - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); - - int value = 0; - for ( size_t i3 = 0; i3 < N3; ++i3 ) { - for ( size_t i2 = 0; i2 < N2; ++i2 ) { - for ( size_t i1 = 0; i1 < N1; ++i1 ) { - for ( size_t i0 = 0; i0 < N0; ++i0 ) { - input( i0, i1, i2, i3 ) = ++value; - } - } - } - } - - // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. - Kokkos::deep_copy( output, input ); - - value = 0; - for ( size_t i3 = 0; i3 < N3; ++i3 ) { - for ( size_t i2 = 0; i2 < N2; ++i2 ) { - for ( size_t i1 = 0; i1 < N1; ++i1 ) { - for ( size_t i0 = 0; i0 < N0; ++i0 ) { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); - } - } - } - } -}*/ - -TEST_F( defaultdevicetype, view_aggregate ) -{ - TestViewAggregate< Kokkos::DefaultExecutionSpace >(); -} - -TEST_F( defaultdevicetype, scan ) -{ - TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1, 1000 ); - TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); - TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); - Kokkos::DefaultExecutionSpace::fence(); -} - -TEST_F( defaultdevicetype, compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); -} - -TEST_F( defaultdevicetype, cxx11 ) -{ - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); -} - -#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) -TEST_F( defaultdevicetype, team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< 
Kokkos::DefaultExecutionSpace >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 5 ) ) ); -} -#endif - -TEST_F( defaultdevicetype, malloc ) -{ - int* data = (int*) Kokkos::kokkos_malloc( 100 * sizeof( int ) ); - ASSERT_NO_THROW( data = (int*) Kokkos::kokkos_realloc( data, 120 * sizeof( int ) ) ); - Kokkos::kokkos_free( data ); - - int* data2 = (int*) Kokkos::kokkos_malloc( 0 ); - ASSERT_TRUE( data2 == NULL ); - Kokkos::kokkos_free( data2 ); -} - -} // namespace Test - -#endif diff --git a/lib/kokkos/core/unit_test/TestFunctorAnalysis.hpp b/lib/kokkos/core/unit_test/TestFunctorAnalysis.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5bd196d924d30568dd39affdb13efa696d861118 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestFunctorAnalysis.hpp @@ -0,0 +1,153 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef TEST_FUNCTOR_ANALYSIS_HPP +#define TEST_FUNCTOR_ANALYSIS_HPP + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +/*--------------------------------------------------------------------------*/ + +namespace Test { + +struct TestFunctorAnalysis_03 { + + struct value_type { double x[2]; }; + + KOKKOS_INLINE_FUNCTION + void operator()( int , value_type & ) const {} + + KOKKOS_INLINE_FUNCTION + void join( value_type volatile & , value_type const volatile & ) const {} + + KOKKOS_INLINE_FUNCTION static + void init( value_type & ) {} +}; + + +template< class ExecSpace > +void test_functor_analysis() +{ + //------------------------------ + auto c01 = KOKKOS_LAMBDA(int){} ; + typedef Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::FOR + , Kokkos::RangePolicy< ExecSpace > + , decltype( c01 ) > + A01 ; + + typedef typename A01::template Reducer< typename ExecSpace::memory_space > + R01 ; + + static_assert( std::is_same< typename A01::value_type , void >::value , "" ); + static_assert( std::is_same< typename A01::pointer_type , void >::value , "" ); + static_assert( std::is_same< typename A01::reference_type , void >::value , "" ); + static_assert( std::is_same< typename R01::functor_type , decltype(c01) >::value , "" ); + + static_assert( ! A01::has_join_member_function , "" ); + static_assert( ! A01::has_init_member_function , "" ); + static_assert( ! 
A01::has_final_member_function , "" ); + static_assert( A01::StaticValueSize == 0 , "" ); + ASSERT_EQ( R01( & c01 ).length() , 0 ); + + //------------------------------ + auto c02 = KOKKOS_LAMBDA(int,double&){} ; + typedef Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , Kokkos::RangePolicy< ExecSpace > + , decltype( c02 ) > + A02 ; + typedef typename A02::template Reducer< typename ExecSpace::memory_space > + R02 ; + + static_assert( std::is_same< typename A02::value_type , double >::value , "" ); + static_assert( std::is_same< typename A02::pointer_type , double * >::value , "" ); + static_assert( std::is_same< typename A02::reference_type , double & >::value , "" ); + static_assert( std::is_same< typename R02::functor_type , decltype(c02) >::value , "" ); + + static_assert( ! A02::has_join_member_function , "" ); + static_assert( ! A02::has_init_member_function , "" ); + static_assert( ! A02::has_final_member_function , "" ); + static_assert( A02::StaticValueSize == sizeof(double) , "" ); + ASSERT_EQ( R02( & c02 ).length() , 1 ); + + //------------------------------ + + TestFunctorAnalysis_03 c03 ; + typedef Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::REDUCE + , Kokkos::RangePolicy< ExecSpace > + , TestFunctorAnalysis_03 > + A03 ; + typedef typename A03::template Reducer< typename ExecSpace::memory_space > + R03 ; + + static_assert( std::is_same< typename A03::value_type , TestFunctorAnalysis_03::value_type >::value , "" ); + static_assert( std::is_same< typename A03::pointer_type , TestFunctorAnalysis_03::value_type * >::value , "" ); + static_assert( std::is_same< typename A03::reference_type , TestFunctorAnalysis_03::value_type & >::value , "" ); + static_assert( std::is_same< typename R03::functor_type , TestFunctorAnalysis_03 >::value , "" ); + + static_assert( A03::has_join_member_function , "" ); + static_assert( A03::has_init_member_function , "" ); + static_assert( ! 
A03::has_final_member_function , "" ); + static_assert( A03::StaticValueSize == sizeof(TestFunctorAnalysis_03::value_type) , "" ); + ASSERT_EQ( R03( & c03 ).length() , 1 ); + + //------------------------------ + + + +} + +TEST_F( TEST_CATEGORY , functor_analysis ) +{ + test_functor_analysis< TEST_EXECSPACE >(); +} + +} + +/*--------------------------------------------------------------------------*/ + +#endif /* #ifndef TEST_FUNCTOR_ANALYSIS_HPP */ + diff --git a/lib/kokkos/core/unit_test/TestInit.hpp b/lib/kokkos/core/unit_test/TestInit.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0b4246a2bfff24a5bf1c3523d959ee3e71f6f9c8 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestInit.hpp @@ -0,0 +1,76 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cstdio> +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +namespace Test { +TEST_F( TEST_CATEGORY, init ) +{ + ; +} + +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + +template<class ExecSpace> +void test_dispatch () { + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< TEST_EXECSPACE >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } +} + +TEST_F( TEST_CATEGORY, dispatch ) +{ + test_dispatch<TEST_EXECSPACE>(); +} +#endif + +} diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index 1dc349cc1268e680aabc0859a771c7a786a388de..091591bcbf5b5f260bb79c49d342b58cfc03f03b 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -41,7 +41,7 @@ //@HEADER */ -#include <stdio.h> +#include <cstdio> #include <gtest/gtest.h> @@ -1718,4 +1718,19 @@ struct TestMDRange_6D { } // namespace +TEST_F( TEST_CATEGORY , mdrange_for ) { + TestMDRange_2D< TEST_EXECSPACE >::test_for2( 100, 100 ); + TestMDRange_3D< TEST_EXECSPACE >::test_for3( 100, 10, 100 ); + TestMDRange_4D< TEST_EXECSPACE >::test_for4( 100, 10, 10, 10 ); + TestMDRange_5D< TEST_EXECSPACE 
>::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< TEST_EXECSPACE >::test_for6( 10, 10, 10, 10, 5, 5 ); +} + +#ifndef KOKKOS_ENABLE_CUDA +TEST_F( TEST_CATEGORY , mdrange_reduce ) { + TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 ); + TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 10, 100 ); +} +#endif + } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp index 925f0e35ed6d12d3a822daa63421827fe636c86c..941cd6c26d4a5ada4dd3ac5b583033cd099e4d8f 100644 --- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -45,776 +45,273 @@ #ifndef KOKKOS_UNITTEST_MEMPOOL_HPP #define KOKKOS_UNITTEST_MEMPOOL_HPP -#include <stdio.h> +#include <cstdio> #include <iostream> #include <cmath> #include <algorithm> #include <impl/Kokkos_Timer.hpp> -//#define TESTMEMORYPOOL_PRINT -//#define TESTMEMORYPOOL_PRINT_STATUS - -#define STRIDE 1 -#ifdef KOKKOS_ENABLE_CUDA -#define STRIDE_ALLOC 32 -#else -#define STRIDE_ALLOC 1 -#endif - namespace TestMemoryPool { -struct pointer_obj { - uint64_t * ptr; - - KOKKOS_INLINE_FUNCTION - pointer_obj() : ptr( 0 ) {} -}; - -struct pointer_obj2 { - void * ptr; - size_t size; - - KOKKOS_INLINE_FUNCTION - pointer_obj2() : ptr( 0 ), size( 0 ) {} -}; +template< typename MemSpace = Kokkos::HostSpace > +void test_host_memory_pool_stats() +{ + typedef typename MemSpace::execution_space Space ; + typedef typename Kokkos::MemoryPool< Space > MemPool ; -template < typename PointerView, typename Allocator > -struct allocate_memory { - typedef typename PointerView::execution_space execution_space; - typedef typename execution_space::size_type size_type; + const size_t MemoryCapacity = 32000 ; + const size_t MinBlockSize = 64 ; + const size_t MaxBlockSize = 1024 ; + const size_t SuperBlockSize = 4096 ; - PointerView m_pointers; - size_t m_chunk_size; - Allocator m_mempool; + MemPool pool( MemSpace() + , MemoryCapacity + , MinBlockSize 
+ , MaxBlockSize + , SuperBlockSize + ); - allocate_memory( PointerView & ptrs, size_t num_ptrs, - size_t cs, Allocator & m ) - : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m ) { - // Initialize the view with the out degree of each vertex. - Kokkos::parallel_for( num_ptrs * STRIDE_ALLOC, *this ); - } + typename MemPool::usage_statistics stats ; - KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const - { - if ( i % STRIDE_ALLOC == 0 ) { - m_pointers[i / STRIDE_ALLOC].ptr = - static_cast< uint64_t * >( m_mempool.allocate( m_chunk_size ) ); - } - } -}; - -template < typename PointerView > -struct count_invalid_memory { - typedef typename PointerView::execution_space execution_space; - typedef typename execution_space::size_type size_type; - typedef uint64_t value_type; - - PointerView m_pointers; - uint64_t & m_result; + pool.get_usage_statistics( stats ); - count_invalid_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res ) - : m_pointers( ptrs ), m_result( res ) - { - // Initialize the view with the out degree of each vertex. 
- Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result ); + ASSERT_LE( MemoryCapacity , stats.capacity_bytes ); + ASSERT_LE( MinBlockSize , stats.min_block_bytes ); + ASSERT_LE( MaxBlockSize , stats.max_block_bytes ); + ASSERT_LE( SuperBlockSize , stats.superblock_bytes ); } - KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { v = 0; } + void * p0064 = pool.allocate(64); + void * p0128 = pool.allocate(128); + void * p0256 = pool.allocate(256); + void * p1024 = pool.allocate(1024); - KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, volatile value_type const & src ) const - { dst += src; } + // Aborts because exceeds max block size: + // void * p2048 = pool.allocate(2048); - KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & r ) const - { - if ( i % STRIDE == 0 ) { - r += ( m_pointers[i / STRIDE].ptr == 0 ); - } - } -}; + ASSERT_NE( p0064 , (void*) 0 ); + ASSERT_NE( p0128 , (void*) 0 ); + ASSERT_NE( p0256 , (void*) 0 ); + ASSERT_NE( p1024 , (void*) 0 ); -template < typename PointerView > -struct fill_memory { - typedef typename PointerView::execution_space execution_space; - typedef typename execution_space::size_type size_type; + pool.deallocate( p0064 , 64 ); + pool.deallocate( p0128 , 128 ); + pool.deallocate( p0256 , 256 ); + pool.deallocate( p1024 , 1024 ); - PointerView m_pointers; +} - fill_memory( PointerView & ptrs, size_t num_ptrs ) : m_pointers( ptrs ) - { - // Initialize the view with the out degree of each vertex. 
- Kokkos::parallel_for( num_ptrs * STRIDE, *this ); - } +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- - KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const - { - if ( i % STRIDE == 0 ) { - *m_pointers[i / STRIDE].ptr = i / STRIDE; - } - } -}; +template< class DeviceType > +struct TestMemoryPool_Functor { -template < typename PointerView > -struct sum_memory { - typedef typename PointerView::execution_space execution_space; - typedef typename execution_space::size_type size_type; - typedef uint64_t value_type; + typedef Kokkos::View< uintptr_t * , DeviceType > ptrs_type ; + typedef Kokkos::MemoryPool< DeviceType > pool_type ; - PointerView m_pointers; - uint64_t & m_result; + pool_type pool ; + ptrs_type ptrs ; - sum_memory( PointerView & ptrs, size_t num_ptrs, uint64_t & res ) - : m_pointers( ptrs ), m_result( res ) - { - // Initialize the view with the out degree of each vertex. - Kokkos::parallel_reduce( num_ptrs * STRIDE, *this, m_result ); - } + TestMemoryPool_Functor( const pool_type & arg_pool , size_t n ) + : pool( arg_pool ) + , ptrs( "ptrs" , n ) + {} - KOKKOS_INLINE_FUNCTION - void init( value_type & v ) const - { v = 0; } + // Specify reduction argument value_type to avoid + // confusion with tag-dispatch. 
- KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst, volatile value_type const & src ) const - { dst += src; } + using value_type = long ; + + struct TagAlloc {}; KOKKOS_INLINE_FUNCTION - void operator()( size_type i, value_type & r ) const - { - if ( i % STRIDE == 0 ) { - r += *m_pointers[i / STRIDE].ptr; + void operator()( TagAlloc , int i , long & update ) const noexcept + { + unsigned alloc_size = 32 * ( 1 + ( i % 5 )); + ptrs(i) = (uintptr_t) pool.allocate( alloc_size ); + if ( ptrs(i) ) { ++update ; } } - } -}; - -template < typename PointerView, typename Allocator > -struct deallocate_memory { - typedef typename PointerView::execution_space execution_space; - typedef typename execution_space::size_type size_type; - PointerView m_pointers; - size_t m_chunk_size; - Allocator m_mempool; - - deallocate_memory( PointerView & ptrs, size_t num_ptrs, - size_t cs, Allocator & m ) - : m_pointers( ptrs ), m_chunk_size( cs ), m_mempool( m ) - { - // Initialize the view with the out degree of each vertex. 
- Kokkos::parallel_for( num_ptrs * STRIDE, *this ); - } + struct TagDealloc {}; KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const - { - if ( i % STRIDE == 0 ) { - m_mempool.deallocate( m_pointers[i / STRIDE].ptr, m_chunk_size ); + void operator()( TagDealloc , int i , long & update ) const noexcept + { + if ( ptrs(i) && ( 0 == i % 3 ) ) { + unsigned alloc_size = 32 * ( 1 + ( i % 5 )); + pool.deallocate( (void*) ptrs(i) , alloc_size ); + ptrs(i) = 0 ; + ++update ; + } } - } -}; -template < typename WorkView, typename PointerView, typename ScalarView, - typename Allocator > -struct allocate_deallocate_memory { - typedef typename WorkView::execution_space execution_space; - typedef typename execution_space::size_type size_type; - - WorkView m_work; - PointerView m_pointers; - ScalarView m_ptrs_front; - ScalarView m_ptrs_back; - Allocator m_mempool; - - allocate_deallocate_memory( WorkView & w, size_t work_size, PointerView & p, - ScalarView pf, ScalarView pb, Allocator & m ) - : m_work( w ), m_pointers( p ), m_ptrs_front( pf ), m_ptrs_back( pb ), - m_mempool( m ) - { - // Initialize the view with the out degree of each vertex. - Kokkos::parallel_for( work_size * STRIDE_ALLOC, *this ); - } + struct TagRealloc {}; KOKKOS_INLINE_FUNCTION - void operator()( size_type i ) const - { - if ( i % STRIDE_ALLOC == 0 ) { - unsigned my_work = m_work[i / STRIDE_ALLOC]; - - if ( ( my_work & 1 ) == 0 ) { - // Allocation. - size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_back(), 1 ); - size_t alloc_size = my_work >> 1; - m_pointers[pos].ptr = m_mempool.allocate( alloc_size ); - m_pointers[pos].size = alloc_size; - } - else { - // Deallocation. 
- size_t pos = Kokkos::atomic_fetch_add( &m_ptrs_front(), 1 ); - m_mempool.deallocate( m_pointers[pos].ptr, m_pointers[pos].size ); + void operator()( TagRealloc , int i , long & update ) const noexcept + { + if ( 0 == ptrs(i) ) { + unsigned alloc_size = 32 * ( 1 + ( i % 5 )); + ptrs(i) = (uintptr_t) pool.allocate( alloc_size ); + if ( ptrs(i) ) { ++update ; } } } - } -}; -#define PRECISION 6 -#define SHIFTW 24 -#define SHIFTW2 12 - -template < typename F > -void print_results( const std::string & text, F elapsed_time ) -{ - std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) - << std::fixed << std::setprecision( PRECISION ) << elapsed_time - << std::endl; -} - -template < typename F, typename T > -void print_results( const std::string & text, unsigned long long width, - F elapsed_time, T result ) -{ - std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) - << std::fixed << std::setprecision( PRECISION ) << elapsed_time - << " " << std::setw( width ) << result << std::endl; -} - -template < typename F > -void print_results( const std::string & text, unsigned long long width, - F elapsed_time, const std::string & result ) -{ - std::cout << std::setw( SHIFTW ) << text << std::setw( SHIFTW2 ) - << std::fixed << std::setprecision( PRECISION ) << elapsed_time - << " " << std::setw( width ) << result << std::endl; -} - -// This test slams allocation and deallocation in a worse than real-world usage -// scenario to see how bad the thread-safety really is by having a loop where -// all threads allocate and a subsequent loop where all threads deallocate. -// All of the allocation requests are for equal-sized chunks that are the base -// chunk size of the memory pool. It also tests initialization of the memory -// pool and breaking large chunks into smaller chunks to fulfill allocation -// requests. It verifies that MemoryPool(), allocate(), and deallocate() work -// correctly. 
-template < class Device > -bool test_mempool( size_t chunk_size, size_t total_size ) -{ - typedef typename Device::execution_space execution_space; - typedef typename Device::memory_space memory_space; - typedef Device device_type; - typedef Kokkos::View< pointer_obj *, device_type > pointer_view; - typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; - - uint64_t result = 0; - size_t num_chunks = total_size / chunk_size; - bool return_val = true; - - pointer_view pointers( "pointers", num_chunks ); - -#ifdef TESTMEMORYPOOL_PRINT - std::cout << "*** test_mempool() ***" << std::endl - << std::setw( SHIFTW ) << "chunk_size: " << std::setw( 12 ) - << chunk_size << std::endl - << std::setw( SHIFTW ) << "total_size: " << std::setw( 12 ) - << total_size << std::endl - << std::setw( SHIFTW ) << "num_chunks: " << std::setw( 12 ) - << num_chunks << std::endl; - - double elapsed_time = 0; - Kokkos::Timer timer; -#endif - - pool_memory_space mempool( memory_space(), total_size * 1.2, 20 ); - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "initialize mempool: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif - - { - allocate_memory< pointer_view, pool_memory_space > - am( pointers, num_chunks, chunk_size, mempool ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "allocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif - - { - count_invalid_memory< pointer_view > sm( pointers, num_chunks, result ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "invalid chunks: ", 16, elapsed_time, result ); - timer.reset(); -#endif - - { - fill_memory< pointer_view > fm( pointers, num_chunks ); - } - -#ifdef TESTMEMORYPOOL_PRINT - 
execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "fill chunks: ", elapsed_time ); - timer.reset(); -#endif - - { - sum_memory< pointer_view > sm( pointers, num_chunks, result ); - } - - execution_space::fence(); - -#ifdef TESTMEMORYPOOL_PRINT - elapsed_time = timer.seconds(); - print_results( "sum chunks: ", 16, elapsed_time, result ); -#endif - - if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) { - std::cerr << "Invalid sum value in memory." << std::endl; - return_val = false; - } - -#ifdef TESTMEMORYPOOL_PRINT - timer.reset(); -#endif + struct TagMixItUp {}; - { - deallocate_memory< pointer_view, pool_memory_space > - dm( pointers, num_chunks, chunk_size, mempool ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "deallocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif - - { - allocate_memory< pointer_view, pool_memory_space > - am( pointers, num_chunks, chunk_size, mempool ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "allocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif - - { - count_invalid_memory< pointer_view > sm( pointers, num_chunks, result ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "invalid chunks: ", 16, elapsed_time, result ); - timer.reset(); -#endif - - { - fill_memory< pointer_view > fm( pointers, num_chunks ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "fill chunks: ", elapsed_time ); - timer.reset(); -#endif - - { - sum_memory< pointer_view > sm( pointers, num_chunks, result ); - } - - execution_space::fence(); - -#ifdef TESTMEMORYPOOL_PRINT - elapsed_time = timer.seconds(); - 
print_results( "sum chunks: ", 16, elapsed_time, result ); -#endif - - if ( result != ( num_chunks * ( num_chunks - 1 ) ) / 2 ) { - std::cerr << "Invalid sum value in memory." << std::endl; - return_val = false; - } + KOKKOS_INLINE_FUNCTION + void operator()( TagMixItUp , int i , long & update ) const noexcept + { + if ( ptrs(i) && ( 0 == i % 3 ) ) { -#ifdef TESTMEMORYPOOL_PRINT - timer.reset(); -#endif + unsigned alloc_size = 32 * ( 1 + ( i % 5 )); - { - deallocate_memory< pointer_view, pool_memory_space > - dm( pointers, num_chunks, chunk_size, mempool ); - } + pool.deallocate( (void*) ptrs(i) , alloc_size ); -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "deallocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif -#endif + ptrs(i) = (uintptr_t) pool.allocate( alloc_size ); - return return_val; -} + if ( ptrs(i) ) { ++update ; } + } + } +}; -template < typename T > -T smallest_power2_ge( T val ) +template< class PoolType > +void print_memory_pool_stats + ( typename PoolType::usage_statistics const & stats ) { - // Find the most significant nonzero bit. - int first_nonzero_bit = Kokkos::Impl::bit_scan_reverse( val ); - - // If val is an integral power of 2, ceil( log2( val ) ) is equal to the - // most significant nonzero bit. Otherwise, you need to add 1. 
- int lg2_size = first_nonzero_bit + - !Kokkos::Impl::is_integral_power_of_two( val ); - - return T( 1 ) << T( lg2_size ); + std::cout << "MemoryPool {" << std::endl + << " bytes capacity = " << stats.capacity_bytes << std::endl + << " bytes used = " << stats.consumed_bytes << std::endl + << " bytes reserved = " << stats.reserved_bytes << std::endl + << " bytes free = " << ( stats.capacity_bytes - + ( stats.consumed_bytes + stats.reserved_bytes ) ) << std::endl + << " alloc used = " << stats.consumed_blocks << std::endl + << " alloc reserved = " << stats.reserved_blocks << std::endl + << " super used = " << stats.consumed_superblocks << std::endl + << " super reserved = " << ( stats.capacity_superblocks - + stats.consumed_superblocks ) << std::endl + << "}" << std::endl ; } -// This test makes allocation requests for multiple sizes and interleaves -// allocation and deallocation. -// -// There are 3 phases. The first phase does only allocations to build up a -// working state for the allocator. The second phase interleaves allocations -// and deletions. The third phase does only deallocations to undo all the -// allocations from the first phase. By building first to a working state, -// allocations and deallocations can happen in any order for the second phase. -// Each phase performs on multiple chunk sizes. 
-template < class Device > -void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, - size_t phase1_size, size_t phase2_size ) +template< class DeviceType > +void test_memory_pool_v2( const bool print_statistics + , const bool print_superblocks ) { -#ifdef TESTMEMORYPOOL_PRINT - typedef typename Device::execution_space execution_space; -#endif - typedef typename Device::memory_space memory_space; - typedef Device device_type; - typedef Kokkos::View< unsigned *, device_type > work_view; - typedef Kokkos::View< size_t, device_type > scalar_view; - typedef Kokkos::View< pointer_obj2 *, device_type > pointer_view; - typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; - - enum { - MIN_CHUNK_SIZE = 64, - MIN_BASE_CHUNK_SIZE = MIN_CHUNK_SIZE / 2 + 1 - }; - - // Make sure the base chunk size is at least MIN_BASE_CHUNK_SIZE bytes, so - // all the different chunk sizes translate to different block sizes for the - // allocator. - if ( base_chunk_size < MIN_BASE_CHUNK_SIZE ) { - base_chunk_size = MIN_BASE_CHUNK_SIZE; - } - - // Get the smallest power of 2 >= the base chunk size. The size must be - // >= MIN_CHUNK_SIZE, though. - unsigned ceil_base_chunk_size = smallest_power2_ge( base_chunk_size ); - if ( ceil_base_chunk_size < MIN_CHUNK_SIZE ) { - ceil_base_chunk_size = MIN_CHUNK_SIZE; - } - - // Make sure the phase 1 size is multiples of num_chunk_sizes. - phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) * - num_chunk_sizes; - - // Make sure the phase 2 size is multiples of ( 2 * num_chunk_sizes ). - phase2_size = - ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) * - 2 * num_chunk_sizes; - - // The phase2 size must be <= twice the phase1 size so that deallocations - // can't happen before allocations. 
- if ( phase2_size > 2 * phase1_size ) phase2_size = 2 * phase1_size; + typedef typename DeviceType::memory_space memory_space ; + typedef typename DeviceType::execution_space execution_space ; + typedef Kokkos::MemoryPool< DeviceType > pool_type ; + typedef TestMemoryPool_Functor< DeviceType > functor_type ; - size_t phase3_size = phase1_size; - size_t half_phase2_size = phase2_size / 2; + typedef typename functor_type::TagAlloc TagAlloc ; + typedef typename functor_type::TagDealloc TagDealloc ; + typedef typename functor_type::TagRealloc TagRealloc ; + typedef typename functor_type::TagMixItUp TagMixItUp ; - // Each entry in the work views has the following format. The least - // significant bit indicates allocation (0) vs. deallocation (1). For - // allocation, the other bits indicate the desired allocation size. + const size_t total_alloc_size = 10000000 ; + const unsigned min_block_size = 64 ; + const unsigned max_block_size = 256 ; + const long nfill = 70000 ; - // Initialize the phase 1 work view with an equal number of allocations for - // each chunk size. 
- work_view phase1_work( "Phase 1 Work", phase1_size ); - typename work_view::HostMirror host_phase1_work = - create_mirror_view( phase1_work ); + for ( uint32_t k = 0 , min_superblock_size = 10000 ; + k < 3 ; ++k , min_superblock_size *= 10 ) { - size_t inner_size = phase1_size / num_chunk_sizes; - unsigned chunk_size = base_chunk_size; + typename pool_type::usage_statistics stats ; - for ( size_t i = 0; i < num_chunk_sizes; ++i ) { - for ( size_t j = 0; j < inner_size; ++j ) { - host_phase1_work[i * inner_size + j] = chunk_size << 1; - } + pool_type pool( memory_space() + , total_alloc_size + , min_block_size + , max_block_size + , min_superblock_size ); - chunk_size *= 2; - } + functor_type functor(pool,nfill); - std::random_shuffle( host_phase1_work.ptr_on_device(), - host_phase1_work.ptr_on_device() + phase1_size ); + long result = 0 ; + long ndel = 0 ; - deep_copy( phase1_work, host_phase1_work ); + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space , TagAlloc >(0,nfill) + , functor + , result + ); - // Initialize the phase 2 work view with half allocations and half - // deallocations with an equal number of allocations for each chunk size. 
- work_view phase2_work( "Phase 2 Work", phase2_size ); - typename work_view::HostMirror host_phase2_work = - create_mirror_view( phase2_work ); + pool.get_usage_statistics( stats ); - inner_size = half_phase2_size / num_chunk_sizes; - chunk_size = base_chunk_size; + const int fill_error = ( nfill != result ) || + ( nfill != long(stats.consumed_blocks) ); - for ( size_t i = 0; i < num_chunk_sizes; ++i ) { - for ( size_t j = 0; j < inner_size; ++j ) { - host_phase2_work[i * inner_size + j] = chunk_size << 1; - } + if ( fill_error || print_statistics ) print_memory_pool_stats< pool_type >( stats ); + if ( fill_error || print_superblocks ) pool.print_state( std::cout ); - chunk_size *= 2; - } + ASSERT_EQ( nfill , result ); + ASSERT_EQ( nfill , long(stats.consumed_blocks) ); - for ( size_t i = half_phase2_size; i < phase2_size; ++i ) { - host_phase2_work[i] = 1; - } + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space , TagDealloc >(0,nfill) + , functor + , ndel + ); - std::random_shuffle( host_phase2_work.ptr_on_device(), - host_phase2_work.ptr_on_device() + phase2_size ); + pool.get_usage_statistics( stats ); - deep_copy( phase2_work, host_phase2_work ); + const int del_error = ( nfill - ndel ) != long(stats.consumed_blocks); - // Initialize the phase 3 work view with all deallocations. 
- work_view phase3_work( "Phase 3 Work", phase3_size ); - typename work_view::HostMirror host_phase3_work = - create_mirror_view( phase3_work ); + if ( del_error || print_statistics ) print_memory_pool_stats< pool_type >( stats ); + if ( del_error || print_superblocks ) pool.print_state( std::cout ); - inner_size = phase3_size / num_chunk_sizes; + ASSERT_EQ( ( nfill - ndel ) , long(stats.consumed_blocks) ); - for ( size_t i = 0; i < phase3_size; ++i ) host_phase3_work[i] = 1; + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space , TagRealloc >(0,nfill) + , functor + , result + ); - deep_copy( phase3_work, host_phase3_work ); + pool.get_usage_statistics( stats ); - // Calculate the amount of memory needed for the allocator. We need to know - // the number of superblocks required for each chunk size and use that to - // calculate the amount of memory for each chunk size. - size_t lg_sb_size = 18; - size_t sb_size = 1 << lg_sb_size; - size_t total_size = 0; - size_t allocs_per_size = phase1_size / num_chunk_sizes + - half_phase2_size / num_chunk_sizes; + const int refill_error = ( ndel != result ) || + ( nfill != long(stats.consumed_blocks) ); - chunk_size = ceil_base_chunk_size; - for ( size_t i = 0; i < num_chunk_sizes; ++i ) { - size_t my_size = allocs_per_size * chunk_size; - total_size += ( my_size + sb_size - 1 ) / sb_size * sb_size; - chunk_size *= 2; - } + if ( refill_error || print_statistics ) print_memory_pool_stats< pool_type >( stats ); + if ( refill_error || print_superblocks ) pool.print_state( std::cout ); - // Declare the queue to hold the records for allocated memory. An allocation - // adds a record to the back of the queue, and a deallocation removes a - // record from the front of the queue. 
- size_t num_allocations = phase1_size + half_phase2_size; - scalar_view ptrs_front( "Pointers front" ); - scalar_view ptrs_back( "Pointers back" ); - - pointer_view pointers( "pointers", num_allocations ); - -#ifdef TESTMEMORYPOOL_PRINT - printf( "\n*** test_mempool2() ***\n" ); - printf( " num_chunk_sizes: %12zu\n", num_chunk_sizes ); - printf( " base_chunk_size: %12u\n", base_chunk_size ); - printf( " ceil_base_chunk_size: %12u\n", ceil_base_chunk_size ); - printf( " phase1_size: %12zu\n", phase1_size ); - printf( " phase2_size: %12zu\n", phase2_size ); - printf( " phase3_size: %12zu\n", phase3_size ); - printf( " allocs_per_size: %12zu\n", allocs_per_size ); - printf( " num_allocations: %12zu\n", num_allocations ); - printf( " total_size: %12zu\n", total_size ); - fflush( stdout ); - - double elapsed_time = 0; - Kokkos::Timer timer; -#endif + ASSERT_EQ( ndel , result ); + ASSERT_EQ( nfill , long(stats.consumed_blocks) ); - pool_memory_space mempool( memory_space(), total_size * 1.2, lg_sb_size ); + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space , TagMixItUp >(0,nfill) + , functor + , result + ); -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "initialize mempool: ", elapsed_time ); + pool.get_usage_statistics( stats ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif + const int mix_error = ( ndel != result ) || + ( nfill != long(stats.consumed_blocks) ); - timer.reset(); -#endif + if ( mix_error || print_statistics ) print_memory_pool_stats< pool_type >( stats ); + if ( mix_error || print_superblocks ) pool.print_state( std::cout ); - { - allocate_deallocate_memory< work_view, pointer_view, scalar_view, - pool_memory_space > - adm( phase1_work, phase1_size, pointers, ptrs_front, ptrs_back, mempool ); + ASSERT_EQ( ndel , result ); + ASSERT_EQ( nfill , long(stats.consumed_blocks) ); } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = 
timer.seconds(); - print_results( "phase1: ", elapsed_time ); - -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - - timer.reset(); -#endif - - { - allocate_deallocate_memory< work_view, pointer_view, scalar_view, - pool_memory_space > - adm( phase2_work, phase2_size, pointers, ptrs_front, ptrs_back, mempool ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "phase2: ", elapsed_time ); - -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - - timer.reset(); -#endif - - { - allocate_deallocate_memory< work_view, pointer_view, scalar_view, - pool_memory_space > - adm( phase3_work, phase3_size, pointers, ptrs_front, ptrs_back, mempool ); - } - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "phase3: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif -#endif } -// Tests for correct behavior when the allocator is out of memory. -template < class Device > -void test_memory_exhaustion() -{ -#ifdef TESTMEMORYPOOL_PRINT - typedef typename Device::execution_space execution_space; -#endif - typedef typename Device::memory_space memory_space; - typedef Device device_type; - typedef Kokkos::View< pointer_obj *, device_type > pointer_view; - typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space; - - // The allocator will have a single superblock, and allocations will all be - // of the same chunk size. The allocation loop will attempt to allocate - // twice the number of chunks as are available in the allocator. The - // deallocation loop will only free the successfully allocated chunks. 
- - size_t chunk_size = 128; - size_t num_chunks = 128; - size_t half_num_chunks = num_chunks / 2; - size_t superblock_size = chunk_size * half_num_chunks; - size_t lg_superblock_size = - Kokkos::Impl::integral_power_of_two( superblock_size ); - - pointer_view pointers( "pointers", num_chunks ); - -#ifdef TESTMEMORYPOOL_PRINT - std::cout << "\n*** test_memory_exhaustion() ***" << std::endl; - - double elapsed_time = 0; - Kokkos::Timer timer; -#endif - - pool_memory_space mempool( memory_space(), superblock_size, - lg_superblock_size ); - -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "initialize mempool: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif - - { - allocate_memory< pointer_view, pool_memory_space > - am( pointers, num_chunks, chunk_size, mempool ); - } +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "allocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif - timer.reset(); -#endif +} // namespace TestMemoryPool { - { - // In parallel, the allocations that succeeded were not put contiguously - // into the pointers View. The whole View can still be looped over and - // have deallocate called because deallocate will just do nothing for NULL - // pointers. 
- deallocate_memory< pointer_view, pool_memory_space > - dm( pointers, num_chunks, chunk_size, mempool ); - } +namespace Test { -#ifdef TESTMEMORYPOOL_PRINT - execution_space::fence(); - elapsed_time = timer.seconds(); - print_results( "deallocate chunks: ", elapsed_time ); -#ifdef TESTMEMORYPOOL_PRINT_STATUS - mempool.print_status(); -#endif -#endif +TEST_F( TEST_CATEGORY, memory_pool ) +{ + TestMemoryPool::test_host_memory_pool_stats<>(); + TestMemoryPool::test_memory_pool_v2< TEST_EXECSPACE >(false,false); } - } -#undef TESTMEMORYPOOL_PRINT -#undef TESTMEMORYPOOL_PRINT_STATUS -#undef STRIDE -#undef STRIDE_ALLOC - #endif + diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index 6f2ca6a61c34b84f96cefd1195a6a11e2a6d32d1..9d5fc6464154dfd948dee9181202556585164bd2 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -48,6 +48,7 @@ #include <sstream> #include <iostream> +namespace Test { struct SomeTag {}; template< class ExecutionSpace > @@ -526,3 +527,11 @@ private: test_run_time_parameters_type< Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, ExecutionSpace, SomeTag > >(); } }; + +TEST_F( TEST_CATEGORY, policy_construction ) +{ + TestRangePolicyConstruction< TEST_EXECSPACE >(); + TestTeamPolicyConstruction< TEST_EXECSPACE >(); +} + +} diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp index 90411a57a0c9c871f946dd3a8b04b4af0554b380..f55574761b89170f3d6068bb649bda3944d4aed3 100644 --- a/lib/kokkos/core/unit_test/TestRange.hpp +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -41,7 +41,7 @@ //@HEADER */ -#include <stdio.h> +#include <cstdio> #include <Kokkos_Core.hpp> @@ -61,37 +61,37 @@ struct TestRange { struct ResetTag {}; struct VerifyResetTag {}; - TestRange( const size_t N ) - : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N ) + 
int N; + TestRange( const size_t N_ ) + : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N_ ), N(N_) {} - static void test_for( const size_t N ) + void test_for() { - TestRange functor( N ); - typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); + typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( m_flags ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag >( 0, N ), *this ); - Kokkos::deep_copy( host_flags, functor.m_flags ); + Kokkos::deep_copy( host_flags, m_flags ); - size_t error_count = 0; - for ( size_t i = 0; i < N; ++i ) { + int error_count = 0; + for ( int i = 0; i < N; ++i ) { if ( int( i ) != host_flags( i ) ) ++error_count; } - ASSERT_EQ( error_count, size_t( 0 ) ); + ASSERT_EQ( error_count, int( 0 ) ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, ResetTag >( 0, N ), functor ); - Kokkos::parallel_for( std::string( "TestKernelFor" ), Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyResetTag >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, ResetTag >( 0, N ), *this ); + Kokkos::parallel_for( std::string( "TestKernelFor" ), Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyResetTag >( 0, N ), *this ); - Kokkos::deep_copy( host_flags, functor.m_flags ); + Kokkos::deep_copy( host_flags, m_flags ); error_count = 0; - for ( size_t i = 0; i < N; ++i ) { + for ( int i = 0; i < N; ++i ) { if ( int( 2 * i ) != host_flags( i ) ) ++error_count; } - ASSERT_EQ( error_count, size_t( 0 ) ); + ASSERT_EQ( error_count, int( 0 ) ); } KOKKOS_INLINE_FUNCTION @@ -123,18 +123,17 @@ struct TestRange { struct OffsetTag 
{}; - static void test_reduce( const size_t N ) + void test_reduce( ) { - TestRange functor( N ); int total = 0; - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); - Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor, total ); + Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this, total ); // sum( 0 .. N-1 ) ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor, total ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this, total ); // sum( 1 .. N ) ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) ); } @@ -149,13 +148,12 @@ struct TestRange { //---------------------------------------- - static void test_scan( const size_t N ) + void test_scan( ) { - TestRange functor( N ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); - Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor ); + Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), *this ); } KOKKOS_INLINE_FUNCTION @@ -170,15 +168,17 @@ struct TestRange { } } - static void test_dynamic_policy( const size_t N ) + void test_dynamic_policy() { - typedef Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) + typedef Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; { Kokkos::View< size_t*, ExecSpace, 
Kokkos::MemoryTraits<Kokkos::Atomic> > count( "Count", ExecSpace::concurrency() ); Kokkos::View< int*, ExecSpace > a( "A", N ); - Kokkos::parallel_for( policy_t( 0, N ), KOKKOS_LAMBDA ( const typename policy_t::member_type& i ) { + Kokkos::parallel_for( policy_t( 0, N ), KOKKOS_LAMBDA ( const int& i ) { for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { a( i )++; } @@ -186,12 +186,12 @@ struct TestRange { }); int error = 0; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { lsum += ( a( i ) != ( i < N / 2 ? 1 : 10000 ) ); }, error ); ASSERT_EQ( error, 0 ); - if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<size_t>( 4 * ExecSpace::concurrency() ) ) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<int>( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { @@ -211,7 +211,7 @@ struct TestRange { Kokkos::View< int*, ExecSpace> a( "A", N ); int sum = 0; - Kokkos::parallel_reduce( policy_t( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + Kokkos::parallel_reduce( policy_t( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { a( i )++; } @@ -221,12 +221,12 @@ struct TestRange { ASSERT_EQ( sum, N ); int error = 0; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const int & i, int & lsum ) { lsum += ( a( i ) != ( i < N / 2 ? 
1 : 10000 ) ); }, error ); ASSERT_EQ( error, 0 ); - if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<size_t>( 4 * ExecSpace::concurrency() ) ) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<int>( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { @@ -240,9 +240,57 @@ struct TestRange { //} } } +#endif +#endif } }; } // namespace +TEST_F( TEST_CATEGORY, range_for ) +{ + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(0); f.test_for(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); f.test_for(); } + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(2); f.test_for(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_for(); } + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(1000); f.test_for(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_for(); } +} + +TEST_F( TEST_CATEGORY, range_reduce ) +{ + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(0); f.test_reduce(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); f.test_reduce(); } + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(2); f.test_reduce(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_reduce(); } + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(1000); f.test_reduce(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_reduce(); } +} + +#ifndef KOKKOS_ENABLE_OPENMPTARGET +TEST_F( TEST_CATEGORY, range_scan ) +{ + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(0); f.test_scan(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); f.test_scan(); } +#ifndef KOKKOS_ENABLE_CUDA + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(0); 
f.test_dynamic_policy(); } +#endif + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(2); f.test_scan(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_scan(); } +#ifndef KOKKOS_ENABLE_CUDA + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(3); f.test_dynamic_policy(); } +#endif + + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >f(1000); f.test_scan(); } + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_scan(); } +#ifndef KOKKOS_ENABLE_CUDA + { TestRange< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >f(1001); f.test_dynamic_policy(); } +#endif +} +#endif } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index 7e77dadf6249fe3eaa763c0c9848b93965379e7e..86982e6a55fb3db5658d3c3d667564b54656039c 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -470,640 +470,11 @@ public: } // namespace -// Computes y^T*A*x -// ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) - -#if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) - -template< typename ScalarType, class DeviceType > -class TestTripleNestedReduce -{ -public: - typedef DeviceType execution_space; - typedef typename execution_space::size_type size_type; - - TestTripleNestedReduce( const size_type & nrows, const size_type & ncols - , const size_type & team_size, const size_type & vector_length ) - { - run_test( nrows, ncols, team_size, vector_length ); - } - - void run_test( const size_type & nrows, const size_type & ncols - , const size_type & team_size, const size_type & vector_length ) - { - //typedef Kokkos::LayoutLeft Layout; - typedef Kokkos::LayoutRight Layout; - - typedef Kokkos::View< ScalarType*, DeviceType > ViewVector; - typedef Kokkos::View< ScalarType**, Layout, DeviceType > ViewMatrix; - - ViewVector y( "y", nrows ); - ViewVector x( "x", ncols ); - ViewMatrix A( "A", nrows, ncols ); - - typedef Kokkos::RangePolicy<DeviceType> range_policy; - - // Initialize y vector. - Kokkos::parallel_for( range_policy( 0, nrows ), KOKKOS_LAMBDA ( const int i ) { y( i ) = 1; } ); - - // Initialize x vector. - Kokkos::parallel_for( range_policy( 0, ncols ), KOKKOS_LAMBDA ( const int i ) { x( i ) = 1; } ); - - typedef Kokkos::TeamPolicy< DeviceType > team_policy; - typedef typename Kokkos::TeamPolicy< DeviceType >::member_type member_type; - - // Initialize A matrix, note 2D indexing computation. - Kokkos::parallel_for( team_policy( nrows, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type & teamMember ) { - const int j = teamMember.league_rank(); - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, ncols ), [&] ( const int i ) { - A( j, i ) = 1; - } ); - } ); - - // Three level parallelism kernel to force caching of vector x. 
- ScalarType result = 0.0; - int chunk_size = 128; - Kokkos::parallel_reduce( team_policy( nrows / chunk_size, team_size, vector_length ), - KOKKOS_LAMBDA ( const member_type & teamMember, double & update ) { - const int row_start = teamMember.league_rank() * chunk_size; - const int row_end = row_start + chunk_size; - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, row_start, row_end ), [&] ( const int i ) { - ScalarType sum_i = 0.0; - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember, ncols ), [&] ( const int j, ScalarType &innerUpdate ) { - innerUpdate += A( i, j ) * x( j ); - }, sum_i ); - Kokkos::single( Kokkos::PerThread( teamMember ), [&] () { - update += y( i ) * sum_i; - } ); - } ); - }, result ); - - const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; - ASSERT_EQ( solution, result ); - } -}; - -#else // #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) - -template< typename ScalarType, class DeviceType > -class TestTripleNestedReduce -{ -public: - typedef DeviceType execution_space; - typedef typename execution_space::size_type size_type; - - TestTripleNestedReduce( const size_type &, const size_type - , const size_type &, const size_type ) - {} -}; - -#endif //-------------------------------------------------------------------------- namespace Test { -namespace ReduceCombinatorical { - -template< class Scalar, class Space = Kokkos::HostSpace > -struct AddPlus { -public: - // Required. - typedef AddPlus reducer_type; - typedef Scalar value_type; - - typedef Kokkos::View< value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; - -private: - result_view_type result; - -public: - AddPlus( value_type & result_ ) : result( &result_ ) {} - - // Required. 
- KOKKOS_INLINE_FUNCTION - void join( value_type & dest, const value_type & src ) const { - dest += src + 1; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dest, const volatile value_type & src ) const { - dest += src + 1; - } - - // Optional. - KOKKOS_INLINE_FUNCTION - void init( value_type & val ) const { - val = value_type(); - } - - result_view_type result_view() const { - return result; - } -}; - -template< int ISTEAM > -struct FunctorScalar; - -template<> -struct FunctorScalar< 0 > { - Kokkos::View< double > result; - - FunctorScalar( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } -}; - -template<> -struct FunctorScalar< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalar( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } -}; - -template< int ISTEAM > -struct FunctorScalarInit; - -template<> -struct FunctorScalarInit< 0 > { - Kokkos::View< double > result; - - FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -template<> -struct FunctorScalarInit< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -template< int ISTEAM > -struct FunctorScalarFinal; - -template<> -struct 
FunctorScalarFinal< 0 > { - Kokkos::View<double> result; - - FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } -}; - -template<> -struct FunctorScalarFinal< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } -}; - -template< int ISTEAM > -struct FunctorScalarJoin; - -template<> -struct FunctorScalarJoin< 0 > { - Kokkos::View<double> result; - - FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } -}; - -template<> -struct FunctorScalarJoin< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } -}; - -template< int ISTEAM > -struct FunctorScalarJoinFinal; - -template<> -struct FunctorScalarJoinFinal< 0 > { - Kokkos::View< double > result; - - FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & 
update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } -}; - -template<> -struct FunctorScalarJoinFinal< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } -}; - -template< int ISTEAM > -struct FunctorScalarJoinInit; - -template<> -struct FunctorScalarJoinInit< 0 > { - Kokkos::View< double > result; - - FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -template<> -struct FunctorScalarJoinInit< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -template< int ISTEAM > -struct 
FunctorScalarJoinFinalInit; - -template<> -struct FunctorScalarJoinFinalInit< 0 > { - Kokkos::View<double> result; - - FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -template<> -struct FunctorScalarJoinFinalInit< 1 > { - typedef Kokkos::TeamPolicy<>::member_type team_type; - - Kokkos::View< double > result; - - FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const team_type & team, double & update ) const { - update += 1.0 / team.team_size() * team.league_rank(); - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double & dst, const volatile double & update ) const { - dst += update; - } - - KOKKOS_INLINE_FUNCTION - void final( double & update ) const { - result() = update; - } - - KOKKOS_INLINE_FUNCTION - void init( double & update ) const { - update = 0.0; - } -}; - -struct Functor1 { - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, double & update ) const { - update += i; - } -}; - -struct Functor2 { - typedef double value_type[]; - - const unsigned value_count; - - Functor2( unsigned n ) : value_count( n ) {} - - KOKKOS_INLINE_FUNCTION - void operator()( const unsigned & i, double update[] ) const { - for ( unsigned j = 0; j < value_count; j++ ) { - update[j] += i; - } - } - - KOKKOS_INLINE_FUNCTION - void init( double dst[] ) const - { - for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; - } - - KOKKOS_INLINE_FUNCTION - void join( volatile double dst[], - const volatile double src[] ) const - { - for ( unsigned i = 0; i < value_count; ++i ) 
dst[i] += src[i]; - } -}; - -} // namespace ReduceCombinatorical - -} // namespace Test - -namespace Test { - -template< class ExecSpace = Kokkos::DefaultExecutionSpace > -struct TestReduceCombinatoricalInstantiation { - template< class ... Args > - static void CallParallelReduce( Args... args ) { - Kokkos::parallel_reduce( args... ); - } - - template< class ... Args > - static void AddReturnArgument( Args... args ) { - Kokkos::View< double, Kokkos::HostSpace > result_view( "ResultView" ); - double expected_result = 1000.0 * 999.0 / 2.0; - - double value = 0; - Kokkos::parallel_reduce( args..., value ); - ASSERT_EQ( expected_result, value ); - - result_view() = 0; - CallParallelReduce( args..., result_view ); - ASSERT_EQ( expected_result, result_view() ); - - value = 0; - CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >( &value ) ); - ASSERT_EQ( expected_result, value ); - - result_view() = 0; - const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_const_um = result_view; - CallParallelReduce( args..., result_view_const_um ); - ASSERT_EQ( expected_result, result_view_const_um() ); - - value = 0; - CallParallelReduce( args..., Test::ReduceCombinatorical::AddPlus< double >( value ) ); - if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { - ASSERT_TRUE( expected_result < value ); - } - else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { - ASSERT_TRUE( expected_result <= value ); - } - else { - ASSERT_EQ( expected_result, value ); - } - - value = 0; - Test::ReduceCombinatorical::AddPlus< double > add( value ); - CallParallelReduce( args..., add ); - if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { - ASSERT_TRUE( expected_result < value ); - } - else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( 
ExecSpace::concurrency() > 1 ) ) { - ASSERT_TRUE( expected_result <= value ); - } - else { - ASSERT_EQ( expected_result, value ); - } - } - - template< class ... Args > - static void AddLambdaRange( void*, Args... args ) { - AddReturnArgument( args..., KOKKOS_LAMBDA ( const int & i, double & lsum ) { - lsum += i; - }); - } - - template< class ... Args > - static void AddLambdaTeam( void*, Args... args ) { - AddReturnArgument( args..., KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type & team, double & update ) { - update += 1.0 / team.team_size() * team.league_rank(); - }); - } - - template< class ... Args > - static void AddLambdaRange( Kokkos::InvalidType, Args... args ) {} - - template< class ... Args > - static void AddLambdaTeam( Kokkos::InvalidType, Args... args ) {} - - template< int ISTEAM, class ... Args > - static void AddFunctor( Args... args ) { - Kokkos::View< double > result_view( "FunctorView" ); - auto h_r = Kokkos::create_mirror_view( result_view ); - Test::ReduceCombinatorical::FunctorScalar< ISTEAM > functor( result_view ); - double expected_result = 1000.0 * 999.0 / 2.0; - - AddReturnArgument( args..., functor ); - AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalar< ISTEAM >( result_view ) ); - AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarInit< ISTEAM >( result_view ) ); - AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoin< ISTEAM >( result_view ) ); - AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoinInit< ISTEAM >( result_view ) ); - - h_r() = 0; - Kokkos::deep_copy( result_view, h_r ); - CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); - Kokkos::deep_copy( h_r, result_view ); - ASSERT_EQ( expected_result, h_r() ); - - h_r() = 0; - Kokkos::deep_copy( result_view, h_r ); - CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); - 
Kokkos::deep_copy( h_r, result_view ); - ASSERT_EQ( expected_result, h_r() ); - - h_r() = 0; - Kokkos::deep_copy( result_view, h_r ); - CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); - Kokkos::deep_copy( h_r, result_view ); - ASSERT_EQ( expected_result, h_r() ); - } - - template< class ... Args > - static void AddFunctorLambdaRange( Args... args ) { - AddFunctor< 0, Args... >( args... ); -#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaRange( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); -#endif - } - - template< class ... Args > - static void AddFunctorLambdaTeam( Args... args ) { - AddFunctor< 1, Args... >( args... ); -#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaTeam( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); -#endif - } - - template< class ... Args > - static void AddPolicy( Args... 
args ) { - int N = 1000; - Kokkos::RangePolicy< ExecSpace > policy( 0, N ); - - AddFunctorLambdaRange( args..., 1000 ); - AddFunctorLambdaRange( args..., N ); - AddFunctorLambdaRange( args..., policy ); - AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace >( 0, N ) ); - AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ) ); - AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Static> >( 0, N ).set_chunk_size( 10 ) ); - AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ).set_chunk_size( 10 ) ); - - AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace >( N, Kokkos::AUTO ) ); - AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ) ); - AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Static> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); - AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); - } - - static void execute_a() { - AddPolicy(); - } - - static void execute_b() { - std::string s( "Std::String" ); - AddPolicy( s.c_str() ); - AddPolicy( "Char Constant" ); - } - - static void execute_c() { - std::string s( "Std::String" ); - AddPolicy( s ); - } -}; - template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReducers { struct SumFunctor { @@ -1201,15 +572,6 @@ struct TestReducers { } }; - struct BXorFunctor { - Kokkos::View< const Scalar*, ExecSpace > values; - - KOKKOS_INLINE_FUNCTION - void operator()( const int & i, Scalar & value ) const { - value = value ^ values( i ); - } - }; - struct LAndFunctor { Kokkos::View< const Scalar*, ExecSpace > values; @@ -1228,15 +590,6 @@ struct TestReducers { } }; - struct LXorFunctor { - Kokkos::View< const Scalar*, ExecSpace > values; - - 
KOKKOS_INLINE_FUNCTION - void operator()( const int & i, Scalar & value ) const { - value = value ? ( !values( i ) ) : values( i ); - } - }; - static void test_sum( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); @@ -1259,21 +612,10 @@ struct TestReducers { ASSERT_EQ( sum_scalar, reference_sum ); - Scalar sum_scalar_view = reducer_scalar.result_view()(); + Scalar sum_scalar_view = reducer_scalar.reference(); ASSERT_EQ( sum_scalar_view, reference_sum ); } - { - Scalar sum_scalar_init = init; - Kokkos::Experimental::Sum< Scalar > reducer_scalar_init( sum_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( sum_scalar_init, reference_sum ); - - Scalar sum_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( sum_scalar_init_view, reference_sum ); - } - { Kokkos::View< Scalar, Kokkos::HostSpace> sum_view( "View" ); sum_view() = init; @@ -1283,22 +625,9 @@ struct TestReducers { Scalar sum_view_scalar = sum_view(); ASSERT_EQ( sum_view_scalar, reference_sum ); - Scalar sum_view_view = reducer_view.result_view()(); + Scalar sum_view_view = reducer_view.reference(); ASSERT_EQ( sum_view_view, reference_sum ); } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > sum_view_init( "View" ); - sum_view_init() = init; - Kokkos::Experimental::Sum< Scalar > reducer_view_init( sum_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - Scalar sum_view_init_scalar = sum_view_init(); - ASSERT_EQ( sum_view_init_scalar, reference_sum ); - - Scalar sum_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( sum_view_init_view, reference_sum ); - } } static void test_prod( int N ) { @@ -1316,7 +645,6 @@ struct TestReducers { f.values = values; Scalar init = 1; - if ( std::is_arithmetic< Scalar >::value ) { Scalar prod_scalar = init; Kokkos::Experimental::Prod< 
Scalar > reducer_scalar( prod_scalar ); @@ -1324,22 +652,10 @@ struct TestReducers { ASSERT_EQ( prod_scalar, reference_prod ); - Scalar prod_scalar_view = reducer_scalar.result_view()(); + Scalar prod_scalar_view = reducer_scalar.reference(); ASSERT_EQ( prod_scalar_view, reference_prod ); } - { - Scalar prod_scalar_init = init; - Kokkos::Experimental::Prod< Scalar > reducer_scalar_init( prod_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( prod_scalar_init, reference_prod ); - - Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( prod_scalar_init_view, reference_prod ); - } - - if ( std::is_arithmetic< Scalar >::value ) { Kokkos::View< Scalar, Kokkos::HostSpace > prod_view( "View" ); prod_view() = init; @@ -1349,22 +665,9 @@ struct TestReducers { Scalar prod_view_scalar = prod_view(); ASSERT_EQ( prod_view_scalar, reference_prod ); - Scalar prod_view_view = reducer_view.result_view()(); + Scalar prod_view_view = reducer_view.reference(); ASSERT_EQ( prod_view_view, reference_prod ); } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > prod_view_init( "View" ); - prod_view_init() = init; - Kokkos::Experimental::Prod< Scalar > reducer_view_init( prod_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - Scalar prod_view_init_scalar = prod_view_init(); - ASSERT_EQ( prod_view_init_scalar, reference_prod ); - - Scalar prod_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( prod_view_init_view, reference_prod ); - } } static void test_min( int N ) { @@ -1390,21 +693,10 @@ struct TestReducers { ASSERT_EQ( min_scalar, reference_min ); - Scalar min_scalar_view = reducer_scalar.result_view()(); + Scalar min_scalar_view = reducer_scalar.reference(); ASSERT_EQ( min_scalar_view, reference_min ); } - { - Scalar min_scalar_init = init; - Kokkos::Experimental::Min< Scalar > reducer_scalar_init( 
min_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( min_scalar_init, reference_min ); - - Scalar min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( min_scalar_init_view, reference_min ); - } - { Kokkos::View< Scalar, Kokkos::HostSpace > min_view( "View" ); min_view() = init; @@ -1414,22 +706,9 @@ struct TestReducers { Scalar min_view_scalar = min_view(); ASSERT_EQ( min_view_scalar, reference_min ); - Scalar min_view_view = reducer_view.result_view()(); + Scalar min_view_view = reducer_view.reference(); ASSERT_EQ( min_view_view, reference_min ); } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > min_view_init( "View" ); - min_view_init() = init; - Kokkos::Experimental::Min< Scalar > reducer_view_init( min_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - Scalar min_view_init_scalar = min_view_init(); - ASSERT_EQ( min_view_init_scalar, reference_min ); - - Scalar min_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( min_view_init_view, reference_min ); - } } static void test_max( int N ) { @@ -1455,21 +734,10 @@ struct TestReducers { ASSERT_EQ( max_scalar, reference_max ); - Scalar max_scalar_view = reducer_scalar.result_view()(); + Scalar max_scalar_view = reducer_scalar.reference(); ASSERT_EQ( max_scalar_view, reference_max ); } - { - Scalar max_scalar_init = init; - Kokkos::Experimental::Max< Scalar > reducer_scalar_init( max_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( max_scalar_init, reference_max ); - - Scalar max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( max_scalar_init_view, reference_max ); - } - { Kokkos::View< Scalar, Kokkos::HostSpace > max_view( "View" ); max_view() = init; @@ -1479,22 +747,9 @@ struct TestReducers { Scalar max_view_scalar = max_view(); 
ASSERT_EQ( max_view_scalar, reference_max ); - Scalar max_view_view = reducer_view.result_view()(); + Scalar max_view_view = reducer_view.reference(); ASSERT_EQ( max_view_view, reference_max ); } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > max_view_init( "View" ); - max_view_init() = init; - Kokkos::Experimental::Max< Scalar > reducer_view_init( max_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - Scalar max_view_init_scalar = max_view_init(); - ASSERT_EQ( max_view_init_scalar, reference_max ); - - Scalar max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( max_view_init_view, reference_max ); - } } static void test_minloc( int N ) { @@ -1521,7 +776,6 @@ struct TestReducers { MinLocFunctor f; f.values = values; - Scalar init = std::numeric_limits< Scalar >::max(); { value_type min_scalar; @@ -1531,24 +785,11 @@ struct TestReducers { ASSERT_EQ( min_scalar.val, reference_min ); ASSERT_EQ( min_scalar.loc, reference_loc ); - value_type min_scalar_view = reducer_scalar.result_view()(); + value_type min_scalar_view = reducer_scalar.reference(); ASSERT_EQ( min_scalar_view.val, reference_min ); ASSERT_EQ( min_scalar_view.loc, reference_loc ); } - { - value_type min_scalar_init; - Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar_init( min_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( min_scalar_init.val, reference_min ); - ASSERT_EQ( min_scalar_init.loc, reference_loc ); - - value_type min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( min_scalar_init_view.val, reference_min ); - ASSERT_EQ( min_scalar_init_view.loc, reference_loc ); - } - { Kokkos::View< value_type, Kokkos::HostSpace > min_view( "View" ); Kokkos::Experimental::MinLoc< Scalar, int > reducer_view( min_view ); @@ -1558,24 +799,10 @@ struct TestReducers { ASSERT_EQ( min_view_scalar.val, reference_min ); 
ASSERT_EQ( min_view_scalar.loc, reference_loc ); - value_type min_view_view = reducer_view.result_view()(); + value_type min_view_view = reducer_view.reference(); ASSERT_EQ( min_view_view.val, reference_min ); ASSERT_EQ( min_view_view.loc, reference_loc ); } - - { - Kokkos::View< value_type, Kokkos::HostSpace > min_view_init( "View" ); - Kokkos::Experimental::MinLoc< Scalar, int > reducer_view_init( min_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - value_type min_view_init_scalar = min_view_init(); - ASSERT_EQ( min_view_init_scalar.val, reference_min ); - ASSERT_EQ( min_view_init_scalar.loc, reference_loc ); - - value_type min_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( min_view_init_view.val, reference_min ); - ASSERT_EQ( min_view_init_view.loc, reference_loc ); - } } static void test_maxloc( int N ) { @@ -1602,7 +829,6 @@ struct TestReducers { MaxLocFunctor f; f.values = values; - Scalar init = std::numeric_limits< Scalar >::min(); { value_type max_scalar; @@ -1612,24 +838,11 @@ struct TestReducers { ASSERT_EQ( max_scalar.val, reference_max ); ASSERT_EQ( max_scalar.loc, reference_loc ); - value_type max_scalar_view = reducer_scalar.result_view()(); + value_type max_scalar_view = reducer_scalar.reference(); ASSERT_EQ( max_scalar_view.val, reference_max ); ASSERT_EQ( max_scalar_view.loc, reference_loc ); } - { - value_type max_scalar_init; - Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar_init( max_scalar_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); - - ASSERT_EQ( max_scalar_init.val, reference_max ); - ASSERT_EQ( max_scalar_init.loc, reference_loc ); - - value_type max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( max_scalar_init_view.val, reference_max ); - ASSERT_EQ( max_scalar_init_view.loc, reference_loc ); - } - { Kokkos::View< value_type, Kokkos::HostSpace > max_view( 
"View" ); Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view( max_view ); @@ -1639,24 +852,10 @@ struct TestReducers { ASSERT_EQ( max_view_scalar.val, reference_max ); ASSERT_EQ( max_view_scalar.loc, reference_loc ); - value_type max_view_view = reducer_view.result_view()(); + value_type max_view_view = reducer_view.reference(); ASSERT_EQ( max_view_view.val, reference_max ); ASSERT_EQ( max_view_view.loc, reference_loc ); } - - { - Kokkos::View< value_type, Kokkos::HostSpace > max_view_init( "View" ); - Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view_init( max_view_init, init ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - value_type max_view_init_scalar = max_view_init(); - ASSERT_EQ( max_view_init_scalar.val, reference_max ); - ASSERT_EQ( max_view_init_scalar.loc, reference_loc ); - - value_type max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( max_view_init_view.val, reference_max ); - ASSERT_EQ( max_view_init_view.loc, reference_loc ); - } } static void test_minmaxloc( int N ) { @@ -1699,8 +898,6 @@ struct TestReducers { MinMaxLocFunctor f; f.values = values; - Scalar init_min = std::numeric_limits< Scalar >::max(); - Scalar init_max = std::numeric_limits< Scalar >::min(); { value_type minmax_scalar; @@ -1726,30 +923,13 @@ struct TestReducers { ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc ); - value_type minmax_scalar_view = reducer_scalar.result_view()(); + value_type minmax_scalar_view = reducer_scalar.reference(); ASSERT_EQ( minmax_scalar_view.min_val, reference_min ); ASSERT_EQ( minmax_scalar_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_scalar_view.max_val, reference_max ); ASSERT_EQ( minmax_scalar_view.max_loc, reference_maxloc ); } - { - value_type minmax_scalar_init; - Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar_init( minmax_scalar_init, init_min, init_max ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, 
reducer_scalar_init ); - - ASSERT_EQ( minmax_scalar_init.min_val, reference_min ); - ASSERT_EQ( minmax_scalar_init.min_loc, reference_minloc ); - ASSERT_EQ( minmax_scalar_init.max_val, reference_max ); - ASSERT_EQ( minmax_scalar_init.max_loc, reference_maxloc ); - - value_type minmax_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ( minmax_scalar_init_view.min_val, reference_min ); - ASSERT_EQ( minmax_scalar_init_view.min_loc, reference_minloc ); - ASSERT_EQ( minmax_scalar_init_view.max_val, reference_max ); - ASSERT_EQ( minmax_scalar_init_view.max_loc, reference_maxloc ); - } - { Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); @@ -1761,30 +941,12 @@ struct TestReducers { ASSERT_EQ( minmax_view_scalar.max_val, reference_max ); ASSERT_EQ( minmax_view_scalar.max_loc, reference_maxloc ); - value_type minmax_view_view = reducer_view.result_view()(); + value_type minmax_view_view = reducer_view.reference(); ASSERT_EQ( minmax_view_view.min_val, reference_min ); ASSERT_EQ( minmax_view_view.min_loc, reference_minloc ); ASSERT_EQ( minmax_view_view.max_val, reference_max ); ASSERT_EQ( minmax_view_view.max_loc, reference_maxloc ); } - - { - Kokkos::View< value_type, Kokkos::HostSpace > minmax_view_init( "View" ); - Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view_init( minmax_view_init, init_min, init_max ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); - - value_type minmax_view_init_scalar = minmax_view_init(); - ASSERT_EQ( minmax_view_init_scalar.min_val, reference_min ); - ASSERT_EQ( minmax_view_init_scalar.min_loc, reference_minloc ); - ASSERT_EQ( minmax_view_init_scalar.max_val, reference_max ); - ASSERT_EQ( minmax_view_init_scalar.max_loc, reference_maxloc ); - - value_type minmax_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ( minmax_view_init_view.min_val, reference_min ); - 
ASSERT_EQ( minmax_view_init_view.min_loc, reference_minloc ); - ASSERT_EQ( minmax_view_init_view.max_val, reference_max ); - ASSERT_EQ( minmax_view_init_view.max_loc, reference_maxloc ); - } } static void test_BAnd( int N ) { @@ -1808,7 +970,7 @@ struct TestReducers { Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); ASSERT_EQ( band_scalar, reference_band ); - Scalar band_scalar_view = reducer_scalar.result_view()(); + Scalar band_scalar_view = reducer_scalar.reference(); ASSERT_EQ( band_scalar_view, reference_band ); } @@ -1822,7 +984,7 @@ struct TestReducers { Scalar band_view_scalar = band_view(); ASSERT_EQ( band_view_scalar, reference_band ); - Scalar band_view_view = reducer_view.result_view()(); + Scalar band_view_view = reducer_view.reference(); ASSERT_EQ( band_view_view, reference_band ); } } @@ -1849,7 +1011,7 @@ struct TestReducers { ASSERT_EQ( bor_scalar, reference_bor ); - Scalar bor_scalar_view = reducer_scalar.result_view()(); + Scalar bor_scalar_view = reducer_scalar.reference(); ASSERT_EQ( bor_scalar_view, reference_bor ); } @@ -1862,51 +1024,11 @@ struct TestReducers { Scalar bor_view_scalar = bor_view(); ASSERT_EQ( bor_view_scalar, reference_bor ); - Scalar bor_view_view = reducer_view.result_view()(); + Scalar bor_view_view = reducer_view.reference(); ASSERT_EQ( bor_view_view, reference_bor ); } } - static void test_BXor( int N ) { - Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); - auto h_values = Kokkos::create_mirror_view( values ); - Scalar reference_bxor = Scalar() & ( ~Scalar() ); - - for ( int i = 0; i < N; i++ ) { - h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); - reference_bxor = reference_bxor ^ h_values( i ); - } - Kokkos::deep_copy( values, h_values ); - - BXorFunctor f; - f.values = values; - Scalar init = Scalar() & ( ~Scalar() ); - - { - Scalar bxor_scalar = init; - Kokkos::Experimental::BXor< Scalar > reducer_scalar( bxor_scalar ); - Kokkos::parallel_reduce( 
Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); - - ASSERT_EQ( bxor_scalar, reference_bxor ); - - Scalar bxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ( bxor_scalar_view, reference_bxor ); - } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > bxor_view( "View" ); - bxor_view() = init; - Kokkos::Experimental::BXor< Scalar > reducer_view( bxor_view ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); - - Scalar bxor_view_scalar = bxor_view(); - ASSERT_EQ( bxor_view_scalar, reference_bxor ); - - Scalar bxor_view_view = reducer_view.result_view()(); - ASSERT_EQ( bxor_view_view, reference_bxor ); - } - } - static void test_LAnd( int N ) { Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); auto h_values = Kokkos::create_mirror_view( values ); @@ -1929,7 +1051,7 @@ struct TestReducers { ASSERT_EQ( land_scalar, reference_land ); - Scalar land_scalar_view = reducer_scalar.result_view()(); + Scalar land_scalar_view = reducer_scalar.reference(); ASSERT_EQ( land_scalar_view, reference_land ); } @@ -1942,7 +1064,7 @@ struct TestReducers { Scalar land_view_scalar = land_view(); ASSERT_EQ( land_view_scalar, reference_land ); - Scalar land_view_view = reducer_view.result_view()(); + Scalar land_view_view = reducer_view.reference(); ASSERT_EQ( land_view_view, reference_land ); } } @@ -1969,7 +1091,7 @@ struct TestReducers { ASSERT_EQ( lor_scalar, reference_lor ); - Scalar lor_scalar_view = reducer_scalar.result_view()(); + Scalar lor_scalar_view = reducer_scalar.reference(); ASSERT_EQ( lor_scalar_view, reference_lor ); } @@ -1982,51 +1104,11 @@ struct TestReducers { Scalar lor_view_scalar = lor_view(); ASSERT_EQ( lor_view_scalar, reference_lor ); - Scalar lor_view_view = reducer_view.result_view()(); + Scalar lor_view_view = reducer_view.reference(); ASSERT_EQ( lor_view_view, reference_lor ); } } - static void test_LXor( int N ) { - Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); - auto h_values = 
Kokkos::create_mirror_view( values ); - Scalar reference_lxor = 0; - - for ( int i = 0; i < N; i++ ) { - h_values( i ) = (Scalar) ( rand() % 2 ); - reference_lxor = reference_lxor ? ( !h_values( i ) ) : h_values( i ); - } - Kokkos::deep_copy( values, h_values ); - - LXorFunctor f; - f.values = values; - Scalar init = 0; - - { - Scalar lxor_scalar = init; - Kokkos::Experimental::LXor< Scalar > reducer_scalar( lxor_scalar ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); - - ASSERT_EQ( lxor_scalar, reference_lxor ); - - Scalar lxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ( lxor_scalar_view, reference_lxor ); - } - - { - Kokkos::View< Scalar, Kokkos::HostSpace > lxor_view( "View" ); - lxor_view() = init; - Kokkos::Experimental::LXor< Scalar > reducer_view( lxor_view ); - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); - - Scalar lxor_view_scalar = lxor_view(); - ASSERT_EQ( lxor_view_scalar, reference_lxor ); - - Scalar lxor_view_view = reducer_view.result_view()(); - ASSERT_EQ( lxor_view_view, reference_lxor ); - } - } - static void execute_float() { test_sum( 10001 ); test_prod( 35 ); @@ -2047,10 +1129,8 @@ struct TestReducers { test_minmaxloc( 10007 ); test_BAnd( 35 ); test_BOr( 35 ); - test_BXor( 35 ); test_LAnd( 35 ); test_LOr( 35 ); - test_LXor( 35 ); } static void execute_basic() { @@ -2059,4 +1139,42 @@ struct TestReducers { } }; + +TEST_F( TEST_CATEGORY, long_reduce ) +{ + TestReduce< long, TEST_EXECSPACE >( 0 ); + TestReduce< long, TEST_EXECSPACE >( 1000000 ); +} + +TEST_F( TEST_CATEGORY, double_reduce ) +{ + TestReduce< double, TEST_EXECSPACE >( 0 ); + TestReduce< double, TEST_EXECSPACE >( 1000000 ); +} + +TEST_F( TEST_CATEGORY, reducers ) +{ + TestReducers< int, TEST_EXECSPACE >::execute_integer(); + TestReducers< size_t, TEST_EXECSPACE >::execute_integer(); + TestReducers< double, TEST_EXECSPACE >::execute_float(); + TestReducers< Kokkos::complex<double>, 
TEST_EXECSPACE >::execute_basic(); +} + +TEST_F( TEST_CATEGORY, long_reduce_dynamic ) +{ + TestReduceDynamic< long, TEST_EXECSPACE >( 0 ); + TestReduceDynamic< long, TEST_EXECSPACE >( 1000000 ); +} + +TEST_F( TEST_CATEGORY, double_reduce_dynamic ) +{ + TestReduceDynamic< double, TEST_EXECSPACE >( 0 ); + TestReduceDynamic< double, TEST_EXECSPACE >( 1000000 ); +} + +TEST_F( TEST_CATEGORY, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, TEST_EXECSPACE >( 0 ); + TestReduceDynamicView< long, TEST_EXECSPACE >( 1000000 ); +} } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2651df92318e97e2e9eb2eee7816d848c0d517e8 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp @@ -0,0 +1,597 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <stdexcept> +#include <sstream> +#include <iostream> +#include <limits> + +#include <Kokkos_Core.hpp> + +namespace Test { + +namespace ReduceCombinatorical { + +template< class Scalar, class Space = Kokkos::HostSpace > +struct AddPlus { +public: + // Required. + typedef AddPlus reducer; + typedef Scalar value_type; + + typedef Kokkos::View< value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + +private: + result_view_type result; + +public: + AddPlus( value_type & result_ ) : result( &result_ ) {} + + // Required. + KOKKOS_INLINE_FUNCTION + void join( value_type & dest, const value_type & src ) const { + dest += src + 1; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dest, const volatile value_type & src ) const { + dest += src + 1; + } + + // Optional. 
+ KOKKOS_INLINE_FUNCTION + void init( value_type & val ) const { + val = value_type(); + } + + KOKKOS_INLINE_FUNCTION + value_type& reference() const { + return result(); + } + + KOKKOS_INLINE_FUNCTION + result_view_type view() const { + return result; + } +}; + +template< int ISTEAM > +struct FunctorScalar; + +template<> +struct FunctorScalar< 0 > { + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } +}; + +template<> +struct FunctorScalar< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } +}; + +template< int ISTEAM > +struct FunctorScalarInit; + +template<> +struct FunctorScalarInit< 0 > { + Kokkos::View< double > result; + + FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +template< int ISTEAM > +struct FunctorScalarFinal; + +template<> +struct FunctorScalarFinal< 0 > { + Kokkos::View<double> result; + + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void 
operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } +}; + +template<> +struct FunctorScalarFinal< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } +}; + +template< int ISTEAM > +struct FunctorScalarJoin; + +template<> +struct FunctorScalarJoin< 0 > { + Kokkos::View<double> result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } +}; + +template<> +struct FunctorScalarJoin< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } +}; + +template< int ISTEAM > +struct FunctorScalarJoinFinal; + +template<> +struct FunctorScalarJoinFinal< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + 
KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } +}; + +template<> +struct FunctorScalarJoinFinal< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } +}; + +template< int ISTEAM > +struct FunctorScalarJoinInit; + +template<> +struct FunctorScalarJoinInit< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarJoinInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +template< int ISTEAM > +struct FunctorScalarJoinFinalInit; + +template<> +struct FunctorScalarJoinFinalInit< 0 > { + Kokkos::View<double> result; + + FunctorScalarJoinFinalInit( Kokkos::View< 
double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +template<> +struct FunctorScalarJoinFinalInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double & dst, const volatile double & update ) const { + dst += update; + } + + KOKKOS_INLINE_FUNCTION + void final( double & update ) const { + result() = update; + } + + KOKKOS_INLINE_FUNCTION + void init( double & update ) const { + update = 0.0; + } +}; + +struct Functor1 { + KOKKOS_INLINE_FUNCTION + void operator()( const int & i, double & update ) const { + update += i; + } +}; + +struct Functor2 { + typedef double value_type[]; + + const unsigned value_count; + + Functor2( unsigned n ) : value_count( n ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const unsigned & i, double update[] ) const { + for ( unsigned j = 0; j < value_count; j++ ) { + update[j] += i; + } + } + + KOKKOS_INLINE_FUNCTION + void init( double dst[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile double dst[], + const volatile double src[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; + } +}; + +} // namespace ReduceCombinatorical + +template< class ExecSpace = Kokkos::DefaultExecutionSpace > +struct 
TestReduceCombinatoricalInstantiation { + template< class ... Args > + static void CallParallelReduce( Args... args ) { + Kokkos::parallel_reduce( args... ); + } + + template< class ... Args > + static void AddReturnArgument( Args... args ) { + Kokkos::View< double, Kokkos::HostSpace > result_view( "ResultView" ); + double expected_result = 1000.0 * 999.0 / 2.0; + + double value = 0; + Kokkos::parallel_reduce( args..., value ); + ASSERT_EQ( expected_result, value ); + + result_view() = 0; + CallParallelReduce( args..., result_view ); + ASSERT_EQ( expected_result, result_view() ); + + value = 0; + CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >( &value ) ); + ASSERT_EQ( expected_result, value ); + + result_view() = 0; + const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_const_um = result_view; + CallParallelReduce( args..., result_view_const_um ); + ASSERT_EQ( expected_result, result_view_const_um() ); + + value = 0; + CallParallelReduce( args..., Test::ReduceCombinatorical::AddPlus< double >( value ) ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } + + value = 0; + Test::ReduceCombinatorical::AddPlus< double > add( value ); + CallParallelReduce( args..., add ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } + } + + template< class ... 
Args > + static void AddLambdaRange( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const int & i, double & lsum ) { + lsum += i; + }); + } + + template< class ... Args > + static void AddLambdaTeam( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type & team, double & update ) { + update += 1.0 / team.team_size() * team.league_rank(); + }); + } + + template< class ... Args > + static void AddLambdaRange( Kokkos::InvalidType, Args... args ) {} + + template< class ... Args > + static void AddLambdaTeam( Kokkos::InvalidType, Args... args ) {} + + template< int ISTEAM, class ... Args > + static void AddFunctor( Args... args ) { + Kokkos::View< double > result_view( "FunctorView" ); + auto h_r = Kokkos::create_mirror_view( result_view ); + Test::ReduceCombinatorical::FunctorScalar< ISTEAM > functor( result_view ); + double expected_result = 1000.0 * 999.0 / 2.0; + + AddReturnArgument( args..., functor ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalar< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarInit< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoin< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoinInit< ISTEAM >( result_view ) ); + + h_r() = 0; + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); + + h_r() = 0; + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); + + h_r() = 0; + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., 
Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); + } + + template< class ... Args > + static void AddFunctorLambdaRange( Args... args ) { + AddFunctor< 0, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaRange( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif + } + + template< class ... Args > + static void AddFunctorLambdaTeam( Args... args ) { + AddFunctor< 1, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaTeam( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif + } + + template< class ... Args > + static void AddPolicy( Args... args ) { + int N = 1000; + Kokkos::RangePolicy< ExecSpace > policy( 0, N ); + + AddFunctorLambdaRange( args..., 1000 ); + AddFunctorLambdaRange( args..., N ); + AddFunctorLambdaRange( args..., policy ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Static> >( 0, N ).set_chunk_size( 10 ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ).set_chunk_size( 10 ) ); + + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Static> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, 
Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); + } + + static void execute_a() { + AddPolicy(); + } + + static void execute_b() { + std::string s( "Std::String" ); + AddPolicy( s.c_str() ); + AddPolicy( "Char Constant" ); + } + + static void execute_c() { + std::string s( "Std::String" ); + AddPolicy( s ); + } +}; + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp index 547e03497601a0a7da8bc3d0027ee9fef603e196..fa7669c5edbb87a6850dc8191cf93a2629949080 100644 --- a/lib/kokkos/core/unit_test/TestScan.hpp +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -41,7 +41,8 @@ //@HEADER */ -#include <stdio.h> +#include <Kokkos_Core.hpp> +#include <cstdio> namespace Test { @@ -113,4 +114,29 @@ struct TestScan { } }; +TEST_F( TEST_CATEGORY, scan ) +{ + TestScan< TEST_EXECSPACE >::test_range( 1, 1000 ); + TestScan< TEST_EXECSPACE >( 0 ); + TestScan< TEST_EXECSPACE >( 100000 ); + TestScan< TEST_EXECSPACE >( 10000000 ); + TEST_EXECSPACE::fence(); +} + + +/*TEST_F( TEST_CATEGORY, scan_small ) +{ + typedef TestScan< TEST_EXECSPACE, Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + TEST_EXECSPACE::fence(); +}*/ + + } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index 57e47d4baa0d177dca9379cf43a05742af2519d1..3a88475620fbf855e9a3b360d87ce164f4710376 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -44,11 +44,13 @@ #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP #define KOKKOS_UNITTEST_TASKSCHEDULER_HPP -#include <stdio.h> +#include <Kokkos_Macros.hpp> +#if defined( KOKKOS_ENABLE_TASKDAG ) +#include <Kokkos_Core.hpp> +#include <cstdio> #include <iostream> #include <cmath> -#if 
defined( KOKKOS_ENABLE_TASKDAG ) namespace TestTaskScheduler { @@ -137,9 +139,15 @@ struct TestFib { typedef typename sched_type::memory_space memory_space; - enum { Log2_SuperBlockSize = 12 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 1u << 12 }; - sched_type root_sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); + sched_type root_sched( memory_space() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) , TestFib( root_sched, i ) ); @@ -168,6 +176,54 @@ struct TestFib namespace TestTaskScheduler { +template< class Space > +struct TestTaskSpawn { + typedef Kokkos::TaskScheduler< Space > sched_type; + typedef Kokkos::Future< Space > future_type; + typedef void value_type; + + sched_type m_sched ; + future_type m_future ; + + KOKKOS_INLINE_FUNCTION + TestTaskSpawn( const sched_type & arg_sched + , const future_type & arg_future + ) + : m_sched( arg_sched ) + , m_future( arg_future ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( typename sched_type::member_type & ) + { + if ( ! m_future.is_null() ) { + Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) , TestTaskSpawn( m_sched , future_type() ) ); + } + } + + static void run() + { + typedef typename sched_type::memory_space memory_space; + + // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool. 
+ enum { MemoryCapacity = 16000 }; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 1u << 12 }; + + sched_type sched( memory_space() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); + + auto f = Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskSpawn( sched, future_type() ) ); + Kokkos::host_spawn( Kokkos::TaskSingle( f ), TestTaskSpawn( sched, f ) ); + + Kokkos::wait( sched ); + } +}; + template< class Space > struct TestTaskDependence { typedef Kokkos::TaskScheduler< Space > sched_type; @@ -220,8 +276,15 @@ struct TestTaskDependence { // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool. enum { MemoryCapacity = 16000 }; - enum { Log2_SuperBlockSize = 12 }; - sched_type sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 1u << 12 }; + + sched_type sched( memory_space() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); accum_type accum( "accum" ); @@ -326,12 +389,7 @@ struct TestTaskTeam { tot = 0; Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, begin, end ) , [&] ( int i, long & res ) { res += parfor_result[i]; } -#if 0 - , Kokkos::Sum( tot ) -#else - , [] ( long & dst, const long & src ) { dst += src; } - , tot -#endif + , Kokkos::Experimental::Sum<long>( tot ) ); Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) @@ -403,6 +461,7 @@ struct TestTaskTeam { parreduce_check[i] += result - expected; }); */ + } static void run( long n ) @@ -411,7 +470,15 @@ struct TestTaskTeam { //const unsigned memory_capacity = 100000; // Fails with SPAN=1 for serial and OMP. 
const unsigned memory_capacity = 400000; - sched_type root_sched( typename sched_type::memory_space(), memory_capacity ); + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 1u << 12 }; + + sched_type root_sched( typename sched_type::memory_space() + , memory_capacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); view_type root_parfor_result( "parfor_result", n + 1 ); view_type root_parreduce_check( "parreduce_check", n + 1 ); @@ -443,24 +510,31 @@ struct TestTaskTeam { Kokkos::deep_copy( host_parscan_result, root_parscan_result ); Kokkos::deep_copy( host_parscan_check, root_parscan_check ); + long error_count = 0 ; + for ( long i = 0; i <= n; ++i ) { const long answer = i; if ( host_parfor_result( i ) != answer ) { + ++error_count ; std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " << host_parfor_result( i ) << " != " << answer << std::endl; } if ( host_parreduce_check( i ) != 0 ) { + ++error_count ; std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " << host_parreduce_check( i ) << " != 0" << std::endl; } if ( host_parscan_check( i ) != 0 ) { + ++error_count ; std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " << host_parscan_check( i ) << " != 0" << std::endl; } } + + ASSERT_EQ( 0L , error_count ); } }; @@ -524,8 +598,15 @@ struct TestTaskTeamValue { //const unsigned memory_capacity = 10000; // Causes memory pool infinite loop. 
const unsigned memory_capacity = 100000; + enum { MinBlockSize = 64 }; + enum { MaxBlockSize = 1024 }; + enum { SuperBlockSize = 1u << 12 }; + sched_type root_sched( typename sched_type::memory_space() - , memory_capacity ); + , memory_capacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize ); view_type root_result( "result", n + 1 ); @@ -556,6 +637,41 @@ struct TestTaskTeamValue { } // namespace TestTaskScheduler -#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) +namespace Test { + +TEST_F( TEST_CATEGORY, task_fib ) +{ + const int N = 24 ; // 25 triggers tbd bug on Cuda/Pascal + for ( int i = 0; i < N; ++i ) { + TestTaskScheduler::TestFib< TEST_EXECSPACE >::run( i , ( i + 1 ) * ( i + 1 ) * 10000 ); + } +} +#if defined(KOKKOS_ARCH_MAXWELL) || defined(KOKKOS_ARCH_PASCAL) + // TODO: Resolve bug in task DAG for Pascal + #define KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL +#endif + +#ifndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL + +TEST_F( TEST_CATEGORY, task_depend ) +{ + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< TEST_EXECSPACE >::run( i ); + } +} + +TEST_F( TEST_CATEGORY, task_team ) +{ + TestTaskScheduler::TestTaskTeam< TEST_EXECSPACE >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. 
+} + +#else //ndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL +#undef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL +#endif //ndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL + +} +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) #endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP + diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index 11a523921db9995c18d38ac5e18661244acd0ecb..a9d733e5ef11eab8a347c92e381e5376e3f50a56 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -41,7 +41,7 @@ //@HEADER */ -#include <stdio.h> +#include <cstdio> #include <stdexcept> #include <sstream> #include <iostream> @@ -308,7 +308,8 @@ public: } // Team max: - const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ), JoinMax() ); + int long m = (long int) ( ind.league_rank() + ind.team_rank() ); + ind.team_reduce( Kokkos::Experimental::Max<int long>(m) ); if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) { printf( "ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n", @@ -491,6 +492,7 @@ struct TestSharedTeam { namespace Test { #if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) template< class MemorySpace, class ExecSpace, class ScheduleType > struct TestLambdaSharedTeam { TestLambdaSharedTeam() { run(); } @@ -558,6 +560,7 @@ struct TestLambdaSharedTeam { } }; #endif +#endif } // namespace Test @@ -655,7 +658,7 @@ struct TestScratchTeam { int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT ); - Kokkos::parallel_reduce( team_exec.set_scratch_size( 0, Kokkos::PerTeam( team_scratch_size ), + Kokkos::parallel_reduce( team_exec.set_scratch_size( 1, Kokkos::PerTeam( team_scratch_size ), Kokkos::PerThread( thread_scratch_size ) ), Functor(), result_type( & error_count ) ); @@ -866,6 +869,7 @@ struct ClassWithShmemSizeFunction { 
template< class ExecSpace, class ScheduleType > void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; Kokkos::View< int, ExecSpace > d_errors( "Errors" ); errors = d_errors; @@ -899,6 +903,7 @@ void test_team_mulit_level_scratch_test_lambda() { ASSERT_EQ( error, 0 ); Kokkos::fence(); #endif +#endif } } // namespace Test @@ -944,4 +949,5 @@ struct TestShmemSize { } }; + } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index 8d16ac66db8abbf1b5afc3f12aaff7afe0159307..e9e2f7548aebf589796d51e4f5bd3490ecfd0fab 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -75,6 +75,14 @@ struct my_complex { return *this ; } + KOKKOS_INLINE_FUNCTION + my_complex & operator=( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + return *this ; + } + KOKKOS_INLINE_FUNCTION my_complex( const volatile my_complex & src ) { re = src.re; @@ -159,6 +167,20 @@ struct my_complex { return re; } }; +} + +namespace Kokkos { +template<> +struct reduction_identity<TestTeamVector::my_complex > { + typedef reduction_identity<double> t_red_ident; + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVector::my_complex sum() + {return TestTeamVector::my_complex(t_red_ident::sum());} + KOKKOS_FORCEINLINE_FUNCTION static TestTeamVector::my_complex prod() + {return TestTeamVector::my_complex(t_red_ident::prod());} +}; +} + +namespace TestTeamVector { template< typename Scalar, class ExecutionSpace > struct functor_team_for { @@ -267,13 +289,13 @@ struct functor_team_reduce { }; template< typename Scalar, class ExecutionSpace > -struct functor_team_reduce_join { +struct functor_team_reduce_reducer { typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef 
ExecutionSpace execution_space; Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - functor_team_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + functor_team_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } @@ -285,8 +307,7 @@ struct functor_team_reduce_join { { val += i - team.league_rank() + team.league_size() + team.team_size(); }, - [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, - value + Kokkos::Experimental::Sum<Scalar>(value) ); team.team_barrier(); @@ -300,7 +321,7 @@ struct functor_team_reduce_join { } if ( test != value ) { - printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + printf( "FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", team.league_rank(), team.team_rank(), static_cast<double>( test ), static_cast<double>( value ) ); @@ -420,13 +441,13 @@ struct functor_team_vector_reduce { }; template< typename Scalar, class ExecutionSpace > -struct functor_team_vector_reduce_join { +struct functor_team_vector_reduce_reducer { typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - functor_team_vector_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + functor_team_vector_reduce_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } @@ -438,8 +459,7 @@ struct functor_team_vector_reduce_join { { val += i - team.league_rank() + team.league_size() + team.team_size(); }, - [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, - value + Kokkos::Experimental::Sum<Scalar>(value) ); team.team_barrier(); 
@@ -453,7 +473,7 @@ struct functor_team_vector_reduce_join { } if ( test != value ) { - printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + printf( "FAILED team_vector_parallel_reduce_reducer %i %i %f %f\n", team.league_rank(), team.team_rank(), static_cast<double>( test ), static_cast<double>( value ) ); @@ -590,13 +610,13 @@ struct functor_vec_red { }; template< typename Scalar, class ExecutionSpace > -struct functor_vec_red_join { +struct functor_vec_red_reducer { typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - functor_vec_red_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} + functor_vec_red_reducer( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION void operator()( typename policy_type::member_type team ) const { @@ -608,9 +628,7 @@ struct functor_vec_red_join { Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) { val *= ( i % 5 + 1 ); - }, - [&] ( Scalar & val, const Scalar & src ) { val *= src; }, - value + }, Kokkos::Experimental::Prod<Scalar>(value) ); Kokkos::single( Kokkos::PerThread( team ), [&] () @@ -620,7 +638,7 @@ struct functor_vec_red_join { for ( int i = 0; i < 13; i++ ) test *= ( i % 5 + 1 ); if ( test != value ) { - printf( "FAILED vector_par_reduce_join %i %i %f %f\n", + printf( "FAILED vector_par_reduce_reducer %i %i %f %f\n", team.league_rank(), team.team_rank(), (double) test, (double) value ); flag() = 1; @@ -685,8 +703,13 @@ bool test_scalar( int nteams, int team_size, int test ) { functor_vec_red< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 1 ) { + #if defined(KOKKOS_ENABLE_CUDA) + #if defined(KOKKOS_CUDA_CLANG_WORKAROUND) || defined(KOKKOS_ARCH_PASCAL) + if(!std::is_same<ExecutionSpace,Kokkos::Cuda>::value) + #endif + #endif Kokkos::parallel_for( 
Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), - functor_vec_red_join< Scalar, ExecutionSpace >( d_flag ) ); + functor_vec_red_reducer< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 2 ) { Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), @@ -710,7 +733,7 @@ bool test_scalar( int nteams, int team_size, int test ) { } else if ( test == 7 ) { Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), - functor_team_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + functor_team_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); } else if ( test == 8 ) { Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), @@ -722,7 +745,7 @@ bool test_scalar( int nteams, int team_size, int test ) { } else if ( test == 10 ) { Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), - functor_team_vector_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + functor_team_vector_reduce_reducer< Scalar, ExecutionSpace >( d_flag ) ); } Kokkos::deep_copy( h_flag, d_flag ); @@ -743,3 +766,129 @@ bool Test( int test ) { } } // namespace TestTeamVector + +namespace Test { + +// Computes y^T*A*x +// ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) + +#if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || (defined( KOKKOS_ENABLE_CUDA_LAMBDA ) && (8000 <= CUDA_VERSION)) + +template< typename ScalarType, class DeviceType > +class TestTripleNestedReduce +{ +public: + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + + TestTripleNestedReduce( const size_type & nrows, const size_type & ncols + , const size_type & team_size, const size_type & vector_length ) + { + run_test( nrows, ncols, team_size, vector_length ); + } + + void run_test( const size_type & nrows, const size_type & ncols + , const size_type & team_size, const size_type & vector_length ) + { + //typedef Kokkos::LayoutLeft Layout; + typedef Kokkos::LayoutRight Layout; + + typedef Kokkos::View< ScalarType*, DeviceType > ViewVector; + typedef Kokkos::View< ScalarType**, Layout, DeviceType > ViewMatrix; + + ViewVector y( "y", nrows ); + ViewVector x( "x", ncols ); + ViewMatrix A( "A", nrows, ncols ); + + typedef Kokkos::RangePolicy<DeviceType> range_policy; + + // Initialize y vector. + Kokkos::parallel_for( range_policy( 0, nrows ), KOKKOS_LAMBDA ( const int i ) { y( i ) = 1; } ); + + // Initialize x vector. + Kokkos::parallel_for( range_policy( 0, ncols ), KOKKOS_LAMBDA ( const int i ) { x( i ) = 1; } ); + + typedef Kokkos::TeamPolicy< DeviceType > team_policy; + typedef typename Kokkos::TeamPolicy< DeviceType >::member_type member_type; + + // Initialize A matrix, note 2D indexing computation. + Kokkos::parallel_for( team_policy( nrows, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type & teamMember ) { + const int j = teamMember.league_rank(); + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, ncols ), [&] ( const int i ) { + A( j, i ) = 1; + } ); + } ); + + // Three level parallelism kernel to force caching of vector x. 
+ ScalarType result = 0.0; + int chunk_size = 128; + Kokkos::parallel_reduce( team_policy( nrows / chunk_size, team_size, vector_length ), + KOKKOS_LAMBDA ( const member_type & teamMember, double & update ) { + const int row_start = teamMember.league_rank() * chunk_size; + const int row_end = row_start + chunk_size; + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, row_start, row_end ), [&] ( const int i ) { + ScalarType sum_i = 0.0; + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember, ncols ), [&] ( const int j, ScalarType &innerUpdate ) { + innerUpdate += A( i, j ) * x( j ); + }, sum_i ); + Kokkos::single( Kokkos::PerThread( teamMember ), [&] () { + update += y( i ) * sum_i; + } ); + } ); + }, result ); + + const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; + ASSERT_EQ( solution, result ); + } +}; + +#else // #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) + +template< typename ScalarType, class DeviceType > +class TestTripleNestedReduce +{ +public: + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + + TestTripleNestedReduce( const size_type &, const size_type + , const size_type &, const size_type ) + {} +}; + +#endif + +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) +TEST_F( TEST_CATEGORY, team_vector ) +{ + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 9 ) 
) ); + ASSERT_TRUE( ( TestTeamVector::Test< TEST_EXECSPACE >( 10 ) ) ); +} +#endif + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) +#ifndef SKIP_TEST +TEST_F( TEST_CATEGORY, triple_nested_parallelism ) +{ + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, TEST_EXECSPACE >( 8192, 2048, 16, 16 ); +} +#endif +#endif +} diff --git a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp index 7bcf3f8a32691ee8a27bac5ed997ed68c6c39082..6501f16ca50f30e20a7838400d2396fbcf851c39 100644 --- a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp +++ b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp @@ -206,3 +206,11 @@ void TestTemplateMetaFunctions() { } } // namespace + +namespace Test { +TEST_F( TEST_CATEGORY, template_meta_functions ) +{ + TestTemplateMetaFunctions< int, TEST_EXECSPACE >(); +} +} + diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 7d096c24c38ee82a6930ed192858e538e345dc29..8f57dfea75fd362d0fc9669820e19c92bbc9ff74 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -139,4 +139,31 @@ void test( const size_t dim0, const size_t dim1 ) } // namespace TestTile +namespace Test { +TEST_F( TEST_CATEGORY, tile_layout ) +{ + TestTile::test< TEST_EXECSPACE, 1, 1 >( 1, 1 ); + TestTile::test< TEST_EXECSPACE, 1, 1 >( 2, 3 ); + TestTile::test< TEST_EXECSPACE, 1, 1 >( 9, 10 ); + + TestTile::test< TEST_EXECSPACE, 2, 2 >( 1, 1 ); + TestTile::test< TEST_EXECSPACE, 2, 2 >( 2, 3 ); + TestTile::test< TEST_EXECSPACE, 2, 2 >( 4, 4 ); + TestTile::test< TEST_EXECSPACE, 2, 2 >( 9, 9 ); + + TestTile::test< TEST_EXECSPACE, 2, 4 >( 9, 9 ); + TestTile::test< TEST_EXECSPACE, 4, 2 >( 9, 9 ); + + 
TestTile::test< TEST_EXECSPACE, 4, 4 >( 1, 1 ); + TestTile::test< TEST_EXECSPACE, 4, 4 >( 4, 4 ); + TestTile::test< TEST_EXECSPACE, 4, 4 >( 9, 9 ); + TestTile::test< TEST_EXECSPACE, 4, 4 >( 9, 11 ); + + TestTile::test< TEST_EXECSPACE, 8, 8 >( 1, 1 ); + TestTile::test< TEST_EXECSPACE, 8, 8 >( 4, 4 ); + TestTile::test< TEST_EXECSPACE, 8, 8 >( 9, 9 ); + TestTile::test< TEST_EXECSPACE, 8, 8 >( 9, 11 ); +} + +} #endif //TEST_TILE_HPP diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index cbf86dc58c78fb44442d08497874a667f3923efb..232163f11e1f86b13ce986837c5d17ffa4431460 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -68,8 +68,8 @@ struct TestViewOperator { typedef typename DeviceType::execution_space execution_space; - static const unsigned N = 100; - static const unsigned D = 3; + enum { N = 1000 }; + enum { D = 3 }; typedef Kokkos::View< T*[D], execution_space > view_type; @@ -81,11 +81,6 @@ struct TestViewOperator , v2( "v2", N ) {} - static void testit() - { - Kokkos::parallel_for( N, TestViewOperator() ); - } - KOKKOS_INLINE_FUNCTION void operator()( const unsigned i ) const { @@ -144,13 +139,11 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 8 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; - int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -237,13 +230,12 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 7 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -318,13 +310,12 @@ struct TestViewOperator_LeftAndRight< 
DataType, DeviceType, 6 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -402,13 +393,12 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 5 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -485,13 +475,12 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 4 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -565,13 +554,12 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 3 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -650,13 +638,12 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 2 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { - TestViewOperator_LeftAndRight driver; int error_flag = 0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -733,13 +720,13 @@ struct TestViewOperator_LeftAndRight< DataType, DeviceType, 1 > , right_alloc( allocation_count( right ) ) {} - static void testit() + void testit() { TestViewOperator_LeftAndRight driver; int error_flag = 
0; - Kokkos::parallel_reduce( 1, driver, error_flag ); + Kokkos::parallel_reduce( 1, *this, error_flag ); ASSERT_EQ( error_flag, 0 ); } @@ -839,15 +826,18 @@ public: run_test_subview_strided(); run_test_vector(); - TestViewOperator< T, device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4], device >::testit(); - TestViewOperator_LeftAndRight< int[2][3], device >::testit(); - TestViewOperator_LeftAndRight< int[2], device >::testit(); + + {TestViewOperator< T, device > f; Kokkos::parallel_for(int(N0),f);} +#ifndef KOKKOS_ENABLE_OPENMPTARGET + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3], device > f8; f8.testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2], device > f7; f7.testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >f6; f6.testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >f5; f5.testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2], device >f4; f4.testit(); + TestViewOperator_LeftAndRight< int[2][3][4], device >f3; f3.testit(); + TestViewOperator_LeftAndRight< int[2][3], device >f2; f2.testit(); + TestViewOperator_LeftAndRight< int[2], device >f1; f1.testit(); +#endif TestViewMirror< Kokkos::LayoutLeft, device >::testit(); TestViewMirror< Kokkos::LayoutRight, device >::testit(); } @@ -883,8 +873,9 @@ public: Kokkos::deep_copy( dx, hx ); Kokkos::deep_copy( dy, dx ); Kokkos::deep_copy( hy, dy ); - +#ifndef KOKKOS_ENABLE_OPENMPTARGET ASSERT_EQ( hx(), hy() ); +#endif } static void run_test() @@ -1005,7 +996,9 @@ public: ASSERT_EQ( dy.dimension_3(), unsigned( N3 ) ); ASSERT_EQ( 
unmanaged_from_ptr_dx.capacity(), unsigned( N0 ) * unsigned( N1 ) * unsigned( N2 ) * unsigned( N3 ) ); - +#ifdef KOKKOS_ENABLE_OPENMPTARGET +return; +#endif hx = Kokkos::create_mirror( dx ); hy = Kokkos::create_mirror( dy ); @@ -1319,4 +1312,67 @@ public: } }; +TEST_F( TEST_CATEGORY, view_api ) +{ + TestViewAPI< double, TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_remap ) +{ + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + #ifdef KOKKOS_ENABLE_CUDA + #define EXECSPACE std::conditional<std::is_same<TEST_EXECSPACE,Kokkos::Cuda>::value,Kokkos::CudaHostPinnedSpace,TEST_EXECSPACE>::type + #else + #ifdef KOKKOS_ENABLE_OPENMPTARGET + #define EXECSPACE Kokkos::HostSpace + #else + #define EXECSPACE TEST_EXECSPACE + #endif + #endif + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + EXECSPACE > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + EXECSPACE > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + EXECSPACE > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + Kokkos::fence(); + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + Kokkos::fence(); + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
+ Kokkos::deep_copy( output, input ); + Kokkos::fence(); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } +} + } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp similarity index 77% rename from lib/kokkos/core/unit_test/TestViewMapping.hpp rename to lib/kokkos/core/unit_test/TestViewMapping_a.hpp index 71604bed51d93e374c8de9776bb24d2135c95182..6830c2e049bac5badc2ce8923026b25ba8e1f171 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -1030,163 +1030,10 @@ void test_view_mapping() } } -template< class Space > -struct TestViewMappingSubview -{ - typedef typename Space::execution_space ExecSpace; - typedef typename Space::memory_space MemSpace; - - typedef Kokkos::pair< int, int > range; - - enum { AN = 10 }; - typedef Kokkos::View< int*, ExecSpace > AT; - typedef Kokkos::View< const int*, ExecSpace > ACT; - typedef Kokkos::Subview< AT, range > AS; - - enum { BN0 = 10, BN1 = 11, BN2 = 12 }; - typedef Kokkos::View< int***, ExecSpace > BT; - typedef Kokkos::Subview< BT, range, range, range > BS; - - enum { CN0 = 10, CN1 = 11, CN2 = 12 }; - typedef Kokkos::View< int***[13][14], ExecSpace > CT; - typedef Kokkos::Subview< CT, range, range, range, int, int > CS; - - enum { DN0 = 10, DN1 = 11, DN2 = 12, DN3 = 13, DN4 = 14 }; - typedef Kokkos::View< int***[DN3][DN4], ExecSpace > DT; - typedef Kokkos::Subview< DT, int, range, range, range, int > DS; - - typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT; - typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1; - - static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value - , "Subview layout error for rank 1 
subview of left-most range of LayoutLeft" ); - - typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT; - typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1; - - static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value - , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); - - AT Aa; - AS Ab; - ACT Ac; - BT Ba; - BS Bb; - CT Ca; - CS Cb; - DT Da; - DS Db; - - TestViewMappingSubview() - : Aa( "Aa", AN ) - , Ab( Kokkos::Experimental::subview( Aa, std::pair< int, int >( 1, AN - 1 ) ) ) - , Ac( Aa, std::pair< int, int >( 1, AN - 1 ) ) - , Ba( "Ba", BN0, BN1, BN2 ) - , Bb( Kokkos::Experimental::subview( Ba - , std::pair< int, int >( 1, BN0 - 1 ) - , std::pair< int, int >( 1, BN1 - 1 ) - , std::pair< int, int >( 1, BN2 - 1 ) - ) ) - , Ca( "Ca", CN0, CN1, CN2 ) - , Cb( Kokkos::Experimental::subview( Ca - , std::pair< int, int >( 1, CN0 - 1 ) - , std::pair< int, int >( 1, CN1 - 1 ) - , std::pair< int, int >( 1, CN2 - 1 ) - , 1 - , 2 - ) ) - , Da( "Da", DN0, DN1, DN2 ) - , Db( Kokkos::Experimental::subview( Da - , 1 - , std::pair< int, int >( 1, DN1 - 1 ) - , std::pair< int, int >( 1, DN2 - 1 ) - , std::pair< int, int >( 1, DN3 - 1 ) - , 2 - ) ) - {} - - KOKKOS_INLINE_FUNCTION - void operator()( const int, long & error_count ) const - { - auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa, Kokkos::pair< int, int >( 1, AN - 1 ) ); - - for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ab[i - 1] ) ++error_count; - for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ac[i - 1] ) ++error_count; - for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ad[i - 1] ) ++error_count; - - for ( int i2 = 1; i2 < BN2 - 1; ++i2 ) - for ( int i1 = 1; i1 < BN1 - 1; ++i1 ) - for ( int i0 = 1; i0 < BN0 - 1; ++i0 ) - { - if ( & Ba( i0, i1, i2 ) != & Bb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; - } - - for ( int i2 = 1; i2 < CN2 - 1; ++i2 ) - for ( int i1 = 1; i1 < 
CN1 - 1; ++i1 ) - for ( int i0 = 1; i0 < CN0 - 1; ++i0 ) - { - if ( & Ca( i0, i1, i2, 1, 2 ) != & Cb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; - } - - for ( int i2 = 1; i2 < DN3 - 1; ++i2 ) - for ( int i1 = 1; i1 < DN2 - 1; ++i1 ) - for ( int i0 = 1; i0 < DN1 - 1; ++i0 ) - { - if ( & Da( 1, i0, i1, i2, 2 ) != & Db( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; - } - } - - static void run() - { - TestViewMappingSubview self; - - ASSERT_EQ( self.Aa.dimension_0(), AN ); - ASSERT_EQ( self.Ab.dimension_0(), AN - 2 ); - ASSERT_EQ( self.Ac.dimension_0(), AN - 2 ); - ASSERT_EQ( self.Ba.dimension_0(), BN0 ); - ASSERT_EQ( self.Ba.dimension_1(), BN1 ); - ASSERT_EQ( self.Ba.dimension_2(), BN2 ); - ASSERT_EQ( self.Bb.dimension_0(), BN0 - 2 ); - ASSERT_EQ( self.Bb.dimension_1(), BN1 - 2 ); - ASSERT_EQ( self.Bb.dimension_2(), BN2 - 2 ); - - ASSERT_EQ( self.Ca.dimension_0(), CN0 ); - ASSERT_EQ( self.Ca.dimension_1(), CN1 ); - ASSERT_EQ( self.Ca.dimension_2(), CN2 ); - ASSERT_EQ( self.Ca.dimension_3(), 13 ); - ASSERT_EQ( self.Ca.dimension_4(), 14 ); - ASSERT_EQ( self.Cb.dimension_0(), CN0 - 2 ); - ASSERT_EQ( self.Cb.dimension_1(), CN1 - 2 ); - ASSERT_EQ( self.Cb.dimension_2(), CN2 - 2 ); - - ASSERT_EQ( self.Da.dimension_0(), DN0 ); - ASSERT_EQ( self.Da.dimension_1(), DN1 ); - ASSERT_EQ( self.Da.dimension_2(), DN2 ); - ASSERT_EQ( self.Da.dimension_3(), DN3 ); - ASSERT_EQ( self.Da.dimension_4(), DN4 ); - - ASSERT_EQ( self.Db.dimension_0(), DN1 - 2 ); - ASSERT_EQ( self.Db.dimension_1(), DN2 - 2 ); - ASSERT_EQ( self.Db.dimension_2(), DN3 - 2 ); - - ASSERT_EQ( self.Da.stride_1(), self.Db.stride_0() ); - ASSERT_EQ( self.Da.stride_2(), self.Db.stride_1() ); - ASSERT_EQ( self.Da.stride_3(), self.Db.stride_2() ); - - long error_count = -1; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), self, error_count ); - ASSERT_EQ( error_count, 0 ); - } -}; - -template< class Space > -void test_view_mapping_subview() +TEST_F( TEST_CATEGORY , view_mapping ) { - typedef typename 
Space::execution_space ExecSpace; - - TestViewMappingSubview< ExecSpace >::run(); + test_view_mapping< TEST_EXECSPACE >(); } - /*--------------------------------------------------------------------------*/ template< class ViewType > @@ -1268,45 +1115,43 @@ struct TestViewMapOperator { } } - constexpr static size_t N0 = 10; - constexpr static size_t N1 = 9; - constexpr static size_t N2 = 8; - constexpr static size_t N3 = 7; - constexpr static size_t N4 = 6; - constexpr static size_t N5 = 5; - constexpr static size_t N6 = 4; - constexpr static size_t N7 = 3; + enum { N0 = 10 }; + enum { N1 = 9 }; + enum { N2 = 8 }; + enum { N3 = 7 }; + enum { N4 = 6 }; + enum { N5 = 5 }; + enum { N6 = 4 }; + enum { N7 = 3 }; TestViewMapOperator() : v( "Test", N0, N1, N2, N3, N4, N5, N6, N7 ) {} - static void run() + void run() { - TestViewMapOperator self; - - ASSERT_EQ( self.v.dimension_0(), ( 0 < ViewType::rank ? N0 : 1 ) ); - ASSERT_EQ( self.v.dimension_1(), ( 1 < ViewType::rank ? N1 : 1 ) ); - ASSERT_EQ( self.v.dimension_2(), ( 2 < ViewType::rank ? N2 : 1 ) ); - ASSERT_EQ( self.v.dimension_3(), ( 3 < ViewType::rank ? N3 : 1 ) ); - ASSERT_EQ( self.v.dimension_4(), ( 4 < ViewType::rank ? N4 : 1 ) ); - ASSERT_EQ( self.v.dimension_5(), ( 5 < ViewType::rank ? N5 : 1 ) ); - ASSERT_EQ( self.v.dimension_6(), ( 6 < ViewType::rank ? N6 : 1 ) ); - ASSERT_EQ( self.v.dimension_7(), ( 7 < ViewType::rank ? N7 : 1 ) ); - - ASSERT_LE( self.v.dimension_0() * - self.v.dimension_1() * - self.v.dimension_2() * - self.v.dimension_3() * - self.v.dimension_4() * - self.v.dimension_5() * - self.v.dimension_6() * - self.v.dimension_7() - , self.v.span() ); + ASSERT_EQ( v.dimension_0(), ( 0 < ViewType::rank ? TestViewMapOperator<ViewType>::N0 : 1 ) ); + ASSERT_EQ( v.dimension_1(), ( 1 < ViewType::rank ? TestViewMapOperator<ViewType>::N1 : 1 ) ); + ASSERT_EQ( v.dimension_2(), ( 2 < ViewType::rank ? TestViewMapOperator<ViewType>::N2 : 1 ) ); + ASSERT_EQ( v.dimension_3(), ( 3 < ViewType::rank ? 
TestViewMapOperator<ViewType>::N3 : 1 ) ); + ASSERT_EQ( v.dimension_4(), ( 4 < ViewType::rank ? TestViewMapOperator<ViewType>::N4 : 1 ) ); + ASSERT_EQ( v.dimension_5(), ( 5 < ViewType::rank ? TestViewMapOperator<ViewType>::N5 : 1 ) ); + ASSERT_EQ( v.dimension_6(), ( 6 < ViewType::rank ? TestViewMapOperator<ViewType>::N6 : 1 ) ); + ASSERT_EQ( v.dimension_7(), ( 7 < ViewType::rank ? TestViewMapOperator<ViewType>::N7 : 1 ) ); + + ASSERT_LE( v.dimension_0() * + v.dimension_1() * + v.dimension_2() * + v.dimension_3() * + v.dimension_4() * + v.dimension_5() * + v.dimension_6() * + v.dimension_7() + , v.span() ); long error_count; - Kokkos::RangePolicy< typename ViewType::execution_space > range( 0, self.v.dimension_0() ); - Kokkos::parallel_reduce( range, self, error_count ); + Kokkos::RangePolicy< typename ViewType::execution_space > range( 0, v.dimension_0() ); + Kokkos::parallel_reduce( range, *this, error_count ); ASSERT_EQ( 0, error_count ); - } +} }; template< class Space > @@ -1314,150 +1159,29 @@ void test_view_mapping_operator() { typedef typename Space::execution_space ExecSpace; - TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int**, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutLeft, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutLeft, ExecSpace> >::run(); - - TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int**, 
Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutRight, ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutRight, ExecSpace> >::run(); + { TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int**, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutLeft, ExecSpace> > f; f.run(); } + + { TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int**, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } + { TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutRight, ExecSpace> > f; f.run(); } } 
-/*--------------------------------------------------------------------------*/ - -template< class Space > -struct TestViewMappingAtomic { - typedef typename Space::execution_space ExecSpace; - typedef typename Space::memory_space MemSpace; - - typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait; - - typedef Kokkos::View< int *, ExecSpace > T; - typedef Kokkos::View< int *, ExecSpace, mem_trait > T_atom; - - T x; - T_atom x_atom; - - constexpr static size_t N = 100000; - - struct TagInit {}; - struct TagUpdate {}; - struct TagVerify {}; - - KOKKOS_INLINE_FUNCTION - void operator()( const TagInit &, const int i ) const - { x( i ) = i; } - - KOKKOS_INLINE_FUNCTION - void operator()( const TagUpdate &, const int i ) const - { x_atom( i % 2 ) += 1; } - - KOKKOS_INLINE_FUNCTION - void operator()( const TagVerify &, const int i, long & error_count ) const - { - if ( i < 2 ) { if ( x( i ) != int( i + N / 2 ) ) ++error_count; } - else { if ( x( i ) != int( i ) ) ++error_count; } - } - - TestViewMappingAtomic() - : x( "x", N ) - , x_atom( x ) - {} - - static void run() - { - ASSERT_TRUE( T::reference_type_is_lvalue_reference ); - ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); - - TestViewMappingAtomic self; - - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagInit >( 0, N ), self ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagUpdate >( 0, N ), self ); - - long error_count = -1; - - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagVerify >( 0, N ), self, error_count ); - - ASSERT_EQ( 0, error_count ); - - typename TestViewMappingAtomic::T_atom::HostMirror x_host = Kokkos::create_mirror_view( self.x ); - Kokkos::deep_copy( x_host, self.x ); - - error_count = -1; - - Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::DefaultHostExecutionSpace, TagVerify >( 0, N ), - [=] ( const TagVerify &, const int i, long & tmp_error_count ) - { - if ( i < 2 ) { - if ( x_host( i ) != int( i + N / 2 ) ) ++tmp_error_count ; - } - else { - 
if ( x_host( i ) != int( i ) ) ++tmp_error_count ; - } - }, error_count); - - ASSERT_EQ( 0 , error_count ); - Kokkos::deep_copy( self.x, x_host ); - } -}; - -/*--------------------------------------------------------------------------*/ - -template< class Space > -struct TestViewMappingClassValue { - typedef typename Space::execution_space ExecSpace; - typedef typename Space::memory_space MemSpace; - - struct ValueType { - KOKKOS_INLINE_FUNCTION - ValueType() - { -#if 0 -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf( "TestViewMappingClassValue construct on Cuda\n" ); -#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf( "TestViewMappingClassValue construct on Host\n" ); -#else - printf( "TestViewMappingClassValue construct unknown\n" ); -#endif -#endif - } - KOKKOS_INLINE_FUNCTION - ~ValueType() - { -#if 0 -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf( "TestViewMappingClassValue destruct on Cuda\n" ); -#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf( "TestViewMappingClassValue destruct on Host\n" ); -#else - printf( "TestViewMappingClassValue destruct unknown\n" ); -#endif -#endif - } - }; - - static void run() - { - using namespace Kokkos::Experimental; +TEST_F( TEST_CATEGORY , view_mapping_operator ) +{ + test_view_mapping_operator< TEST_EXECSPACE >(); +} - ExecSpace::fence(); - { - View< ValueType, ExecSpace > a( "a" ); - ExecSpace::fence(); - } - ExecSpace::fence(); - } -}; +} -} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewMapping_b.hpp b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ee1c96b423e15185d3c6e4f33c9d4f9ad8e55709 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewMapping_b.hpp @@ -0,0 +1,186 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +namespace Test { + +/*--------------------------------------------------------------------------*/ + +template< class Space > +struct TestViewMappingAtomic { + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; + + typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait; + + typedef Kokkos::View< int *, ExecSpace > T; + typedef Kokkos::View< int *, ExecSpace, mem_trait > T_atom; + + T x; + T_atom x_atom; + + enum { N = 100000}; + + struct TagInit {}; + struct TagUpdate {}; + struct TagVerify {}; + + KOKKOS_INLINE_FUNCTION + void operator()( const TagInit &, const int i ) const + { x( i ) = i; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagUpdate &, const int i ) const + { x_atom( i % 2 ) += 1; } + + KOKKOS_INLINE_FUNCTION + void operator()( const TagVerify &, const int i, long & error_count ) const + { + if ( i < 2 ) { if ( x( i ) != int( i + N / 2 ) ) ++error_count; } + else { if ( x( i ) != int( i ) ) ++error_count; } + } + + TestViewMappingAtomic() + : x( "x", N ) + , x_atom( x ) + {} + + void run() { + + ASSERT_TRUE( T::reference_type_is_lvalue_reference ); + ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagInit > ( 0, N ), *this ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagUpdate >( 0, N ), *this ); + + long error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagVerify >( 0, N ), *this, error_count ); + + ASSERT_EQ( 0, error_count ); + + typename T_atom::HostMirror x_host = Kokkos::create_mirror_view( x ); + Kokkos::deep_copy( x_host, x ); + + error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< 
Kokkos::DefaultHostExecutionSpace, TagVerify >( 0, N ), + [=] ( const TagVerify &, const int i, long & tmp_error_count ) + { + if ( i < 2 ) { + if ( x_host( i ) != int( i + N / 2 ) ) ++tmp_error_count ; + } + else { + if ( x_host( i ) != int( i ) ) ++tmp_error_count ; + } + }, error_count); + + ASSERT_EQ( 0 , error_count ); + Kokkos::deep_copy( x, x_host ); + } +}; + +TEST_F( TEST_CATEGORY , view_mapping_atomic ) +{ + TestViewMappingAtomic< TEST_EXECSPACE > f; + f.run(); +} + +/*--------------------------------------------------------------------------*/ +namespace Test { +struct ValueType { + KOKKOS_INLINE_FUNCTION + ValueType() + { +#if 0 +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) + printf( "TestViewMappingClassValue construct on Cuda\n" ); +#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + printf( "TestViewMappingClassValue construct on Host\n" ); +#else + printf( "TestViewMappingClassValue construct unknown\n" ); +#endif +#endif + } + KOKKOS_INLINE_FUNCTION + ~ValueType() + { +#if 0 +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) + printf( "TestViewMappingClassValue destruct on Cuda\n" ); +#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + printf( "TestViewMappingClassValue destruct on Host\n" ); +#else + printf( "TestViewMappingClassValue destruct unknown\n" ); +#endif +#endif + } + }; +} + +template< class Space > +void test_view_mapping_class_value() +{ + typedef typename Space::execution_space ExecSpace; + + ExecSpace::fence(); + { + Kokkos::View< Test::ValueType, ExecSpace > a( "a" ); + ExecSpace::fence(); + } + ExecSpace::fence(); +} + +TEST_F( TEST_CATEGORY , view_mapping_class_value ) +{ + test_view_mapping_class_value< TEST_EXECSPACE >(); +} + +} diff --git a/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp new file mode 100644 index 0000000000000000000000000000000000000000..219a4d1f2030c20ed1279b13f7f7ceb475d2e74f --- /dev/null +++ 
b/lib/kokkos/core/unit_test/TestViewMapping_subview.hpp @@ -0,0 +1,211 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <stdexcept> +#include <sstream> +#include <iostream> + +#include <Kokkos_Core.hpp> + +namespace Test { + +template< class Space > +struct TestViewMappingSubview +{ + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; + + typedef Kokkos::pair< int, int > range; + + enum { AN = 10 }; + typedef Kokkos::View< int*, ExecSpace > AT; + typedef Kokkos::View< const int*, ExecSpace > ACT; + typedef Kokkos::Subview< AT, range > AS; + + enum { BN0 = 10, BN1 = 11, BN2 = 12 }; + typedef Kokkos::View< int***, ExecSpace > BT; + typedef Kokkos::Subview< BT, range, range, range > BS; + + enum { CN0 = 10, CN1 = 11, CN2 = 12 }; + typedef Kokkos::View< int***[13][14], ExecSpace > CT; + typedef Kokkos::Subview< CT, range, range, range, int, int > CS; + + enum { DN0 = 10, DN1 = 11, DN2 = 12, DN3 = 13, DN4 = 14 }; + typedef Kokkos::View< int***[DN3][DN4], ExecSpace > DT; + typedef Kokkos::Subview< DT, int, range, range, range, int > DS; + + typedef Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT; + typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1; + + static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value + , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); + + typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT; + typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1; + + static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value + , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); + + AT Aa; + AS Ab; + ACT Ac; + BT Ba; + BS Bb; + CT Ca; + CS Cb; + DT Da; + DS Db; + + TestViewMappingSubview() + : Aa( "Aa", AN ) + , Ab( Kokkos::Experimental::subview( Aa, 
std::pair< int, int >( 1, AN - 1 ) ) ) + , Ac( Aa, std::pair< int, int >( 1, AN - 1 ) ) + , Ba( "Ba", BN0, BN1, BN2 ) + , Bb( Kokkos::Experimental::subview( Ba + , std::pair< int, int >( 1, BN0 - 1 ) + , std::pair< int, int >( 1, BN1 - 1 ) + , std::pair< int, int >( 1, BN2 - 1 ) + ) ) + , Ca( "Ca", CN0, CN1, CN2 ) + , Cb( Kokkos::Experimental::subview( Ca + , std::pair< int, int >( 1, CN0 - 1 ) + , std::pair< int, int >( 1, CN1 - 1 ) + , std::pair< int, int >( 1, CN2 - 1 ) + , 1 + , 2 + ) ) + , Da( "Da", DN0, DN1, DN2 ) + , Db( Kokkos::Experimental::subview( Da + , 1 + , std::pair< int, int >( 1, DN1 - 1 ) + , std::pair< int, int >( 1, DN2 - 1 ) + , std::pair< int, int >( 1, DN3 - 1 ) + , 2 + ) ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int, long & error_count ) const + { + auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa, Kokkos::pair< int, int >( 1, AN - 1 ) ); + + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ab[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ac[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ad[i - 1] ) ++error_count; + + for ( int i2 = 1; i2 < BN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < BN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < BN0 - 1; ++i0 ) + { + if ( & Ba( i0, i1, i2 ) != & Bb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } + + for ( int i2 = 1; i2 < CN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < CN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < CN0 - 1; ++i0 ) + { + if ( & Ca( i0, i1, i2, 1, 2 ) != & Cb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } + + for ( int i2 = 1; i2 < DN3 - 1; ++i2 ) + for ( int i1 = 1; i1 < DN2 - 1; ++i1 ) + for ( int i0 = 1; i0 < DN1 - 1; ++i0 ) + { + if ( & Da( 1, i0, i1, i2, 2 ) != & Db( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } + } + + void run() + { + typedef typename Space::execution_space ExecSpace; + + TestViewMappingSubview< ExecSpace > self; + + ASSERT_EQ( Aa.dimension_0(), AN ); + ASSERT_EQ( Ab.dimension_0(), AN - 2 
); + ASSERT_EQ( Ac.dimension_0(), AN - 2 ); + ASSERT_EQ( Ba.dimension_0(), BN0 ); + ASSERT_EQ( Ba.dimension_1(), BN1 ); + ASSERT_EQ( Ba.dimension_2(), BN2 ); + ASSERT_EQ( Bb.dimension_0(), BN0 - 2 ); + ASSERT_EQ( Bb.dimension_1(), BN1 - 2 ); + ASSERT_EQ( Bb.dimension_2(), BN2 - 2 ); + + ASSERT_EQ( Ca.dimension_0(), CN0 ); + ASSERT_EQ( Ca.dimension_1(), CN1 ); + ASSERT_EQ( Ca.dimension_2(), CN2 ); + ASSERT_EQ( Ca.dimension_3(), 13 ); + ASSERT_EQ( Ca.dimension_4(), 14 ); + ASSERT_EQ( Cb.dimension_0(), CN0 - 2 ); + ASSERT_EQ( Cb.dimension_1(), CN1 - 2 ); + ASSERT_EQ( Cb.dimension_2(), CN2 - 2 ); + + ASSERT_EQ( Da.dimension_0(), DN0 ); + ASSERT_EQ( Da.dimension_1(), DN1 ); + ASSERT_EQ( Da.dimension_2(), DN2 ); + ASSERT_EQ( Da.dimension_3(), DN3 ); + ASSERT_EQ( Da.dimension_4(), DN4 ); + + ASSERT_EQ( Db.dimension_0(), DN1 - 2 ); + ASSERT_EQ( Db.dimension_1(), DN2 - 2 ); + ASSERT_EQ( Db.dimension_2(), DN3 - 2 ); + + ASSERT_EQ( Da.stride_1(), Db.stride_0() ); + ASSERT_EQ( Da.stride_2(), Db.stride_1() ); + ASSERT_EQ( Da.stride_3(), Db.stride_2() ); + + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), *this, error_count ); + ASSERT_EQ( error_count, 0 ); + } +}; + +TEST_F( TEST_CATEGORY , view_mapping_subview ) +{ + TestViewMappingSubview< TEST_EXECSPACE > f; + f.run(); +} + +} diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp index d624c5dda2034b04b5b1a427614f38186aa032d8..8576d3042ed822eedadeb763fe286520d1cc31d2 100644 --- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp +++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp @@ -118,4 +118,9 @@ void view_nested_view() ASSERT_EQ( 0, host_tracking( 0 ) ); } +TEST_F( TEST_CATEGORY, view_nested_view ) +{ + view_nested_view< TEST_EXECSPACE >(); +} + } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 
386301b45dbc9f9d6bb5770133d818a7eccba40e..e3a12e684e8b45688bf831ebd92f4d7074e236d2 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -40,7 +40,8 @@ // ************************************************************************ //@HEADER */ - +#ifndef TESTVIEWSUBVIEW_HPP_ +#define TESTVIEWSUBVIEW_HPP_ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> @@ -122,6 +123,7 @@ void test_auto_1d () fill_2D< mv_type, Space > f1( X, ONE ); Kokkos::parallel_for( X.dimension_0(), f1 ); + Kokkos::fence(); Kokkos::deep_copy( X_h, X ); for ( size_type j = 0; j < numCols; ++j ) { for ( size_type i = 0; i < numRows; ++i ) { @@ -131,6 +133,7 @@ void test_auto_1d () fill_2D< mv_type, Space > f2( X, 0.0 ); Kokkos::parallel_for( X.dimension_0(), f2 ); + Kokkos::fence(); Kokkos::deep_copy( X_h, X ); for ( size_type j = 0; j < numCols; ++j ) { for ( size_type i = 0; i < numRows; ++i ) { @@ -140,6 +143,7 @@ void test_auto_1d () fill_2D< mv_type, Space > f3( X, TWO ); Kokkos::parallel_for( X.dimension_0(), f3 ); + Kokkos::fence(); Kokkos::deep_copy( X_h, X ); for ( size_type j = 0; j < numCols; ++j ) { for ( size_type i = 0; i < numRows; ++i ) { @@ -152,6 +156,7 @@ void test_auto_1d () fill_1D< decltype( X_j ), Space > f4( X_j, ZERO ); Kokkos::parallel_for( X_j.dimension_0(), f4 ); + Kokkos::fence(); Kokkos::deep_copy( X_h, X ); for ( size_type i = 0; i < numRows; ++i ) { ASSERT_TRUE( X_h( i, j ) == ZERO ); @@ -161,6 +166,7 @@ void test_auto_1d () auto X_jj = Kokkos::subview ( X, Kokkos::ALL, jj ); fill_1D< decltype( X_jj ), Space > f5( X_jj, ONE ); Kokkos::parallel_for( X_jj.dimension_0(), f5 ); + Kokkos::fence(); Kokkos::deep_copy( X_h, X ); for ( size_type i = 0; i < numRows; ++i ) { ASSERT_TRUE( X_h( i, jj ) == ONE ); @@ -1289,3 +1295,4 @@ void test_layoutright_to_layoutright() { } } // namespace TestViewSubview +#endif diff --git a/lib/kokkos/core/unit_test/UnitTestMainInit.cpp 
b/lib/kokkos/core/unit_test/UnitTestMainInit.cpp new file mode 100644 index 0000000000000000000000000000000000000000..21f851274b81c3c90f3d83aea82a2bd10f08aafd --- /dev/null +++ b/lib/kokkos/core/unit_test/UnitTestMainInit.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> +#include <Kokkos_Core.hpp> + +int main( int argc, char *argv[] ) { + Kokkos::initialize(argc,argv); + ::testing::InitGoogleTest( &argc, argv ); + + int result = RUN_ALL_TESTS(); + Kokkos::finalize(); + return result; +} diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_Category.hpp similarity index 86% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp rename to lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_Category.hpp index be0c4c5715eeba492112e9a83dbc3cba09796d98..8c69933e2bf546338726b2e7504d0a28c8bc6c26 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_Category.hpp @@ -41,20 +41,25 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> namespace Test { -TEST_F( cuda, impl_view_mapping_a ) -{ - test_view_mapping< Kokkos::CudaSpace >(); - test_view_mapping_operator< Kokkos::CudaSpace >(); -} +class cuda_hostpinned : public ::testing::Test { +protected: + static void SetUpTestCase() { + } -TEST_F( cuda, view_of_class ) -{ - TestViewMappingClassValue< Kokkos::CudaSpace >::run(); - 
TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); -} + static void TearDownTestCase() { + } +}; } // namespace Test + +#define TEST_CATEGORY cuda_hostpinned +#define TEST_EXECSPACE Kokkos::CudaHostPinnedSpace + +#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6e2f7dd597b8ba7e24c0fb7130d4c49af339b317 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestSharedAlloc.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) +{ + test_shared_alloc< TEST_EXECSPACE, Kokkos::DefaultHostExecutionSpace >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cae49fd3ef1a520678c1686585554c6ca563c665 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0af34336b80c3481810a33eb6d448b120d6fb82f --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf8d3c2b975bfca0f35514c02345d93abc452e3f --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..207d162d7e28d13271c14a06e6aed8df646858a0 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaHostPinned_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp similarity index 86% rename from lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp rename to lib/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp index 01eed4e023447acb953c27ce2e8aa2ab18d155a4..15203588d1b91e910f70f576d52b995e0be251e3 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_Category.hpp @@ -41,16 +41,25 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> namespace Test { -TEST_F( cuda, reducers ) -{ - TestReducers< int, Kokkos::Cuda >::execute_integer(); - TestReducers< size_t, Kokkos::Cuda >::execute_integer(); - TestReducers< double, Kokkos::Cuda >::execute_float(); - TestReducers< Kokkos::complex<double>, Kokkos::Cuda >::execute_basic(); -} +class cuda_uvm : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; } // namespace Test + +#define TEST_CATEGORY cuda_uvm +#define TEST_EXECSPACE Kokkos::CudaUVMSpace + +#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9aab558042e5f7799f81256f34fb6aa4abfd62d7 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestSharedAlloc.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) +{ + test_shared_alloc< TEST_EXECSPACE, Kokkos::DefaultHostExecutionSpace >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4e5528a15f0157b19a716c335399c669943b356 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..035a8a3d3b0b088124ea001d4ecf4e268c3396a7 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..776a3acfca8ed72cc9945f040ed98265d97f3bff --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..969d9a42176e452e41aced38610892cb4c953335 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0922c7991582e80a13c1c5aa2fa7e1fd497cd1ac --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestAtomicOperations.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicViews.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicViews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..999dab9a06238487620fed4be5de444af81ae521 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_AtomicViews.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestAtomicViews.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp index 7cf19b26d1b3ebe6a73f2614aab51dda9d9bd88c..9e05a0f67695803bcc9878fe0e907970f718e7a9 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp @@ -41,163 +41,6 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCuda_Category.hpp> +#include <TestAtomic.hpp> -namespace Test { - -TEST_F( cuda, atomics ) -{ - const int loop_count = 1e3; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda 
>( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 3 ) ) ); -} - -TEST_F( cuda, atomic_operations ) -{ - const int start = 1; // Avoid zero for division. 
- const int end = 11; - - for ( int i = start; i < end; ++i ) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( 
start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, 
Kokkos::Cuda >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( 
start, end -i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 4 ) ) ); - } -} - -TEST_F( cuda, atomic_views_integral ) -{ - const long length = 1000000; - - { - // Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 8 ) ) ); - } -} - -TEST_F( cuda, atomic_views_nonintegral ) -{ - const long length = 1000000; - - { - // Non-Integral Types. 
- ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 4 ) ) ); - } -} - -TEST_F( cuda, atomic_view_api ) -{ - TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Cuda >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Category.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a52438a9c80a57c383462e1f877e0d0c9df43289 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY cuda +#define TEST_EXECSPACE Kokkos::Cuda + +#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Complex.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ff4b2e1d8563adc4d1083e6f96d9526f82f7fb1d --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Complex.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Init.cpp similarity index 93% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp rename to lib/kokkos/core/unit_test/cuda/TestCuda_Init.cpp index d5fd24456d782409450fcf949d6c6280504bb785..d5e89625eddb83abb3c31f1ddab61ffc227e91c2 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Init.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,13 +42,9 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> - -namespace Test { +#include<cuda/TestCuda_Category.hpp> +#include<TestInit.hpp> +#include<TestCompilerMacros.hpp> +#include<TestPolicyConstruction.hpp> -TEST_F( cuda, view_api_b ) -{ - TestViewAPI< double, Kokkos::CudaUVMSpace >(); -} -} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_MDRange.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_MDRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a246fcce49f2d901f91cee4f55906b867bba43e7 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_MDRange.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestMDRange.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp index e655193a51f513dd390a5545aebe66ebb44f2c11..ba06b71192061191ea71745795ba93f1fd5ae84e 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,154 +42,9 @@ //@HEADER */ -#define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN - -#include <cuda/TestCuda.hpp> - -namespace Test { - -TEST_F( cuda, init ) -{ - ; -} - -TEST_F( cuda , mdrange_for ) { - TestMDRange_2D< Kokkos::Cuda >::test_for2( 100, 100 ); - TestMDRange_3D< Kokkos::Cuda >::test_for3( 100, 100, 100 ); - TestMDRange_4D< Kokkos::Cuda >::test_for4( 100, 10, 100, 10 ); - TestMDRange_5D< Kokkos::Cuda >::test_for5( 100, 10, 10, 10, 5 ); - TestMDRange_6D< Kokkos::Cuda >::test_for6( 100, 10, 5, 2, 10, 5 ); -} - -TEST_F( cuda, policy_construction ) -{ - TestRangePolicyConstruction< Kokkos::Cuda >(); - TestTeamPolicyConstruction< Kokkos::Cuda >(); -} - -TEST_F( cuda, range_tag ) -{ - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); - - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - TestRange< Kokkos::Cuda, 
Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); - - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); - - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); - - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); - TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( cuda, compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( cuda, memory_pool ) -{ - bool val = TestMemoryPool::test_mempool< Kokkos::Cuda >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::Cuda >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKDAG ) - -TEST_F( cuda, task_fib ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Cuda >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); - } -} - -TEST_F( cuda, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run( i ); - } -} - -TEST_F( cuda, task_team ) -{ - TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run( 1000 ); // Put back after testing. 
-} - -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) -TEST_F( cuda, cxx11 ) -{ - if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Cuda >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 4 ) ) ); - } -} -#endif - -TEST_F( cuda, tile_layout ) -{ - TestTile::test< Kokkos::Cuda, 1, 1 >( 1, 1 ); - TestTile::test< Kokkos::Cuda, 1, 1 >( 2, 3 ); - TestTile::test< Kokkos::Cuda, 1, 1 >( 9, 10 ); - - TestTile::test< Kokkos::Cuda, 2, 2 >( 1, 1 ); - TestTile::test< Kokkos::Cuda, 2, 2 >( 2, 3 ); - TestTile::test< Kokkos::Cuda, 2, 2 >( 4, 4 ); - TestTile::test< Kokkos::Cuda, 2, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Cuda, 2, 4 >( 9, 9 ); - TestTile::test< Kokkos::Cuda, 4, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Cuda, 4, 4 >( 1, 1 ); - TestTile::test< Kokkos::Cuda, 4, 4 >( 4, 4 ); - TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 9 ); - TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 11 ); - - TestTile::test< Kokkos::Cuda, 8, 8 >( 1, 1 ); - TestTile::test< Kokkos::Cuda, 8, 8 >( 4, 4 ); - TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 9 ); - TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 11 ); -} - -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) -#if defined( KOKKOS_COMPILER_CLANG ) -TEST_F( cuda, dispatch ) -{ - const int repeat = 100; - for ( int i = 0; i < repeat; ++i ) { - for ( int j = 0; j < repeat; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0, j ) - , KOKKOS_LAMBDA( int ) {} ); - } - } -} -#endif -#endif - -} // namespace Test +#include<cuda/TestCuda_Category.hpp> +#include<TestTemplateMetaFunctions.hpp> +#include<TestAggregate.hpp> +#include<TestMemoryPool.hpp> +#include<TestCXX11.hpp> +#include<TestTile.hpp> diff --git 
a/lib/kokkos/core/unit_test/cuda/TestCuda_RangePolicy.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_RangePolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b39e959f72d4e2c4dfa33023560e51fd67a0f680 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_RangePolicy.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestRange.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..911cbdf4209ecd4e3174f6b846473480c80dfca7 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions.cpp @@ -0,0 +1,48 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestFunctorAnalysis.hpp> +#include <TestReduce.hpp> +#include <TestCXX11Deduction.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp deleted file mode 100644 index 7f4e0973e7a512a5e855ba30c9e65e5a539c123d..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp +++ /dev/null @@ -1,138 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <cuda/TestCuda.hpp> - -namespace Test { - -TEST_F( cuda, long_reduce ) -{ - TestReduce< long, Kokkos::Cuda >( 0 ); - TestReduce< long, Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, double_reduce ) -{ - TestReduce< double, Kokkos::Cuda >( 0 ); - TestReduce< double, Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, long_reduce_dynamic ) -{ - TestReduceDynamic< long, Kokkos::Cuda >( 0 ); - TestReduceDynamic< long, Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, double_reduce_dynamic ) -{ - TestReduceDynamic< double, Kokkos::Cuda >( 0 ); - TestReduceDynamic< double, Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, long_reduce_dynamic_view ) -{ - TestReduceDynamicView< long, Kokkos::Cuda >( 0 ); - TestReduceDynamicView< long, Kokkos::Cuda >( 1000000 ); -} - -TEST_F( cuda, scan ) -{ - TestScan< Kokkos::Cuda >::test_range( 1, 1000 ); - TestScan< Kokkos::Cuda >( 0 ); - TestScan< Kokkos::Cuda >( 100000 ); - TestScan< Kokkos::Cuda >( 10000000 ); - Kokkos::Cuda::fence(); -} - -#if 0 -TEST_F( cuda, scan_small ) -{ - typedef TestScan< Kokkos::Cuda, Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor; - - for ( int i = 0; i < 1000; ++i ) { - TestScanFunctor( 10 ); - TestScanFunctor( 10000 ); - } - TestScanFunctor( 1000000 ); - TestScanFunctor( 10000000 ); - - Kokkos::Cuda::fence(); -} -#endif - -TEST_F( cuda, team_scan ) -{ - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -TEST_F( cuda, team_long_reduce ) -{ - TestReduceTeam< long, Kokkos::Cuda, 
Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( cuda, team_double_reduce ) -{ - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( cuda, reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Scan.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..44c8ae70fb78355ea0dc44d5d4f6f8c3f00cd19e --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestScan.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SharedAlloc.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..05fdca355baecc917a2ac9f343c786010000f862 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SharedAlloc.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestSharedAlloc.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::CudaSpace, Kokkos::DefaultHostExecutionSpace >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp index 5bed7640daa114879f789e67807946e0dc2343f4..aa19fc88eaeb5141558e10f94db075ce522a0972 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -41,7 +41,8 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <Kokkos_Core.hpp> +#include <cuda/TestCuda_Category.hpp> namespace Test { diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp index 0aea35db517bdba78967eb8b443cb771aaf2215f..e4a622aec61a39a6468b34ffb3d157233715b0a9 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp @@ -41,63 +41,64 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_auto_1d_left ) +TEST_F( TEST_CATEGORY, 
view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Cuda >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_auto_1d_right ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Cuda >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_auto_1d_stride ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Cuda >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_assign_strided ) +TEST_F( TEST_CATEGORY, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); + TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_left_0 ) +TEST_F( TEST_CATEGORY, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_left_0< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_left_1 ) +TEST_F( TEST_CATEGORY, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_left_1< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_left_2 ) +TEST_F( TEST_CATEGORY, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_left_2< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_left_3 ) +TEST_F( TEST_CATEGORY, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_left_3< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_right_0 ) +TEST_F( TEST_CATEGORY, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_right_0< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_right_1 ) +TEST_F( TEST_CATEGORY, view_subview_right_1 ) { - 
TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_right_1< TEST_EXECSPACE >(); } -TEST_F( cuda, view_subview_right_3 ) +TEST_F( TEST_CATEGORY, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_right_3< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp index f31f4cbe62bc06bd5fee04abc6a71913c6fbddd9..d655c71319459ac525f0218fb84de21971eec555 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp @@ -41,22 +41,23 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_layoutleft_to_layoutleft ) +TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft ) { - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( cuda, view_subview_layoutright_to_layoutright ) +TEST_F( TEST_CATEGORY, view_subview_layoutright_to_layoutright ) { - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< 
TEST_EXECSPACE >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp index 0213a196e8612b4d9d3821de6d657803e9e22b6c..ae4339448d114c42754c6752a0acbf551efb49ab 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp index 181e1bab2ccb531722b08e627a8ee724fcd393d9..fd1f8eae1dd8338beeb7df3433d18bcdbcd475e1 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign_atomic ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp index 
708cc1f5ba98fc7eb0f5603524c2b533eb090fee..8b508a3253bc414897c916d2c468479755a0c4cf 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp index a3db996f8d87d63dd1a21ea74eb83a615a0e7162..562be4cee4149bb5a73af82163e4fdc1b969185c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp index 2f7cffa75da133039d0624d2d812053774013846..6689c8724d08dd1a96f9656b04c82bce787f2f45 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_atomic ) 
+TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp index 949c6f3e0b9d3055e7da32ace79a810310861d99..2d665f167bca5154820c1802c1ba3e3879173f8b 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp index 3e68277a9e93b447a90a9b3496e0b4d0ccc407e2..5d30bf7c6a55a4933f6663409681f464a937b905 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp 
b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp index 0cd91b7795f52f457f4403559cb353180bcdbe44..a72e37acad069ca4d70c78d859eecb8a8826862a 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp index cd1c13f7d073f1a445c35ded9eaa9fd121d35fee..0c8b7008423c5fccf34bccd32aceecd5c39b1be4 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp index 22d27535431f7b6414c52305a46547654c40ccbb..38b9dbad02a27f12a4ec860098269fbce68d5bd5 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp +++ 
b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp index 5dc5f87b4e2b7faa2a52163f8b8af732b53000a9..cc3639f4386b7fa1d27afff964e3eb5f36679614 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp index 318d8edbbb82eb6dd097b959e07861cf74a77099..0805f2be01e085c6a1966540e301757f13c373fa 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCudaUVM_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess ) { - 
TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Task.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a5510e24067b313d1cf91726f74d238afde262f2 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Task.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<cuda/TestCuda_Category.hpp> +#include<TestTaskScheduler.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp index 8d9b9328ba9691fe90947554aeb9e9825322d55a..dae6eda4476d79ea09599a4adf260081ad46ce28 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp @@ -41,86 +41,35 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <cuda/TestCuda_Category.hpp> +#include <TestTeam.hpp> namespace Test { -TEST_F( cuda, team_tag ) +TEST_F( TEST_CATEGORY, team_for ) { - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - 
TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); } -TEST_F( cuda, team_shared_request ) -{ - TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -// This tests request to much L0 scratch. 
-//TEST_F( cuda, team_scratch_request ) -//{ -// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); -// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); -//} - -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) -TEST_F( cuda, team_lambda_shared_request ) -{ - TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); - TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); -} -#endif -TEST_F( cuda, shmem_size ) +TEST_F( TEST_CATEGORY, team_reduce ) { - TestShmemSize< Kokkos::Cuda >(); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } - -TEST_F( cuda, multi_level_scratch ) -{ - TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) -TEST_F( cuda, team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 1 ) ) ); - 
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 5 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 6 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 7 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 8 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 9 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 10 ) ) ); -} -#endif +#include <TestTeamVector.hpp> -TEST_F( cuda, triple_nested_parallelism ) -{ - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 32, 32 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 32, 16 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 16, 16 ); -} -} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_TeamReductionScan.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_TeamReductionScan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..27a689a53ef00c80477d4cb64585992ef228a053 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_TeamReductionScan.cpp @@ -0,0 +1,82 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) +TEST_F( TEST_CATEGORY, team_scan ) +{ + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} +#endif + +TEST_F( TEST_CATEGORY, team_long_reduce ) +{ + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( TEST_CATEGORY, team_double_reduce ) +{ + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial.hpp 
b/lib/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp similarity index 64% rename from lib/kokkos/core/unit_test/serial/TestSerial.hpp rename to lib/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp index 03da07e065e371e636f1d2c59ba99a2832dd574c..8e8fa9ac2a092e5b8cbb0395cbf5dd036dffcbd4 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial.hpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_TeamScratch.cpp @@ -41,59 +41,43 @@ //@HEADER */ -#ifndef KOKKOS_TEST_SERIAL_HPP -#define KOKKOS_TEST_SERIAL_HPP - -#include <gtest/gtest.h> - -#include <Kokkos_Macros.hpp> +#include <cuda/TestCuda_Category.hpp> +#include <TestTeam.hpp> -#ifdef KOKKOS_LAMBDA -#undef KOKKOS_LAMBDA -#endif -#define KOKKOS_LAMBDA [=] +namespace Test { -#include <Kokkos_Core.hpp> +TEST_F( TEST_CATEGORY, team_shared_request ) +{ + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} -#include <TestTile.hpp> -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> -#include <TestViewAPI.hpp> -#include <TestViewOfClass.hpp> -#include <TestViewSubview.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> -#include <TestAtomicViews.hpp> -#include <TestRange.hpp> -#include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskScheduler.hpp> -#include <TestMemoryPool.hpp> -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> -#include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestPolicyConstruction.hpp> -#include <TestMDRange.hpp> +TEST_F( TEST_CATEGORY, team_scratch_request ) +{ + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} -namespace Test { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= 
CUDA_VERSION ) +TEST_F( TEST_CATEGORY, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif +#endif -class serial : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::HostSpace::execution_space::initialize(); - } +TEST_F( TEST_CATEGORY, shmem_size ) +{ + TestShmemSize< TEST_EXECSPACE >(); +} - static void TearDownTestCase() - { - Kokkos::HostSpace::execution_space::finalize(); - } -}; +TEST_F( TEST_CATEGORY, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} } // namespace Test -#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp index b4d8e5d953f8e753eac945560fac763589bd2025..aa1fc28af68a5979b4a1a1c75c555fcc858a20f1 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp @@ -41,14 +41,5 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> - -namespace Test { - -TEST_F( cuda, impl_view_mapping_d ) -{ - test_view_mapping< Kokkos::CudaHostPinnedSpace >(); - test_view_mapping_operator< Kokkos::CudaHostPinnedSpace >(); -} - -} // namespace Test +#include <cuda/TestCuda_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b13ab93da358b2abbff6cf57efefe4a2393cbcbd --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..367922a3e23c097575baf6a0ab5795a5ead815a5 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..22f78ebe740a355746431c792e08ca122a708924 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewOfClass.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewOfClass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bc989ccaa4bfb375c649991af982cb99f3a744f8 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <cuda/TestCuda_Category.hpp> +#include <TestViewOfClass.hpp> + diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp similarity index 81% rename from lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp index f85a35c096516fe77c39cfaaa1778a9d5bb895ef..e52df4df741b04eb222c4ec22f71342610382c35 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType.cpp @@ -44,36 +44,14 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> +#include <default/TestDefaultDeviceType_Category.hpp> #if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) -#include <TestAtomic.hpp> -#include <TestViewAPI.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestTeam.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestCXX11.hpp> -#include <TestTeamVector.hpp> -#include <TestUtilities.hpp> - namespace Test { -class defaultdevicetype : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::initialize(); - } - - static void TearDownTestCase() - { - Kokkos::finalize(); - } -}; -TEST_F( defaultdevicetype, host_space_access ) +TEST_F( TEST_CATEGORY, host_space_access ) { typedef Kokkos::HostSpace::execution_space host_exec_space; typedef Kokkos::Device< host_exec_space, Kokkos::HostSpace > device_space; @@ -89,11 +67,6 @@ TEST_F( defaultdevicetype, host_space_access ) Kokkos::Impl::SpaceAccessibility< mirror_space, Kokkos::HostSpace >::accessible, "" ); } -TEST_F( defaultdevicetype, view_api ) -{ - TestViewAPI< double, Kokkos::DefaultExecutionSpace >(); -} - } // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp 
b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_1.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_1.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_10.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_10.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_11.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_11.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_12.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_12.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_13.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_13.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_14.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_14.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_15.cpp similarity index 100% rename from 
lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_15.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_16.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_16.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_2.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_2.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_3.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_3.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_4.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_4.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_5.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_5.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_6.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_6.cpp diff --git 
a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_7.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_7.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_8.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_8.cpp diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_9.cpp similarity index 100% rename from lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeInit_9.cpp diff --git a/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8b9c14b76f6668f95a4ac3f6719b0fdec753e143 --- /dev/null +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp @@ -0,0 +1,67 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class defaultdevicetype : public ::testing::Test { +protected: + static void SetUpTestCase() + { + } + + static void TearDownTestCase() + { + } +}; + +} // namespace Test + +#define TEST_CATEGORY defaultdevicetype +#define TEST_EXECSPACE Kokkos::DefaultExecutionSpace + +#endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_a.cpp similarity index 91% rename from lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceType_a.cpp index 4fdfa959107becae384ffa5c5e09d444e9299670..b50ef3446bd833e6e8b5f3f170df7dc4e2096d31 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_a.cpp @@ -47,22 +47,11 @@ #if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) -#include <TestReduce.hpp> +#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestReduceCombinatorical.hpp> namespace Test { -class defaultdevicetype : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::initialize(); - } - - static void TearDownTestCase() - { - Kokkos::finalize(); - } -}; TEST_F( defaultdevicetype, reduce_instantiation_a ) { diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_b.cpp similarity index 90% rename from lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceType_b.cpp index 841f34e03dd1f9900d304a8f6e889a5d30dc2a65..c947e935dd538b5d8e0f219163d6d30457cf3d03 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp +++ 
b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_b.cpp @@ -47,23 +47,11 @@ #if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) -#include <TestReduce.hpp> +#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestReduceCombinatorical.hpp> namespace Test { -class defaultdevicetype : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::initialize(); - } - - static void TearDownTestCase() - { - Kokkos::finalize(); - } -}; - TEST_F( defaultdevicetype, reduce_instantiation_b ) { TestReduceCombinatoricalInstantiation<>::execute_b(); diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp similarity index 90% rename from lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp rename to lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp index 602863be3852a603d6c8e803752ad4a67709c0d5..e11996e8f90111b1fc2cd95727842eeb42466826 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_c.cpp @@ -47,23 +47,11 @@ #if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) -#include <TestReduce.hpp> +#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestReduceCombinatorical.hpp> namespace Test { -class defaultdevicetype : public ::testing::Test { -protected: - static void SetUpTestCase() - { - Kokkos::initialize(); - } - - static void TearDownTestCase() - { - Kokkos::finalize(); - } -}; - TEST_F( defaultdevicetype, reduce_instantiation_c ) { TestReduceCombinatoricalInstantiation<>::execute_c(); diff --git a/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8b98a35384ee139cf92bf788a7741582e9fda4a4 --- /dev/null +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp @@ -0,0 +1,73 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <gtest/gtest.h> + +#include <Kokkos_Core.hpp> + +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) + +#include <default/TestDefaultDeviceType_Category.hpp> +#include <TestUtilities.hpp> + +namespace Test { + +TEST_F( defaultdevicetype, test_utilities ) +{ + test_utilities(); +} + +TEST_F( defaultdevicetype, malloc ) +{ + int* data = (int*) Kokkos::kokkos_malloc( 100 * sizeof( int ) ); + ASSERT_NO_THROW( data = (int*) Kokkos::kokkos_realloc( data, 120 * sizeof( int ) ) ); + Kokkos::kokkos_free( data ); + + int* data2 = (int*) Kokkos::kokkos_malloc( 0 ); + ASSERT_TRUE( data2 == NULL ); + Kokkos::kokkos_free( data2 ); +} + +} // namespace Test + +#endif diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp index ed9bb68cd60a004c214ec473ae35653f61c6a814..2f8daf7ad74a6c8c0533aeec27cd6d0370969a40 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -78,6 +78,7 @@ #include <TestTemplateMetaFunctions.hpp> #include <TestPolicyConstruction.hpp> #include <TestMDRange.hpp> +#include <TestConcurrentBitset.hpp> namespace Test { diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9002421984c64467ff9fc057f61385aa93c4262 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestAtomicOperations.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicViews.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicViews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6ba4d29bafd6f73e73e347348c33d5850564ed7f --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicViews.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestAtomicViews.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp index 2585c01973b3aeba5fd00f27068c361b15552800..497153e07e0bc1943487cc6f1f8785421224886d 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp @@ -41,161 +41,6 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestAtomic.hpp> -namespace Test { - -TEST_F( openmp, atomics ) -{ - const int loop_count = 1e4; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP 
>( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 3 ) ) ); -} - -TEST_F( openmp, atomic_operations ) -{ - const int start = 1; // Avoid zero for division. 
- const int end = 11; - - for ( int i = start; i < end; ++i ) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< 
unsigned int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 4 ) ) ); - } -} - -TEST_F( openmp, atomic_views_integral ) -{ - const long length = 1000000; - { - // Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 8 ) ) ); - } -} - -TEST_F( openmp, atomic_views_nonintegral ) -{ - const long length = 1000000; - { - // Non-Integral Types. 
- ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 4 ) ) ); - } -} - -TEST_F( openmp, atomic_view_api ) -{ - TestAtomicViews::TestAtomicViewAPI<int, Kokkos::OpenMP >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6fcc117a8c74ff4bac9144129f44515dc2716e63 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_OPENMP_CATEGORY_HPP +#define KOKKOS_TEST_OPENMP_CATEGORY_HPP + +#include <gtest/gtest.h> + +namespace Test { +// gtest fixture shared by the OpenMP unit tests; its per-test-case hooks are empty. +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test +// Fixture name and execution space consumed by the TEST_F( TEST_CATEGORY, ... ) tests. +#define TEST_CATEGORY openmp +#define TEST_EXECSPACE Kokkos::OpenMP + +#endif // KOKKOS_TEST_OPENMP_CATEGORY_HPP diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Complex.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c7e13b1a97e3b2dfedc07e9c8862917654c90a89 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Complex.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Init.cpp similarity index 92% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_Init.cpp index 649023e4afcaf921511edab82cc10035776246ae..e5c9bb13534bef1acd3a76bd7555a92427c6f529 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Init.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,13 +42,9 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> - -namespace Test { +#include<openmp/TestOpenMP_Category.hpp> +#include<TestInit.hpp> +#include<TestCompilerMacros.hpp> +#include<TestPolicyConstruction.hpp> -TEST_F( cuda, view_api_c ) -{ - TestViewAPI< double, Kokkos::CudaHostPinnedSpace >(); -} -} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_MDRange.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_MDRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cfc46977d1eb2fb7ebd613d843a8ac8b54492e2d --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_MDRange.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestMDRange.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp index b4f32dac706222e2c1f79f43469eadb4f5e3e6c6..5e9535638d59c289b7eb18dedbf48639e9f9722b 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,172 +42,9 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> - -namespace Test { - -TEST_F( openmp, init ) -{ - ; -} - -TEST_F( openmp, mdrange_for ) -{ - Kokkos::Timer timer; - TestMDRange_2D< Kokkos::OpenMP >::test_for2( 10000, 1000 ); - std::cout << " 2D: " << timer.seconds() << std::endl; - - timer.reset(); - TestMDRange_3D< Kokkos::OpenMP >::test_for3( 100, 100, 1000 ); - std::cout << " 3D: " << timer.seconds() << std::endl; - - timer.reset(); - TestMDRange_4D< Kokkos::OpenMP >::test_for4( 100, 10, 100, 100 ); - std::cout << " 4D: " << timer.seconds() << std::endl; - - timer.reset(); - TestMDRange_5D< Kokkos::OpenMP >::test_for5( 100, 10, 10, 100, 50 ); - std::cout << " 5D: " << timer.seconds() << std::endl; - - timer.reset(); - TestMDRange_6D< Kokkos::OpenMP >::test_for6( 10, 10, 10, 10, 50, 50 ); - std::cout << " 6D: " << timer.seconds() << std::endl; -} - -TEST_F( openmp, mdrange_reduce ) -{ - TestMDRange_2D< Kokkos::OpenMP >::test_reduce2( 100, 100 ); - TestMDRange_3D< Kokkos::OpenMP >::test_reduce3( 100, 10, 100 ); -} - -TEST_F( openmp, policy_construction ) -{ - TestRangePolicyConstruction< Kokkos::OpenMP >(); - TestTeamPolicyConstruction< Kokkos::OpenMP >(); -} - -TEST_F( openmp, range_tag ) -{ - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestRange< Kokkos::OpenMP, 
Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 0 ); - - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); - - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 3 ); - - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); - - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); - TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp, compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( openmp, memory_pool ) -{ - bool val = 
TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKDAG ) - -TEST_F( openmp, task_fib ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestFib< Kokkos::OpenMP >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); - } -} - -TEST_F( openmp, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run( i ); - } -} - -TEST_F( openmp, task_team ) -{ - TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run( 1000 ); // Put back after testing. -} - -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) -TEST_F( openmp, cxx11 ) -{ - if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::OpenMP >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 4 ) ) ); - } -} -#endif - -TEST_F( openmp, tile_layout ) -{ - TestTile::test< Kokkos::OpenMP, 1, 1 >( 1, 1 ); - TestTile::test< Kokkos::OpenMP, 1, 1 >( 2, 3 ); - TestTile::test< Kokkos::OpenMP, 1, 1 >( 9, 10 ); - - TestTile::test< Kokkos::OpenMP, 2, 2 >( 1, 1 ); - TestTile::test< Kokkos::OpenMP, 2, 2 >( 2, 3 ); - TestTile::test< Kokkos::OpenMP, 2, 2 >( 4, 4 ); - TestTile::test< Kokkos::OpenMP, 2, 2 >( 9, 9 ); - - TestTile::test< Kokkos::OpenMP, 2, 4 >( 9, 9 ); - TestTile::test< Kokkos::OpenMP, 4, 2 >( 9, 9 ); - - TestTile::test< Kokkos::OpenMP, 4, 4 >( 1, 1 ); - TestTile::test< 
Kokkos::OpenMP, 4, 4 >( 4, 4 ); - TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 9 ); - TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 11 ); - - TestTile::test< Kokkos::OpenMP, 8, 8 >( 1, 1 ); - TestTile::test< Kokkos::OpenMP, 8, 8 >( 4, 4 ); - TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 9 ); - TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 11 ); -} - -TEST_F( openmp, dispatch ) -{ - const int repeat = 100; - for ( int i = 0; i < repeat; ++i ) { - for ( int j = 0; j < repeat; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >( 0, j ) - , KOKKOS_LAMBDA( int ) {} ); - } - } -} - -} // namespace Test +#include<openmp/TestOpenMP_Category.hpp> +#include<TestTemplateMetaFunctions.hpp> +#include<TestAggregate.hpp> +#include<TestMemoryPool.hpp> +#include<TestCXX11.hpp> +#include<TestTile.hpp> diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_RangePolicy.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_RangePolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a5a9d7e99b274766bfa3f02f5056438da2f2b274 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_RangePolicy.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestRange.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp index 22c29308a6289361bfa0b62d47e579e4bb1e29c2..820e06361475f4ba0f4911d0e3c79c6020adf414 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp @@ -41,106 +41,7 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestReduce.hpp> +#include <TestCXX11Deduction.hpp> -namespace Test { - -TEST_F( openmp, long_reduce ) -{ - TestReduce< long, Kokkos::OpenMP >( 0 ); - TestReduce< long, Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce ) -{ - TestReduce< double, Kokkos::OpenMP >( 0 ); - TestReduce< double, Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, reducers ) -{ - TestReducers< int, Kokkos::OpenMP >::execute_integer(); - TestReducers< size_t, Kokkos::OpenMP >::execute_integer(); - TestReducers< double, Kokkos::OpenMP >::execute_float(); - TestReducers< Kokkos::complex<double>, Kokkos::OpenMP >::execute_basic(); -} - -TEST_F( openmp, long_reduce_dynamic ) -{ - TestReduceDynamic< long, Kokkos::OpenMP >( 0 ); - TestReduceDynamic< long, Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, double_reduce_dynamic ) -{ - TestReduceDynamic< double, Kokkos::OpenMP >( 0 ); - TestReduceDynamic< double, Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, long_reduce_dynamic_view ) -{ - TestReduceDynamicView< long, Kokkos::OpenMP >( 0 ); - TestReduceDynamicView< long, Kokkos::OpenMP >( 1000000 ); -} - -TEST_F( openmp, scan ) -{ - TestScan< Kokkos::OpenMP >::test_range( 1, 1000 ); - TestScan< Kokkos::OpenMP >( 0 ); - TestScan< Kokkos::OpenMP >( 100000 ); - TestScan< Kokkos::OpenMP >( 10000000 ); - 
Kokkos::OpenMP::fence(); -} - -#if 0 -TEST_F( openmp, scan_small ) -{ - typedef TestScan< Kokkos::OpenMP, Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor; - - for ( int i = 0; i < 1000; ++i ) { - TestScanFunctor( 10 ); - TestScanFunctor( 10000 ); - } - TestScanFunctor( 1000000 ); - TestScanFunctor( 10000000 ); - - Kokkos::OpenMP::fence(); -} -#endif - -TEST_F( openmp, team_scan ) -{ - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -TEST_F( openmp, team_long_reduce ) -{ - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( openmp, team_double_reduce ) -{ - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( openmp, reduction_deduction ) -{ - 
TestCXX11::test_reduction_deduction< Kokkos::OpenMP >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Scan.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d8427e284805e394676bd36a1d318d4c4fd4bd49 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestScan.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SharedAlloc.cpp similarity index 91% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_SharedAlloc.cpp index e4e6894c5346b6283371903bc2e1bdea18c5f399..ee9ab45edfa8475c9282711b948d8d00d5c09ba1 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SharedAlloc.cpp @@ -41,14 +41,15 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestSharedAlloc.hpp> namespace Test { -TEST_F( cuda, impl_view_mapping_c ) + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) { - test_view_mapping< Kokkos::CudaUVMSpace >(); - test_view_mapping_operator< Kokkos::CudaUVMSpace >(); + test_shared_alloc< Kokkos::HostSpace, TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp index fefae073227a7086bb440152b76abf16dc9c00b2..bbb12c2ddb6e9ec26bead40359dd0885c1e88d82 100644 --- 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp @@ -41,63 +41,64 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_auto_1d_left ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::OpenMP >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_auto_1d_right ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::OpenMP >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_auto_1d_stride ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::OpenMP >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_assign_strided ) +TEST_F( TEST_CATEGORY, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::OpenMP >(); + TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_left_0 ) +TEST_F( TEST_CATEGORY, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::OpenMP >(); + TestViewSubview::test_left_0< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_left_1 ) +TEST_F( TEST_CATEGORY, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::OpenMP >(); + TestViewSubview::test_left_1< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_left_2 ) +TEST_F( TEST_CATEGORY, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::OpenMP >(); + TestViewSubview::test_left_2< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_left_3 ) +TEST_F( TEST_CATEGORY, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::OpenMP >(); + 
TestViewSubview::test_left_3< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_right_0 ) +TEST_F( TEST_CATEGORY, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::OpenMP >(); + TestViewSubview::test_right_0< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_right_1 ) +TEST_F( TEST_CATEGORY, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::OpenMP >(); + TestViewSubview::test_right_1< TEST_EXECSPACE >(); } -TEST_F( openmp, view_subview_right_3 ) +TEST_F( TEST_CATEGORY, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::OpenMP >(); + TestViewSubview::test_right_3< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp index 7de7ca91bdc082057bccc1b71ec8f482a16bc0f9..591129916d820b14181a6cbdb8bad0ae0247cc34 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp @@ -41,22 +41,23 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_layoutleft_to_layoutleft ) +TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft ) { - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( openmp, view_subview_layoutright_to_layoutright ) +TEST_F( TEST_CATEGORY, 
view_subview_layoutright_to_layoutright ) { - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp index d727ec0ee592c57d357b8cfebfa83a9bcc06eb12..c8bf28e777b87e3ec8269d89fee2fd265f539c87 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp index df43f555d385037dafe3a29b9cec66ef2eb9b781..64bccc297da4974e95128979cf7d6c4e62d30c8d 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign_atomic ) +TEST_F( 
TEST_CATEGORY, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp index 38f241ebf7bdea50af2f8a0b06dd69b16175667c..668f583de7bde91a4ab3439beabee623cc264838 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp index 11a4ea8ac24bf457f9d4fbe97b5180536d1fac69..9cad90a10a1dc13b4ac77b996d73be98785335f3 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_2d_from_3d ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp 
b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp index a91baa34df3f0fc41db37909fdcdbeefc27a3158..a292fb85e21b95588e117563a3a173dc9e719008 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { TEST_F( openmp, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp index 20d4d9bd64462eaa9d90a5d776c7129a7a816312..37aa23406e525210a638d526322fd79ded1ea103 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp index 528df1c0700d7582f427310d8f7610376f9166bb..33391fcfa2f6e23fcce11f2bc91c2e0c4e12ce2d 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include 
<openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp index d9eea8dba91a7c03cdfd8460b2241438ffbbce1d..c9958e7bcb6a37bff6eb02e87332fc9f5a5d1034 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp index f909dc33c067ca4ff6c3badeddf92c6bb12a2bd6..e482c6e87ebb589c75d56414621e4ceb09b71015 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); 
+ TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp index 59996d5e33b594a23c7e368354208c68707339e9..348eb2a795595c2ecf119187c15d57e75cafd788 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp index 3f9c215d9b10dbbeb3aada555515ab27c1e38adb..1341d65e6f7220e92fd216095deebdb422bbaad0 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp index 
d3a73483a0bc11c4d60eb4d6d658c00fde838566..da38096bcaaa17aaefcacdcb80d227c64f6dc911 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..90692648c11a2d29db440699cb95ab4fb4c2f87b --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmp/TestOpenMP_Category.hpp> +#include<TestTaskScheduler.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp index 216789e8bf6ebcd1d2deab1e567317376c611e0b..a937ea41ab0d711aa83e3772f2389b466fef962d 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -41,87 +41,35 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestTeam.hpp> namespace Test { -TEST_F( openmp, team_tag ) +TEST_F( TEST_CATEGORY, team_for ) { - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestTeamPolicy< 
Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); } -TEST_F( openmp, team_shared_request ) -{ - TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); -} -TEST_F( openmp, team_scratch_request ) +TEST_F( TEST_CATEGORY, team_reduce ) { - TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } - -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) -TEST_F( openmp, team_lambda_shared_request ) -{ - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#endif -TEST_F( openmp, shmem_size ) -{ - TestShmemSize< Kokkos::OpenMP >(); -} +#include <TestTeamVector.hpp> -TEST_F( openmp, multi_level_scratch ) -{ - TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -TEST_F( openmp, team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< 
Kokkos::OpenMP >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 1 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 5 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 6 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 7 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 8 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 9 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 10 ) ) ); -} - -#ifdef KOKKOS_COMPILER_GNU -#if ( KOKKOS_COMPILER_GNU == 472 ) -#define SKIP_TEST -#endif -#endif - -#ifndef SKIP_TEST -TEST_F( openmp, triple_nested_parallelism ) -{ - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 32 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 16 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 16, 16 ); -} -#endif -} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamReductionScan.cpp similarity index 55% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp rename to lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamReductionScan.cpp index 82a3dd83e88c3b047525771a5dd9deca32d6d891..3e4183be8b0acce0d37cc1cf5d20e8271fbc7359 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamReductionScan.cpp @@ -41,76 +41,41 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <openmp/TestOpenMP_Category.hpp> +#include <TestTeam.hpp> namespace Test { -TEST_F( cuda, view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Cuda >(); -} -TEST_F( cuda, view_remap ) +TEST_F( TEST_CATEGORY, team_scan ) { - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - - typedef 
Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::CudaUVMSpace > output_type; - - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace > input_type; - - typedef Kokkos::View< int*[N0][N2][N3], - Kokkos::LayoutLeft, - Kokkos::CudaUVMSpace > diff_type; - - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); - - Kokkos::fence(); - - int value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - input( i0, i1, i2, i3 ) = ++value; - } - - Kokkos::fence(); - - // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. - Kokkos::deep_copy( output, input ); - - Kokkos::fence(); - - value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); - } - - Kokkos::fence(); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -TEST_F( cuda, view_aggregate ) +TEST_F( TEST_CATEGORY, team_long_reduce ) { - TestViewAggregate< Kokkos::Cuda >(); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, 
TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( cuda, template_meta_functions ) +TEST_F( TEST_CATEGORY, team_double_reduce ) { - TestTemplateMetaFunctions< int, Kokkos::Cuda >(); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } } // namespace Test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b9bf7ac439a0b6ada5e01459360fa0f91b17e5c0 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, team_shared_request ) +{ + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( TEST_CATEGORY, team_scratch_request ) +{ + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) +TEST_F( TEST_CATEGORY, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif +#endif + +TEST_F( TEST_CATEGORY, shmem_size ) +{ + TestShmemSize< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp index c802fb79caf081b103c6e65bf54d8e20fe3b7193..d53bc77684e42f01932129333c1bf5c342622c21 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp @@ -41,84 +41,5 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> - -namespace Test { - -TEST_F( openmp, impl_shared_alloc ) -{ - test_shared_alloc< Kokkos::HostSpace, Kokkos::OpenMP >(); -} - -TEST_F( openmp, impl_view_mapping_b ) -{ - test_view_mapping_subview< Kokkos::OpenMP >(); - 
TestViewMappingAtomic< Kokkos::OpenMP >::run(); -} - -TEST_F( openmp, view_api ) -{ - TestViewAPI< double, Kokkos::OpenMP >(); -} - -TEST_F( openmp, view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::OpenMP >(); -} - -TEST_F( openmp, view_remap ) -{ - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::OpenMP > output_type; - - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::OpenMP > input_type; - - typedef Kokkos::View< int*[N0][N2][N3], - Kokkos::LayoutLeft, - Kokkos::OpenMP > diff_type; - - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); - - int value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - input( i0, i1, i2, i3 ) = ++value; - } - - // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
- Kokkos::deep_copy( output, input ); - - value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); - } -} - -TEST_F( openmp, view_aggregate ) -{ - TestViewAggregate< Kokkos::OpenMP >(); -} - -TEST_F( openmp, template_meta_functions ) -{ - TestTemplateMetaFunctions< int, Kokkos::OpenMP >(); -} - -} // namespace Test +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6e7ed97982b38f46f236b9d59d1b778d50405a2 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..793c4620c138f601c5c2c5822546d0a7093408ac --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..09a28d95a179a3ab74d78d7527c685f2524a1c40 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewOfClass.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewOfClass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4daad9eddccb5943e90f2f99cdece484d3694d96 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmp/TestOpenMP_Category.hpp> +#include <TestViewOfClass.hpp> + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp similarity index 60% rename from lib/kokkos/core/unit_test/cuda/TestCuda.hpp rename to lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp index 768b0392048184a4e26c320f16329c07bb8caba5..9a5f4afad003cf30af0170fc37c2de603d52f59f 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget.hpp @@ -41,62 +41,70 @@ //@HEADER */ -#ifndef KOKKOS_TEST_CUDA_HPP -#define KOKKOS_TEST_CUDA_HPP +#ifndef KOKKOS_TEST_OPENMPTARGET_HPP // guard named after this header, not the Threads test header it was copied from +#define KOKKOS_TEST_OPENMPTARGET_HPP #include <gtest/gtest.h> #include <Kokkos_Macros.hpp> + +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + #include <Kokkos_Core.hpp> #include <TestTile.hpp> -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> -#include <TestViewAPI.hpp> -#include <TestViewOfClass.hpp> -#include <TestViewSubview.hpp> -#include <TestViewSpaceAssign.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> -#include <TestAtomicViews.hpp> +//#include <TestSharedAlloc.hpp> +//#include <TestViewAPI.hpp> +//#include <TestViewOfClass.hpp>
+//#include <TestViewSubview.hpp> +//#include <TestAtomic.hpp> +//#include <TestAtomicOperations.hpp> +//#include <TestAtomicViews.hpp> #include <TestRange.hpp> #include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskScheduler.hpp> -#include <TestMemoryPool.hpp> -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> +//#include <TestReduce.hpp> +//#include <TestScan.hpp> +//#include <TestAggregate.hpp> +//#include <TestCompilerMacros.hpp> + +//TODO enable task scheduler tests for openmptarget +//#include <TestTaskScheduler.hpp> + +//#include <TestMemoryPool.hpp> +//#include <TestCXX11.hpp> +//#include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestPolicyConstruction.hpp> -#include <TestMDRange.hpp> +//#include <TestTemplateMetaFunctions.hpp> +//#include <TestPolicyConstruction.hpp> +//#include <TestMDRange.hpp> namespace Test { -// For some reason I can only have the definition of SetUp and TearDown in one cpp file ... 
-class cuda : public ::testing::Test { +class openmptarget : public ::testing::Test { protected: - static void SetUpTestCase(); - static void TearDownTestCase(); -}; + static void SetUpTestCase() // initialize the backend with a thread count derived from the hwloc topology queries below + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); -#ifdef TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN -void cuda::SetUpTestCase() -{ - Kokkos::print_configuration( std::cout ); - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( 0 ) ); -} - -void cuda::TearDownTestCase() -{ - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); -} -#endif + unsigned thread_count = 0; + + thread_count = std::max( 1u, numa_count ) // treat a reported NUMA count of 0 as 1 + * std::max( 2u, cores_per_numa * threads_per_core ); // request no fewer than 2 threads per NUMA region + + Kokkos::Experimental::OpenMPTarget::initialize( thread_count ); + Kokkos::print_configuration( std::cout, true /* detailed */ ); + } + + static void TearDownTestCase() + { + Kokkos::Experimental::OpenMPTarget::finalize(); + } +}; } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations.cpp new file mode 100644 index 0000000000000000000000000000000000000000..df9d6b55304ed754d9f603978124ad64f5a82c10 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S.
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestAtomicOperations.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicViews.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicViews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8dd7385e432d4d00634aadc5a8fe6346fc4ad5ba --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicViews.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestAtomicViews.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Atomics.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Atomics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7d2d721d4411c6f28b9643a9575fa952fb3f1fa9 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Atomics.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestAtomic.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e3ac705faf1e9ca37e3628dfd42559a4378e5a30 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_OPENMPTARGET_CATEGORY_HPP // guard named after this header so it cannot collide with the Threads test headers +#define KOKKOS_TEST_OPENMPTARGET_CATEGORY_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class openmptarget : public ::testing::Test { // gtest fixture selected through TEST_CATEGORY; both static hooks below are no-ops +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY openmptarget +#define TEST_EXECSPACE Kokkos::Experimental::OpenMPTarget + +#endif diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Complex.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d2a5d233ef970e86a9e8fbabe5523eaed148b9c3 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Complex.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Init.cpp similarity index 90% rename from lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp rename to lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Init.cpp index 2192159b8439a2b4fdd0fcc38b3be4d382973821..ff153b6c4709fd0c65a30e90bfe4aa78833ce7ae 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Init.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,14 +42,9 @@ //@HEADER */ -#include <serial/TestSerial.hpp> - -namespace Test { +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestInit.hpp> +#include<TestCompilerMacros.hpp> +#include<TestPolicyConstruction.hpp> -TEST_F( serial, impl_view_mapping_a ) -{ - test_view_mapping< Kokkos::Serial >(); - 
test_view_mapping_operator< Kokkos::Serial >(); -} -} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_MDRange.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_MDRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ef67dfe1c53b2c037190d5e754885c9eda3e726b --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_MDRange.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestMDRange.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Other.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d53c1d01c9a783d60b53b25371e2e1668072448a --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Other.cpp @@ -0,0 +1,50 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestTemplateMetaFunctions.hpp> +#include<TestAggregate.hpp> +#include<TestMemoryPool.hpp> +#include<TestCXX11.hpp> +#include<TestTile.hpp> diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicy.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8f9ade4e83fbcba61ddedfd20230195af66c8df9 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicy.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestRange.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Reductions.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b9758300bc5f1e2144d116570089115d0a27f957 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Reductions.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestReduce.hpp> +#include <TestCXX11Deduction.hpp> diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Scan.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f6c0a57f1bd7d2de7380e1b705b428729c1617b --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<openmptarget/TestOpenMPTarget_Category.hpp> +#include<TestScan.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e08046aa9853f58c5b40b3aec7aa372a0b08981 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestSharedAlloc.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::Experimental::OpenMPTargetSpace, Kokkos::DefaultHostExecutionSpace >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4aafb9a408c7c6d45acbc2d8a3d4a8ea39e346a7 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp @@ -0,0 +1,104 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_assign_strided ) +{ + TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_left_0 ) +{ + TestViewSubview::test_left_0< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_left_1 ) +{ + TestViewSubview::test_left_1< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_left_2 ) +{ + TestViewSubview::test_left_2< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_left_3 ) +{ + TestViewSubview::test_left_3< 
TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_right_0 ) +{ + TestViewSubview::test_right_0< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_right_1 ) +{ + TestViewSubview::test_right_1< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, view_subview_right_3 ) +{ + TestViewSubview::test_right_3< TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_b.cpp similarity index 71% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp rename to lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_b.cpp index 27450fa6ff827dbbe6970331eca68589a423c406..d9bc921cc3635377af0ba8f8e6b1c65e91a06eef 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_b.cpp @@ -41,25 +41,23 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, impl_shared_alloc ) +TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft ) { - test_shared_alloc< Kokkos::CudaSpace, Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaUVMSpace, Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace::execution_space >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( cuda, impl_view_mapping_b ) +TEST_F( TEST_CATEGORY, view_subview_layoutright_to_layoutright ) { - test_view_mapping_subview< Kokkos::CudaSpace >(); - test_view_mapping_subview< Kokkos::CudaUVMSpace >(); - test_view_mapping_subview< Kokkos::CudaHostPinnedSpace >(); - 
TestViewMappingAtomic< Kokkos::CudaSpace >::run(); - TestViewMappingAtomic< Kokkos::CudaUVMSpace >::run(); - TestViewMappingAtomic< Kokkos::CudaHostPinnedSpace >::run(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..009554ea2a452f7bb37502d469f016ea17534a87 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_1d_assign ) +{ + TestViewSubview::test_1d_assign< TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9993ec888e34dc20218a416bb9fa2ee662784c49 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dfa1aac6ba09fa770c5414eb12beb8b1b1b07e40 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9f2bafb72a6504087f1b3379cc7afe9821716397 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d ) +{ + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ca6b613f9f8433aec45d4e461ee32d044a44c33f --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c4e3b1f6cf8453ba8426eaae57171000cce495bc --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp similarity index 90% rename from lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp rename to lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp index b46b1e5f8173bd724c0333de776366704c23f152..d1cd0a7336dba68cb041e3d3d1fa1507e782e597 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c07.cpp @@ -41,14 +41,14 @@ //@HEADER */ -#include <cuda/TestCuda.hpp> +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( cuda, view_space_assign ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left ) { - view_space_assign< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >(); - view_space_assign< Kokkos::CudaSpace, Kokkos::CudaUVMSpace >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..15bc73c1de26793e465179fb8da9657bdb7f1f30 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..447437528a2447c647538e9319e156eb42b02aed --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..60c94db90dcb12bb6ebb87cc80cd1743c8c16026 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right ) +{ + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4b486c72ec1e9be72446b45a1f7bd8a25b5ff7a6 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..32cf5a0e16636c87b0c045b7ff1bbd124c4e2007 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp @@ -0,0 +1,54 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewSubview.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e3c91b286a3e3ca02301b64164ead8bb56282973 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp @@ -0,0 +1,75 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, team_for ) +{ + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); + + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); +} + + +TEST_F( TEST_CATEGORY, team_reduce ) +{ + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> 
>::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); +} +} + +#include <TestTeamVector.hpp> + + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamReductionScan.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamReductionScan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b6d6c0237cd83102c0a11f11e89f799e76a8704d --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamReductionScan.cpp @@ -0,0 +1,81 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, team_scan ) +{ + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( TEST_CATEGORY, team_long_reduce ) +{ + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( TEST_CATEGORY, team_double_reduce ) +{ + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + 
TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9442782749740d06378dc59dee639331f9b33295 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, team_shared_request ) +{ + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( TEST_CATEGORY, team_scratch_request ) +{ + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) +TEST_F( TEST_CATEGORY, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif +#endif + +TEST_F( TEST_CATEGORY, shmem_size ) +{ + TestShmemSize< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< 
TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3e5151b681e26641f6bfdc972f7d8e8596bf2c94 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewAPI_b.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8e32fa10eecc8d2d901565bf6f020f53fb920349 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd629d7083de0b146c9a83c4b7801cdcc17d8115 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9a3457bcffa9346259d129bb7cf0618af7b5c48a --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewOfClass.cpp b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewOfClass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d0a38c873c8b05a1e78ec715e4da949633c511f8 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <openmptarget/TestOpenMPTarget_Category.hpp> +#include <TestViewOfClass.hpp> + diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Category.hpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..eb2a4de4c7f9a07c312cb3731dd9568c9b6fbf04 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class qthreads : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY qthreads +#define TEST_EXECSPACE Kokkos::Qthreads + +#endif diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Complex.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..799b8454f8d8922bd5a509ed819d3f3adda57c26 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Complex.cpp @@ -0,0 +1,3 @@ +#include<qthreads/TestQthreads_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp index 0faec84056997dd0d1236ff8c00f2218b2549cf9..4e53fecf441a901271484d76540a85f9563d1914 100644 --- a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp @@ -111,12 +111,7 @@ TEST_F( qthreads, compiler_macros ) TEST_F( qthreads, memory_pool ) { #if 0 - bool val = TestMemoryPool::test_mempool< Kokkos::Qthreads >( 128, 128000000 ); - ASSERT_TRUE( val ); - TestMemoryPool::test_mempool2< Kokkos::Qthreads >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Qthreads >(); #endif } @@ -127,7 +122,8 @@ TEST_F( qthreads, memory_pool ) TEST_F( qthreads, task_fib ) { #if 0 - for ( int i = 0; i < 25; ++i ) { + const int N = 24 ; // 25 triggers tbd bug on Cuda/Pascal + for ( int i = 0; i < N; ++i ) { TestTaskScheduler::TestFib< Kokkos::Qthreads >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); } #endif diff --git 
a/lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c6cc6a8b45df309a17c390d675953a21dccc6924 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestAtomicOperations.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_AtomicViews.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_AtomicViews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9bcb8f2655e386b21155a68af144f460510b4f83 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_AtomicViews.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestAtomicViews.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp index 81ba532a3d45322ca561498585763d413256be3c..ed338875d065373b25a03b7f8aa5057b9bfedd34 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp @@ -41,164 +41,6 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestAtomic.hpp> -namespace Test { - -TEST_F( serial, atomics ) -{ - const int loop_count = 1e6; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 2 ) ) ); - 
ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 3 ) ) ); -} - -TEST_F( serial, atomic_operations ) -{ - const int start = 1; // Avoid zero for division. 
- const int end = 11; - - for ( int i = start; i < end; ++i ) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< 
unsigned int, Kokkos::Serial >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 2 ) ) ); - 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 4 ) ) ); - } -} - - -TEST_F( serial, atomic_views_integral ) -{ - const long length = 1000000; - - { - // Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 8 ) ) ); - } -} - -TEST_F( serial, atomic_views_nonintegral ) -{ - const long length = 1000000; - - { - // Non-Integral Types. 
- ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 4 ) ) ); - } -} - -TEST_F( serial, atomic_view_api ) -{ - TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Serial >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Category.hpp b/lib/kokkos/core/unit_test/serial/TestSerial_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..025437f15b81ea27c9e092200369d1a018e16b42 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class serial : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY serial +#define TEST_EXECSPACE Kokkos::Serial + +#endif diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Complex.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..01b1fcd521bbc40711cab98c3cb8016be49f874f --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Complex.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Init.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Init.cpp new file mode 100644 index 0000000000000000000000000000000000000000..752e61b6d96adfb2d2e444bd7ea1c62189344dbf --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Init.cpp @@ -0,0 +1,50 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestInit.hpp> +#include<TestCompilerMacros.hpp> +#include<TestPolicyConstruction.hpp> + + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_MDRange.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_MDRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5dcdac84709751a22d32adb7a48979bac902d02f --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_MDRange.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestMDRange.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp index b40ed3f4afc5b4176f02c2ad7d16a5ce19f2614b..a6a76a03bd3b82c714f916f76ba5e48758ce540c 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,132 +42,9 @@ //@HEADER */ -#include <serial/TestSerial.hpp> - -namespace Test { - -TEST_F( serial , mdrange_for ) -{ - TestMDRange_2D< Kokkos::Serial >::test_for2( 100, 100 ); - TestMDRange_3D< Kokkos::Serial >::test_for3( 100, 10, 100 ); - TestMDRange_4D< Kokkos::Serial >::test_for4( 100, 10, 10, 10 ); - TestMDRange_5D< Kokkos::Serial >::test_for5( 100, 10, 10, 10, 5 ); - TestMDRange_6D< Kokkos::Serial >::test_for6( 10, 10, 10, 10, 5, 5 ); -} - -TEST_F( serial , mdrange_reduce ) -{ - TestMDRange_2D< Kokkos::Serial >::test_reduce2( 100, 100 ); - TestMDRange_3D< Kokkos::Serial >::test_reduce3( 100, 10, 100 ); -} - -TEST_F( serial, policy_construction ) -{ - TestRangePolicyConstruction< Kokkos::Serial >(); - TestTeamPolicyConstruction< Kokkos::Serial >(); -} - -TEST_F( serial, range_tag ) -{ - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); - - TestRange< Kokkos::Serial, 
Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); - - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); - TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( serial, compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( serial, memory_pool ) -{ - bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >(); -} - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_TASKDAG ) - -TEST_F( serial, task_fib ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Serial >::run( i ); - } -} - -TEST_F( serial, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run( i ); - } -} - -TEST_F( serial, task_team ) -{ - TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run( 1000 ); // Put back after testing. 
-} - -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) -TEST_F( serial, cxx11 ) -{ - if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 4 ) ) ); - } -} -#endif - -TEST_F( serial, tile_layout ) -{ - TestTile::test< Kokkos::Serial, 1, 1 >( 1, 1 ); - TestTile::test< Kokkos::Serial, 1, 1 >( 2, 3 ); - TestTile::test< Kokkos::Serial, 1, 1 >( 9, 10 ); - - TestTile::test< Kokkos::Serial, 2, 2 >( 1, 1 ); - TestTile::test< Kokkos::Serial, 2, 2 >( 2, 3 ); - TestTile::test< Kokkos::Serial, 2, 2 >( 4, 4 ); - TestTile::test< Kokkos::Serial, 2, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Serial, 2, 4 >( 9, 9 ); - TestTile::test< Kokkos::Serial, 4, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Serial, 4, 4 >( 1, 1 ); - TestTile::test< Kokkos::Serial, 4, 4 >( 4, 4 ); - TestTile::test< Kokkos::Serial, 4, 4 >( 9, 9 ); - TestTile::test< Kokkos::Serial, 4, 4 >( 9, 11 ); - - TestTile::test< Kokkos::Serial, 8, 8 >( 1, 1 ); - TestTile::test< Kokkos::Serial, 8, 8 >( 4, 4 ); - TestTile::test< Kokkos::Serial, 8, 8 >( 9, 9 ); - TestTile::test< Kokkos::Serial, 8, 8 >( 9, 11 ); -} - -} // namespace Test +#include<serial/TestSerial_Category.hpp> +#include<TestTemplateMetaFunctions.hpp> +#include<TestAggregate.hpp> +#include<TestMemoryPool.hpp> +#include<TestCXX11.hpp> +#include<TestTile.hpp> diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_RangePolicy.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_RangePolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed9242925f00cafc1663e1fdd59ba7fddb41f2a7 --- /dev/null +++ 
b/lib/kokkos/core/unit_test/serial/TestSerial_RangePolicy.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestRange.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp index 8a3d518cfbea93b97d9a885ac061a79494676362..db41a19ee6a7df5f081dc70797c95b1cbed795b8 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp @@ -41,89 +41,7 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestReduce.hpp> +#include <TestCXX11Deduction.hpp> -namespace Test { - -TEST_F( serial, long_reduce ) -{ - TestReduce< long, Kokkos::Serial >( 0 ); - TestReduce< long, Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce ) -{ - TestReduce< double, Kokkos::Serial >( 0 ); - TestReduce< double, Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, reducers ) -{ - TestReducers< int, Kokkos::Serial >::execute_integer(); - TestReducers< size_t, Kokkos::Serial >::execute_integer(); - TestReducers< double, Kokkos::Serial >::execute_float(); - TestReducers< Kokkos::complex<double >, Kokkos::Serial>::execute_basic(); -} - -TEST_F( serial, long_reduce_dynamic ) -{ - TestReduceDynamic< long, Kokkos::Serial >( 0 ); - TestReduceDynamic< long, Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, double_reduce_dynamic ) -{ - TestReduceDynamic< double, Kokkos::Serial >( 0 ); - TestReduceDynamic< double, Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, long_reduce_dynamic_view ) -{ - TestReduceDynamicView< long, Kokkos::Serial >( 0 ); - TestReduceDynamicView< long, Kokkos::Serial >( 1000000 ); -} - -TEST_F( serial, scan ) -{ - TestScan< Kokkos::Serial >::test_range( 1, 1000 ); - TestScan< Kokkos::Serial >( 0 ); - TestScan< Kokkos::Serial >( 10 ); - TestScan< Kokkos::Serial >( 10000 ); -} - -TEST_F( 
serial, team_scan ) -{ - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -TEST_F( serial, team_long_reduce ) -{ - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( serial, team_double_reduce ) -{ - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( serial, reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::Serial >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Scan.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a368a80894dc400358c95dfe6c6fd236ffb5a8ed --- /dev/null +++ 
b/lib/kokkos/core/unit_test/serial/TestSerial_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestScan.hpp> + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SharedAlloc.cpp similarity index 91% rename from lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp rename to lib/kokkos/core/unit_test/serial/TestSerial_SharedAlloc.cpp index aead381a11e5b5a88763d9622deac55c3ceaf631..1b9786191fd442a3c42aa97e2dd75d3981d28fca 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SharedAlloc.cpp @@ -41,14 +41,15 @@ //@HEADER */ -#include <openmp/TestOpenMP.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestSharedAlloc.hpp> namespace Test { -TEST_F( openmp, impl_view_mapping_a ) + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) { - test_view_mapping< Kokkos::OpenMP >(); - test_view_mapping_operator< Kokkos::OpenMP >(); + test_shared_alloc< Kokkos::HostSpace, TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp index 3dc3e2019d9fd3927f422c689bfbd65fc45a997b..3fffcc74149e9e3bd2e6d93c38f42a561682989a 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp @@ -41,63 +41,64 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_auto_1d_left ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Serial >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_auto_1d_right ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_right 
) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Serial >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_auto_1d_stride ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Serial >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_assign_strided ) +TEST_F( TEST_CATEGORY, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >(); + TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_left_0 ) +TEST_F( TEST_CATEGORY, view_subview_left_0 ) { - TestViewSubview::test_left_0< Kokkos::Serial >(); + TestViewSubview::test_left_0< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_left_1 ) +TEST_F( TEST_CATEGORY, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::Serial >(); + TestViewSubview::test_left_1< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_left_2 ) +TEST_F( TEST_CATEGORY, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::Serial >(); + TestViewSubview::test_left_2< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_left_3 ) +TEST_F( TEST_CATEGORY, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::Serial >(); + TestViewSubview::test_left_3< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_right_0 ) +TEST_F( TEST_CATEGORY, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::Serial >(); + TestViewSubview::test_right_0< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_right_1 ) +TEST_F( TEST_CATEGORY, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::Serial >(); + TestViewSubview::test_right_1< TEST_EXECSPACE >(); } -TEST_F( serial, view_subview_right_3 ) +TEST_F( TEST_CATEGORY, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::Serial >(); + TestViewSubview::test_right_3< 
TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp index 536c3bf1979a5b3b9bc33cd8768a86ca3367a8c7..d49a2ba9c5c938b5b7253105cb6db7ed55194061 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp @@ -41,22 +41,23 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_layoutleft_to_layoutleft ) +TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft ) { - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( serial, view_subview_layoutright_to_layoutright ) +TEST_F( TEST_CATEGORY, view_subview_layoutright_to_layoutright ) { - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // 
namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp index 579a12bf782a34c4739c9e4a30685878dc55900e..173fb7f68c193a7815fcd91a887ceeffd63ca29b 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< Kokkos::Serial >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp index ff009fef27715a8b366e848267eaa4c6c10bc2d7..88594c70d3ffc9c6c13cf3d8314f0af195320513 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign_atomic ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp index a20478433cd2b87f0e07a0e793143c4f6f2ddf40..5c754ecdeea10c1710fd5dcabbf370ba6027e70e 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp @@ -41,13 +41,14 @@ //@HEADER */ 
-#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp index a34b26d9f79317b90dd0bfaf06385ad638d4757f..6b5ab72d9095f1c6f3e77c0b417f3316d4e67c0d 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_2d_from_3d ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp index 6d1882cf04e3d384773d384215cd0244ebd8cfcd..27cb44dcc773cf77efdfe707b7f0c3823e8edad1 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { TEST_F( serial, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // 
namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp index 12fb883b63e12812c947facc4b070c0577d09783..ce15de135c96279394d609afe8653810491736dc 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_2d_from_3d_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp index 8aae20c0239d5a6272879887c7626f0e1a0e2f2a..68694038e5377411d5d43a0ad294363a14e769bd 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp index e75db8d52dc1250b582d62c7e51b6bda8ce00b9b..5cdfd892ce8dcc70869e619648eff264ea70befb 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp +++ 
b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp index b9cea2ce89c6f2bb311299ee6463ac34185245d8..4d08803fc229bf78d6b519b6bb084cf03783f074 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp index e5dbcead376ebdcb37a4bb79dfdfe1916b3e2d0d..938d1e1dccee7188e4a912a9b049ceb87a789d84 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, 
view_subview_3d_from_5d_right ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp index 3005030f934551a0f8ea5d6be7772cfefa605a98..6166d3a684c022fc364d67469685d0e54c2e0e30 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp index fee8cb7af2a20cdebafa9270932cda2457363602..6bdb0e943bffd5fa87bd3aae7b7839ac1feb7404 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // 
namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Task.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a943334cd53493775afabc50f67389e86e1d4743 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Task.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<serial/TestSerial_Category.hpp> +#include<TestTaskScheduler.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp index f13b2ce1b4bd20e92509fc9dc1801352ff3bb289..fc0520721038999fef9f3b5026c5d936e7915038 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp @@ -41,82 +41,35 @@ //@HEADER */ -#include <serial/TestSerial.hpp> +#include <serial/TestSerial_Category.hpp> +#include <TestTeam.hpp> namespace Test { -TEST_F( serial, team_tag ) +TEST_F( TEST_CATEGORY, team_for ) { - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Serial, 
Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); -} - -TEST_F( serial, team_shared_request ) -{ - TestSharedTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -TEST_F( serial, team_scratch_request ) -{ - TestScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); -} + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) -TEST_F( serial, team_lambda_shared_request ) -{ - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); } -#endif -TEST_F( serial, shmem_size ) -{ - TestShmemSize< Kokkos::Serial >(); -} -TEST_F( serial, multi_level_scratch ) +TEST_F( TEST_CATEGORY, team_reduce ) { - TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, 
Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } - -TEST_F( serial, team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 1 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 5 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 6 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 7 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 8 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 9 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 10 ) ) ); } -#ifdef KOKKOS_COMPILER_GNU -#if ( KOKKOS_COMPILER_GNU == 472 ) -#define SKIP_TEST -#endif -#endif +#include <TestTeamVector.hpp> -#ifndef SKIP_TEST -TEST_F( serial, triple_nested_parallelism ) -{ - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 32 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 16 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 16, 16 ); -} -#endif -} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_TeamReductionScan.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_TeamReductionScan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5076b380bda5145493f9d090bbaabf2a3502a779 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_TeamReductionScan.cpp @@ -0,0 +1,81 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, team_scan ) +{ + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( TEST_CATEGORY, team_long_reduce ) +{ + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( TEST_CATEGORY, team_double_reduce ) +{ + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp new 
file mode 100644 index 0000000000000000000000000000000000000000..8fa823920102dfb241b37da5e6b65a8df64e5b97 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, team_shared_request ) +{ + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( TEST_CATEGORY, team_scratch_request ) +{ + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) +TEST_F( TEST_CATEGORY, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif +#endif + +TEST_F( TEST_CATEGORY, shmem_size ) +{ + TestShmemSize< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp 
b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp index 8c48ad2ceda81ca46913e3d3206fac96e492950a..6f657d1da8188df2cf75fcabdf13fd67c79f035a 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp @@ -41,84 +41,5 @@ //@HEADER */ -#include <serial/TestSerial.hpp> - -namespace Test { - -TEST_F( serial, impl_shared_alloc ) -{ - test_shared_alloc< Kokkos::HostSpace, Kokkos::Serial >(); -} - -TEST_F( serial, impl_view_mapping_b ) -{ - test_view_mapping_subview< Kokkos::Serial >(); - TestViewMappingAtomic< Kokkos::Serial >::run(); -} - -TEST_F( serial, view_api ) -{ - TestViewAPI< double, Kokkos::Serial >(); -} - -TEST_F( serial, view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Serial >(); -} - -TEST_F( serial, view_remap ) -{ - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::Serial > output_type; - - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::Serial > input_type; - - typedef Kokkos::View< int*[N0][N2][N3], - Kokkos::LayoutLeft, - Kokkos::Serial > diff_type; - - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); - - int value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - input( i0, i1, i2, i3 ) = ++value; - } - - // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
- Kokkos::deep_copy( output, input ); - - value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); - } -} - -TEST_F( serial, view_aggregate ) -{ - TestViewAggregate< Kokkos::Serial >(); -} - -TEST_F( serial, template_meta_functions ) -{ - TestTemplateMetaFunctions< int, Kokkos::Serial >(); -} - -} // namespace Test +#include <serial/TestSerial_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f5058d88fd801f41bce799a1af35973d439b5ec8 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6d5af0c975317093b993c9924c53ff0956e43b00 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e9ae239942ca9db4f6eb4581ebca9f98d475b63b --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewOfClass.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewOfClass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c5eeafaf3b5af4527dfdb7de061b35e4feba81b9 --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <serial/TestSerial_Category.hpp> +#include <TestViewOfClass.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads.hpp b/lib/kokkos/core/unit_test/threads/TestThreads.hpp index 0afd6772fefff3e2efd7d490d35f985346163fd6..2b4c7c1518daf20a34c0ccab55944e093e8be11d 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads.hpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads.hpp @@ -56,28 +56,30 @@ #include <Kokkos_Core.hpp> #include <TestTile.hpp> -#include <TestSharedAlloc.hpp> -#include <TestViewMapping.hpp> -#include <TestViewAPI.hpp> -#include <TestViewOfClass.hpp> -#include <TestViewSubview.hpp> -#include <TestAtomic.hpp> -#include <TestAtomicOperations.hpp> -#include <TestAtomicViews.hpp> +//#include <TestSharedAlloc.hpp> +//#include <TestViewAPI.hpp> +//#include <TestViewOfClass.hpp> +//#include <TestViewSubview.hpp> +//#include <TestAtomic.hpp> +//#include <TestAtomicOperations.hpp> +//#include <TestAtomicViews.hpp> #include <TestRange.hpp> #include <TestTeam.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskScheduler.hpp> -#include <TestMemoryPool.hpp> -#include <TestCXX11.hpp> -#include <TestCXX11Deduction.hpp> +//#include <TestReduce.hpp> +//#include 
<TestScan.hpp> +//#include <TestAggregate.hpp> +//#include <TestCompilerMacros.hpp> + +//TODO enable task scheduler tests for threads +//#include <TestTaskScheduler.hpp> + +//#include <TestMemoryPool.hpp> +//#include <TestCXX11.hpp> +//#include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> -#include <TestTemplateMetaFunctions.hpp> -#include <TestPolicyConstruction.hpp> -#include <TestMDRange.hpp> +//#include <TestTemplateMetaFunctions.hpp> +//#include <TestPolicyConstruction.hpp> +//#include <TestMDRange.hpp> namespace Test { diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations.cpp new file mode 100644 index 0000000000000000000000000000000000000000..aaa568717fb8db99d495c03ef3b18b9f825a21e5 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestAtomicOperations.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_AtomicViews.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_AtomicViews.cpp new file mode 100644 index 0000000000000000000000000000000000000000..864ba89d7019a29e73d35f0adf572cfd853401ce --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_AtomicViews.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestAtomicViews.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp index d2a5ea5d6352acc79606082fd75c465b0b5b515e..c7fc35a2698758761e19aafafd3edee0d4315bcc 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp @@ -41,160 +41,6 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestAtomic.hpp> -namespace Test { - -TEST_F( threads, atomics ) -{ - const int loop_count = 1e4; - - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads 
>( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 3 ) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 1 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 2 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 3 ) ) ); -} - -TEST_F( threads, atomic_operations ) -{ - const int start = 1; // Avoid zero for division. 
- const int end = 11; - for ( int i = start; i < end; ++i ) - { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long 
int, Kokkos::Threads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 1 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 4 ) ) ); - } -} - -TEST_F( threads, atomic_views_integral ) -{ - const long length = 1000000; - { - // Integral Types. - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 8 ) ) ); - } -} - -TEST_F( threads, atomic_views_nonintegral ) -{ - const long length = 1000000; - { - // Non-Integral Types. 
- ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 4 ) ) ); - } -} - -TEST_F( threads, atomic_view_api ) -{ - TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Threads >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Category.hpp b/lib/kokkos/core/unit_test/threads/TestThreads_Category.hpp new file mode 100644 index 0000000000000000000000000000000000000000..01a990b654f7199675a3507b85db4a5fa01624e3 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Category.hpp @@ -0,0 +1,65 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_THREADS_HPP +#define KOKKOS_TEST_THREADS_HPP + +#include <gtest/gtest.h> + +namespace Test { + +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() { + } + + static void TearDownTestCase() { + } +}; + +} // namespace Test + +#define TEST_CATEGORY threads +#define TEST_EXECSPACE Kokkos::Threads + +#endif diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Complex.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Complex.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e2cd66a33cbe08bb1f961ebf0c1e23ccd19ac8ef --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Complex.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestComplex.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Init.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Init.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0abd6413280d0c7ecc6b1b1d58216726537fcb2e --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Init.cpp @@ -0,0 +1,50 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestInit.hpp> +#include<TestCompilerMacros.hpp> +#include<TestPolicyConstruction.hpp> + + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_MDRange.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_MDRange.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46d7126093e56061e53b4a4530d0425fdcd5663d --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_MDRange.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestMDRange.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp index 7d268c14547e4680c1ad57d8e66e2b1a4bfaf501..c11155c5c0c1d55dcc580faa12bcd3646d5d55db 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -1,3 +1,4 @@ + /* //@HEADER // ************************************************************************ @@ -41,156 +42,9 @@ //@HEADER */ -#include <threads/TestThreads.hpp> - -namespace Test { - -TEST_F( threads, init ) -{ - ; -} - -TEST_F( threads , mdrange_for ) { - TestMDRange_2D< Kokkos::Threads >::test_for2( 100, 100 ); - TestMDRange_3D< Kokkos::Threads >::test_for3( 100, 10, 100 ); - TestMDRange_4D< Kokkos::Threads >::test_for4( 100, 10, 10, 10 ); - TestMDRange_5D< Kokkos::Threads >::test_for5( 100, 10, 10, 10, 5 ); - TestMDRange_6D< Kokkos::Threads >::test_for6( 10, 10, 10, 10, 5, 5 ); -} - -TEST_F( threads , mdrange_reduce ) { - TestMDRange_2D< Kokkos::Threads >::test_reduce2( 100, 100 ); - TestMDRange_3D< Kokkos::Threads >::test_reduce3( 100, 10, 100 ); -} - -TEST_F( threads, policy_construction ) -{ - TestRangePolicyConstruction< Kokkos::Threads >(); - TestTeamPolicyConstruction< Kokkos::Threads >(); -} - -TEST_F( threads, range_tag ) -{ - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> 
>::test_scan( 0 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 0 ); - - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); - - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 3 ); - - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); - - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); - TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); -} - -//---------------------------------------------------------------------------- - -TEST_F( threads, compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); -} - -//---------------------------------------------------------------------------- - -TEST_F( threads, memory_pool ) -{ - bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); - ASSERT_TRUE( val ); - - TestMemoryPool::test_mempool2< Kokkos::Threads >( 64, 4, 1000000, 2000000 ); - - TestMemoryPool::test_memory_exhaustion< Kokkos::Threads >(); -} - -//---------------------------------------------------------------------------- - -#if defined( 
KOKKOS_ENABLE_TASKDAG ) -/* -TEST_F( threads, task_fib ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Threads >::run( i ); - } -} - -TEST_F( threads, task_depend ) -{ - for ( int i = 0; i < 25; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run( i ); - } -} - -TEST_F( threads, task_team ) -{ - TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run( 1000 ); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run( 1000 ); // Put back after testing. -} -*/ -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ - -//---------------------------------------------------------------------------- - -#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) -TEST_F( threads, cxx11 ) -{ - if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Threads >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 1 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 2 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 3 ) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 4 ) ) ); - } -} -#endif - -TEST_F( threads, tile_layout ) -{ - TestTile::test< Kokkos::Threads, 1, 1 >( 1, 1 ); - TestTile::test< Kokkos::Threads, 1, 1 >( 2, 3 ); - TestTile::test< Kokkos::Threads, 1, 1 >( 9, 10 ); - - TestTile::test< Kokkos::Threads, 2, 2 >( 1, 1 ); - TestTile::test< Kokkos::Threads, 2, 2 >( 2, 3 ); - TestTile::test< Kokkos::Threads, 2, 2 >( 4, 4 ); - TestTile::test< Kokkos::Threads, 2, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Threads, 2, 4 >( 9, 9 ); - TestTile::test< Kokkos::Threads, 4, 2 >( 9, 9 ); - - TestTile::test< Kokkos::Threads, 4, 4 >( 1, 1 ); - TestTile::test< Kokkos::Threads, 4, 4 >( 4, 4 ); - TestTile::test< Kokkos::Threads, 4, 4 >( 9, 9 ); - TestTile::test< Kokkos::Threads, 4, 4 >( 9, 11 ); - - TestTile::test< Kokkos::Threads, 8, 8 >( 1, 1 ); - TestTile::test< Kokkos::Threads, 8, 8 >( 4, 4 ); - TestTile::test< Kokkos::Threads, 8, 8 >( 9, 9 ); - TestTile::test< Kokkos::Threads, 8, 8 >( 9, 11 
); -} - -TEST_F( threads, dispatch ) -{ - const int repeat = 100; - for ( int i = 0; i < repeat; ++i ) { - for ( int j = 0; j < repeat; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >( 0, j ) - , KOKKOS_LAMBDA( int ) {} ); - } - } -} - -} // namespace Test +#include<threads/TestThreads_Category.hpp> +#include<TestTemplateMetaFunctions.hpp> +#include<TestAggregate.hpp> +#include<TestMemoryPool.hpp> +#include<TestCXX11.hpp> +#include<TestTile.hpp> diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_RangePolicy.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_RangePolicy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9eed794dec779bef351456a5248a0eb2f9cfdbfb --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_RangePolicy.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestRange.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp index d2b75ca892b5abcf3f405aec37459f53c2a3aafc..c4b3850120be86364de95ec0217acfa1137c7e07 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp @@ -41,106 +41,6 @@ //@HEADER */ -#include <threads/TestThreads.hpp> - -namespace Test { - -TEST_F( threads, long_reduce ) -{ - TestReduce< long, Kokkos::Threads >( 0 ); - TestReduce< long, Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce ) -{ - TestReduce< double, Kokkos::Threads >( 0 ); - TestReduce< double, Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, reducers ) -{ - TestReducers< int, Kokkos::Threads >::execute_integer(); - TestReducers< size_t, Kokkos::Threads >::execute_integer(); - TestReducers< double, Kokkos::Threads >::execute_float(); - TestReducers< 
Kokkos::complex<double>, Kokkos::Threads >::execute_basic(); -} - -TEST_F( threads, long_reduce_dynamic ) -{ - TestReduceDynamic< long, Kokkos::Threads >( 0 ); - TestReduceDynamic< long, Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, double_reduce_dynamic ) -{ - TestReduceDynamic< double, Kokkos::Threads >( 0 ); - TestReduceDynamic< double, Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, long_reduce_dynamic_view ) -{ - TestReduceDynamicView< long, Kokkos::Threads >( 0 ); - TestReduceDynamicView< long, Kokkos::Threads >( 1000000 ); -} - -TEST_F( threads, scan ) -{ - TestScan< Kokkos::Threads >::test_range( 1, 1000 ); - TestScan< Kokkos::Threads >( 0 ); - TestScan< Kokkos::Threads >( 100000 ); - TestScan< Kokkos::Threads >( 10000000 ); - Kokkos::Threads::fence(); -} - -#if 0 -TEST_F( threads, scan_small ) -{ - typedef TestScan< Kokkos::Threads, Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor; - - for ( int i = 0; i < 1000; ++i ) { - TestScanFunctor( 10 ); - TestScanFunctor( 10000 ); - } - TestScanFunctor( 1000000 ); - TestScanFunctor( 10000000 ); - - Kokkos::Threads::fence(); -} -#endif - -TEST_F( threads, team_scan ) -{ - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); -} - -TEST_F( threads, team_long_reduce ) -{ - TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - 
TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( threads, team_double_reduce ) -{ - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); -} - -TEST_F( threads, reduction_deduction ) -{ - TestCXX11::test_reduction_deduction< Kokkos::Threads >(); -} - -} // namespace Test +#include <threads/TestThreads_Category.hpp> +#include <TestReduce.hpp> +#include <TestCXX11Deduction.hpp> diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Scan.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Scan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..11e99a7893386229a4516c470bc0b6babc8ba7f9 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Scan.cpp @@ -0,0 +1,47 @@ + +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include<threads/TestThreads_Category.hpp> +#include<TestScan.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SharedAlloc.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SharedAlloc.cpp new file mode 100644 index 0000000000000000000000000000000000000000..75a824d5daeafe1a188020b9be768083a4020ddd --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SharedAlloc.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestSharedAlloc.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, TEST_EXECSPACE >(); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp index 68a9da6aedef550e94c037df93ff6dc741ff3589..c78714feba817dad40282bf7510bea8bf9bff89d 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp @@ -41,63 +41,64 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_auto_1d_left ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Threads >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_auto_1d_right ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Threads >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_auto_1d_stride ) +TEST_F( TEST_CATEGORY, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Threads >(); + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_assign_strided ) +TEST_F( TEST_CATEGORY, view_subview_assign_strided ) { - TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); + TestViewSubview::test_1d_strided_assignment< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_left_0 ) +TEST_F( TEST_CATEGORY, view_subview_left_0 ) { - 
TestViewSubview::test_left_0< Kokkos::Threads >(); + TestViewSubview::test_left_0< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_left_1 ) +TEST_F( TEST_CATEGORY, view_subview_left_1 ) { - TestViewSubview::test_left_1< Kokkos::Threads >(); + TestViewSubview::test_left_1< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_left_2 ) +TEST_F( TEST_CATEGORY, view_subview_left_2 ) { - TestViewSubview::test_left_2< Kokkos::Threads >(); + TestViewSubview::test_left_2< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_left_3 ) +TEST_F( TEST_CATEGORY, view_subview_left_3 ) { - TestViewSubview::test_left_3< Kokkos::Threads >(); + TestViewSubview::test_left_3< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_right_0 ) +TEST_F( TEST_CATEGORY, view_subview_right_0 ) { - TestViewSubview::test_right_0< Kokkos::Threads >(); + TestViewSubview::test_right_0< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_right_1 ) +TEST_F( TEST_CATEGORY, view_subview_right_1 ) { - TestViewSubview::test_right_1< Kokkos::Threads >(); + TestViewSubview::test_right_1< TEST_EXECSPACE >(); } -TEST_F( threads, view_subview_right_3 ) +TEST_F( TEST_CATEGORY, view_subview_right_3 ) { - TestViewSubview::test_right_3< Kokkos::Threads >(); + TestViewSubview::test_right_3< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp index c5cf061e8289d9d8ac5ffea92d38c9cd91349922..6cea72c31bfcc5fee6bb990684baa3e41f3c3795 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp @@ -41,22 +41,23 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_layoutleft_to_layoutleft ) +TEST_F( TEST_CATEGORY, view_subview_layoutleft_to_layoutleft ) { - 
TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( threads, view_subview_layoutright_to_layoutright ) +TEST_F( TEST_CATEGORY, view_subview_layoutright_to_layoutright ) { - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp index 9018c1f4f799c1f76ee082c57dedc644627c7a75..4ff2f871bc5212e8a25ec89a200632deda3d1f89 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign ) { - TestViewSubview::test_1d_assign< 
Kokkos::Threads >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp index 9483abd9cc3f78430f2234c71708fe0315a949a9..e522def61492e7bb5c29c5c8c2c3f79d098dc8e2 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign_atomic ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp index e252a26565bf6dad6387b87340c5c93cd2b3415f..bb3f9a6106f4c251294e41867ec3d151b10fe021 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_1d_assign< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp index 
3e211b1a58542b6307a731c3765190e91132d4dd..8cc26b5ce72b6d874237c235f2c294a7e0ff05bf 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_2d_from_3d ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp index 865d50b1a1b918b99fb36d2a3e5c889a7c93e5a7..9d5824ba98f569da0a1fd0a0460597716ba30a4e 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { TEST_F( threads, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp index c5840073b6486226281942bfd0c0ad8e2052ff85..9e6426d48e32c1b63e4d887b909a0c67c02f3d38 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, 
view_subview_2d_from_3d_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_2d_subview_3d< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp index 7b8825ef628dbaa4449f7830abd4e227d842dccc..9671df9ddab5ea1c96c17db2398f8f93cfecbe3e 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp index 7bc16a5827a602193db55f7ffa044b38babef77d..96f6a808d3a733cbf2117b3b2d7a14ffc69640ea 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace 
Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp index 57b87b6098bdd818c8e215ffb1d5938043746494..d837cc086b0b66fa5ccfd961bdc75a98303486e7 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_left< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp index 1875a883d485e1620430cadc59c09554dfc00ac1..84b90c76e501e95fd25b9c7e81eaccfcfc22b5be 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp index cf6428b18e333d66f4637fc92a45dc7f51052cc6..aafbfd01d3d3f0a6b2231508decb2c90ca745155 100644 --- 
a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_atomic ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp index 7060fdb273c928d7346686c54d0a374188c47257..2b855258a690616b082117a6ddf816dffc3ee450 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp @@ -41,13 +41,14 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include <threads/TestThreads_Category.hpp> +#include <TestViewSubview.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) +TEST_F( TEST_CATEGORY, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_3d_subview_5d_right< TEST_EXECSPACE, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } } // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp index d802d658309b4ecfbd28a5ec4ce6d17edc4a5f4a..1da6fb8fe6b0eb7648a5e5fd7d2350a2fce21588 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp @@ -41,87 +41,35 @@ //@HEADER */ -#include <threads/TestThreads.hpp> +#include 
<threads/TestThreads_Category.hpp> +#include <TestTeam.hpp> namespace Test { -TEST_F( threads, team_tag ) +TEST_F( TEST_CATEGORY, team_for ) { - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); } -TEST_F( threads, team_shared_request ) -{ - TestSharedTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); -} -TEST_F( threads, team_scratch_request ) +TEST_F( TEST_CATEGORY, 
team_reduce ) { - TestScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } - -#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) -TEST_F( threads, team_lambda_shared_request ) -{ - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#endif -TEST_F( threads, shmem_size ) -{ - TestShmemSize< Kokkos::Threads >(); -} +#include <TestTeamVector.hpp> -TEST_F( threads, multi_level_scratch ) -{ - TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); -} - -TEST_F( threads, team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 0 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 1 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 2 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 3 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 4 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 5 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 6 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 7 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 8 ) ) ); - ASSERT_TRUE( ( 
TestTeamVector::Test< Kokkos::Threads >( 9 ) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 10 ) ) ); -} - -#ifdef KOKKOS_COMPILER_GNU -#if ( KOKKOS_COMPILER_GNU == 472 ) -#define SKIP_TEST -#endif -#endif - -#ifndef SKIP_TEST -TEST_F( threads, triple_nested_parallelism ) -{ - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 32 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 16 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 16, 16 ); -} -#endif -} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_TeamReductionScan.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_TeamReductionScan.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8f4f3a515cd86f436b98a2a202c5cf5304cdb324 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_TeamReductionScan.cpp @@ -0,0 +1,81 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + + +TEST_F( TEST_CATEGORY, team_scan ) +{ + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +} + +TEST_F( TEST_CATEGORY, team_long_reduce ) +{ + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + 
TestReduceTeam< long, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +TEST_F( TEST_CATEGORY, team_double_reduce ) +{ + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dcf7c7463423e9c0dc3953fdcadce3fb5b5529f3 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp @@ -0,0 +1,83 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestTeam.hpp> + +namespace Test { + +TEST_F( TEST_CATEGORY, team_shared_request ) +{ + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +TEST_F( TEST_CATEGORY, team_scratch_request ) +{ + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if !defined(KOKKOS_ENABLE_CUDA) || ( 8000 <= CUDA_VERSION ) +TEST_F( TEST_CATEGORY, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} +#endif +#endif + +TEST_F( TEST_CATEGORY, shmem_size ) +{ + TestShmemSize< TEST_EXECSPACE >(); +} + +TEST_F( TEST_CATEGORY, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< TEST_EXECSPACE, Kokkos::Schedule<Kokkos::Dynamic> >(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp deleted file mode 100644 index 36eae287936ad9854dd030fc304506c3d3745c03..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <threads/TestThreads.hpp> - -namespace Test { - -TEST_F( threads, impl_view_mapping_a ) -{ - test_view_mapping< Kokkos::Threads >(); - test_view_mapping_operator< Kokkos::Threads >(); -} - -} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp index 8c78d094435b3f524668cb1bffa44b5144749063..cd3d73dfd099de4de15059bd8bb01b28cdd79d8d 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp @@ -41,84 +41,5 @@ //@HEADER */ -#include <threads/TestThreads.hpp> - -namespace Test { - -TEST_F( threads, impl_shared_alloc ) -{ - test_shared_alloc< Kokkos::HostSpace, Kokkos::Threads >(); -} - -TEST_F( threads, impl_view_mapping_b ) -{ - test_view_mapping_subview< Kokkos::Threads >(); - TestViewMappingAtomic< Kokkos::Threads >::run(); -} - -TEST_F( threads, view_api ) -{ - TestViewAPI< double, Kokkos::Threads >(); -} - -TEST_F( threads, view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Threads >(); -} - -TEST_F( threads, view_remap ) -{ - enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3], - Kokkos::LayoutRight, - Kokkos::Threads > output_type; - - typedef Kokkos::View< int**[N2][N3], - Kokkos::LayoutLeft, - Kokkos::Threads > input_type; - - typedef Kokkos::View< int*[N0][N2][N3], - Kokkos::LayoutLeft, - Kokkos::Threads > diff_type; - - output_type output( "output", N0 ); - input_type input ( "input", N0, N1 ); - diff_type diff ( "diff", N0 ); - - int value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - input( i0, i1, i2, i3 ) = ++value; - } - - // Kokkos::deep_copy( diff, input ); // Throw with 
incompatible shape. - Kokkos::deep_copy( output, input ); - - value = 0; - - for ( size_t i3 = 0; i3 < N3; ++i3 ) - for ( size_t i2 = 0; i2 < N2; ++i2 ) - for ( size_t i1 = 0; i1 < N1; ++i1 ) - for ( size_t i0 = 0; i0 < N0; ++i0 ) - { - ++value; - ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); - } -} - -TEST_F( threads, view_aggregate ) -{ - TestViewAggregate< Kokkos::Threads >(); -} - -TEST_F( threads, template_meta_functions ) -{ - TestTemplateMetaFunctions< int, Kokkos::Threads >(); -} - -} // namespace Test +#include <threads/TestThreads_Category.hpp> +#include <TestViewAPI.hpp> diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d37938a3a398a6a55b1ef8817899667638bbb505 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_a.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestViewMapping_a.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..43cc97aa228b7b1ebbe19676a6062edf6398f451 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_b.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestViewMapping_b.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_subview.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_subview.cpp new file mode 100644 index 0000000000000000000000000000000000000000..931bef0127b41e95cbb135752ca03aca95ead2ad --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_subview.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestViewMapping_subview.hpp> + diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewOfClass.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewOfClass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c2d371283adfd41d9ba55f689ba9ccdf993cb667 --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewOfClass.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <threads/TestThreads_Category.hpp> +#include <TestViewOfClass.hpp> + diff --git a/lib/kokkos/example/cmake_build/CMakeLists.txt b/lib/kokkos/example/cmake_build/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..4e149726ee780f961c386eb49de28d4fb18284a0 --- /dev/null +++ b/lib/kokkos/example/cmake_build/CMakeLists.txt @@ -0,0 +1,44 @@ +# Kokkos requires CMake version 3.1 or higher and that you have the following +# line with a version of 3.1 or higher as the first line of your project: +# cmake_minimum_required(VERSION 3.1) +# +# The other CMake commands required to build Kokkos as part of your application +# are: +# add_subdirectory(path/to/kokkos) +# target_link_libraries(executable or library) +# +# If Kokkos is not a subdirectory of your project, you will also need to pass a +# binary directory to add_subdirectory(). We had to pass the binary directory +# for this example for that reason. Note that target_link_libraries() can be +# called on a target added by add_executable(), add_library(), or another +# similar command. +# +# All the flags, etc. required to build using the Kokkos library are +# transitively added to targets which depend on the library. 
+# +# The CMake variables CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS are +# respected. We recommend that you set CMAKE_CXX_EXTENSIONS to OFF. +# Otherwise, CMake defaults to using extensions for the C++ standard, and the +# GNU extensions (-std=gnu++11) will be used for compilers that support it +# instead of standard C++11 (-std=c++11). +# +# A bunch of build options are added as variables (all starting with KOKKOS_) +# to the build. Check them out using ccmake or the CMake GUI. +# +# Building this example: +# 1. Create a build directory. +# 2. cd /path/to/build/directory +# 3. cmake /path/to/example +# 4. make + +cmake_minimum_required(VERSION 3.1) +project(Example CXX C) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") # CMAKE_CXX_FLAGS is one space-separated string, so extend it with set() rather than list(APPEND) + +add_subdirectory(${Example_SOURCE_DIR}/../.. ${Example_BINARY_DIR}/kokkos) + +add_executable(example cmake_example.cpp) +target_link_libraries(example kokkos) diff --git a/lib/kokkos/example/cmake_build/cmake_example.cpp b/lib/kokkos/example/cmake_build/cmake_example.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4786eeb859b99ebabea6a4b2630b618342b411a0 --- /dev/null +++ b/lib/kokkos/example/cmake_build/cmake_example.cpp @@ -0,0 +1,92 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2.
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> +#include <cstdlib> +#include <iostream> + +// Counts the even integers in [0, n) twice: once with Kokkos::parallel_reduce and +// once with a plain loop, printing each count and its elapsed seconds. +// Returns 0 when the two counts agree, -1 when they differ, and 1 on a usage error. +int main(int argc, char* argv[]) { + Kokkos::initialize(argc, argv); + Kokkos::DefaultExecutionSpace::print_configuration(std::cout); + + if (argc < 2) { + fprintf(stderr, "Usage: %s [<kokkos_options>] <size>\n", argv[0]); + Kokkos::finalize(); + return 1; + } + + const long n = std::strtol(argv[1], nullptr, 10); + + printf("Number of even integers from 0 to %ld\n", n - 1); + + Kokkos::Timer timer; + timer.reset(); + + // Compute the number of even integers from 0 to n-1, in parallel.
+ long count = 0; + Kokkos::parallel_reduce(n, KOKKOS_LAMBDA (const long i, long& lcount) { + lcount += (i % 2) == 0; + }, count); + + double count_time = timer.seconds(); + printf(" Parallel: %ld %10.6f\n", count, count_time); + + timer.reset(); + + // Compare to a sequential loop. + long seq_count = 0; + for (long i = 0; i < n; ++i) { + seq_count += (i % 2) == 0; + } + + count_time = timer.seconds(); + printf("Sequential: %ld %10.6f\n", seq_count, count_time); + + Kokkos::finalize(); + + return (count == seq_count) ? 0 : -1; +} diff --git a/lib/kokkos/example/feint/ElemFunctor.hpp b/lib/kokkos/example/feint/ElemFunctor.hpp index 583c4fda12a96a6c061ddb99d13e979a21f01a01..581faf46a417f96f963f5aa1776115361d8bcca0 100644 --- a/lib/kokkos/example/feint/ElemFunctor.hpp +++ b/lib/kokkos/example/feint/ElemFunctor.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H.
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP #define KOKKOS_EXAMPLE_FEINT_FUNCTORS_HPP -#include <stdio.h> +#include <cstdio> #include <Kokkos_Core.hpp> #include <BoxElemFixture.hpp> diff --git a/lib/kokkos/example/feint/feint_threads.cpp b/lib/kokkos/example/feint/feint_threads.cpp index 7f33401441478bd701d6de40bc819e1525245e01..44016caadad17a368d32e9570104930d2ea722e0 100644 --- a/lib/kokkos/example/feint/feint_threads.cpp +++ b/lib/kokkos/example/feint/feint_threads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,14 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include <Kokkos_Core.hpp> -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) #include <feint.hpp> @@ -63,4 +63,5 @@ template void feint< Kokkos::Threads ,true>( } /* namespace Example */ } /* namespace Kokkos */ -#endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */ +#endif /* #if defined( KOKKOS_ENABLE_THREADS ) */ + diff --git a/lib/kokkos/example/feint/main.cpp b/lib/kokkos/example/feint/main.cpp index bfafc81fbbd3a1323af02bc6a5aa67166c2bd62f..616e584bf68fb0f1a4e935aeb43b965ce8d04221 100644 --- a/lib/kokkos/example/feint/main.cpp +++ b/lib/kokkos/example/feint/main.cpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -48,7 +48,7 @@ int main() { -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) { // Use 4 cores per NUMA region, unless fewer available diff --git a/lib/kokkos/example/fenl/CGSolve.hpp b/lib/kokkos/example/fenl/CGSolve.hpp index 06a0030e09cadb0f9ab82080d8ab244563ae54b0..9533db312edf0141cc278311fd2acffb6c8b21d9 100644 --- a/lib/kokkos/example/fenl/CGSolve.hpp +++ b/lib/kokkos/example/fenl/CGSolve.hpp @@ -242,7 +242,7 @@ void cgsolve( const ImportType & import double old_rdot = Kokkos::Example::all_reduce( dot( count_owned , r , r ) , import.comm ); - norm_res = sqrt( old_rdot ); + norm_res = std::sqrt( old_rdot ); iteration = 0 ; Kokkos::Timer wall_clock ; @@ -269,7 +269,7 @@ void cgsolve( const ImportType & import /* p = r + beta * p ; */ waxpby( count_owned , p , 1.0 , r , beta , p ); - norm_res = sqrt( old_rdot = r_dot ); + norm_res = std::sqrt( old_rdot = r_dot ); ++iteration ; } diff --git a/lib/kokkos/example/fenl/fenl.cpp b/lib/kokkos/example/fenl/fenl.cpp index 4d50af66e26fa6ff095d17451d7c8770c40fedc9..5a6dc7e24176bbe600bb05fba009a7c35cf645ec 100644 --- a/lib/kokkos/example/fenl/fenl.cpp +++ b/lib/kokkos/example/fenl/fenl.cpp @@ -46,7 +46,7 @@ namespace Kokkos { namespace Example { namespace FENL { -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) template Perf fenl< Kokkos::Threads , Kokkos::Example::BoxElemPart::ElemLinear >( diff --git a/lib/kokkos/example/fenl/fenl.hpp b/lib/kokkos/example/fenl/fenl.hpp index e524a378c0c2eb75223c933c51267ff55852d08b..a23585fe7f4f6aa530a209f8c68fa0863b77b91a 100644 --- a/lib/kokkos/example/fenl/fenl.hpp +++ b/lib/kokkos/example/fenl/fenl.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXAMPLE_FENL_HPP #define KOKKOS_EXAMPLE_FENL_HPP -#include <stdlib.h> +#include <cstdlib> #include <BoxElemPart.hpp> #include <WrapMPI.hpp> diff --git a/lib/kokkos/example/fenl/fenl_functors.hpp b/lib/kokkos/example/fenl/fenl_functors.hpp index 3020c99a2f58637c64377a9ae933d0e3549d3c12..d23c934bd2c4feb50730309a5cd2ed8efc0a9d0e 100644 --- a/lib/kokkos/example/fenl/fenl_functors.hpp +++ b/lib/kokkos/example/fenl/fenl_functors.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP #define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP -#include <stdio.h> +#include <cstdio> #include <iostream> #include <fstream> @@ -340,7 +340,7 @@ public: if ( phase == FILL_NODE_SET ) { operator()( TagFillNodeSet() , iwork ); } - else */ + else */ if ( phase == FILL_GRAPH_ENTRIES ) { fill_graph_entries( iwork ); } @@ -378,7 +378,7 @@ public: void init( const TagFillNodeSet & , unsigned & update ) const { update = 0 ; } KOKKOS_INLINE_FUNCTION - void join( const TagFillNodeSet & + void join( const TagFillNodeSet & , volatile unsigned & update , volatile const unsigned & input ) const { update += input ; } diff --git a/lib/kokkos/example/fenl/fenl_impl.hpp b/lib/kokkos/example/fenl/fenl_impl.hpp index 38dea539c2704be9e12a4ac4c1809e46339b67ed..6b2326c102e63de60c506479db0d7643526d4f8a 100644 --- a/lib/kokkos/example/fenl/fenl_impl.hpp +++ b/lib/kokkos/example/fenl/fenl_impl.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXAMPLE_FENL_IMPL_HPP #define KOKKOS_EXAMPLE_FENL_IMPL_HPP -#include <math.h> +#include <cmath> // Kokkos libraries' headers: @@ -126,8 +126,8 @@ public: , zmax( arg_zmax ) , T_zmin( arg_T_zmin ) , T_zmax( arg_T_zmax ) - , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) ) - , b( 1.0 / sqrt(T_zmin) ) + , a( ( 1.0 / std::sqrt(T_zmax) - 1.0 / std::sqrt(T_zmin) ) / ( zmax - zmin ) ) + , b( 1.0 / std::sqrt(T_zmin) ) , K( 1.0 / ( 6.0 * a * a ) ) {} @@ -269,7 +269,7 @@ Perf fenl( << ")" ; } std::cout << " }" << std::endl ; - + std::cout << "Node coord {" ; for ( unsigned inode = 0 ; inode < fixture.node_count() ; ++inode ) { std::cout << " (" << h_node_coord(inode,0) diff --git a/lib/kokkos/example/fenl/main.cpp b/lib/kokkos/example/fenl/main.cpp index d79c6d8992ee60c8d81726d6d0a22144f2b0112e..33b0049bc6accd035e1ba5c6435ad2e2fc8b521e 100644 --- a/lib/kokkos/example/fenl/main.cpp +++ b/lib/kokkos/example/fenl/main.cpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,14 +35,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <strings.h> +#include <cmath> + +#include <cstdio> +#include <cstdlib> +#include <cstring> #include <utility> #include <string> @@ -356,7 +357,7 @@ int main( int argc , char ** argv ) cmdline[ CMD_USE_FIXTURE_Z ] = 2 ; } -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) if ( cmdline[ CMD_USE_THREADS ] ) { diff --git a/lib/kokkos/example/fixture/BoxElemFixture.hpp b/lib/kokkos/example/fixture/BoxElemFixture.hpp index 66d6e741afcc40d6e7b838bb0712ab5c1652ffe8..d659bf80342dacc2991362b2b4a242572c84182e 100644 --- a/lib/kokkos/example/fixture/BoxElemFixture.hpp +++ b/lib/kokkos/example/fixture/BoxElemFixture.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP #define KOKKOS_EXAMPLE_BOXELEMFIXTURE_HPP -#include <stdio.h> +#include <cstdio> #include <utility> #include <Kokkos_Core.hpp> @@ -97,7 +97,7 @@ struct MapGridUnitCube { const double x = double(grid_x) / double(m_max_x); const double y = double(grid_y) / double(m_max_y); const double z = double(grid_z) / double(m_max_z); - + coord_x = x + x * x * ( x - 1 ) * ( x - 1 ) * m_a ; coord_y = y + y * y * ( y - 1 ) * ( y - 1 ) * m_b ; coord_z = z + z * z * ( z - 1 ) * ( z - 1 ) * m_c ; @@ -235,7 +235,7 @@ public: m_recv_node = rhs.m_recv_node ; m_send_node = rhs.m_send_node ; m_send_node_id = rhs.m_send_node_id ; - + for ( int i = 0 ; i < ElemNode ; ++i ) { m_elem_node_local[i][0] = rhs.m_elem_node_local[i][0] ; m_elem_node_local[i][1] = rhs.m_elem_node_local[i][1] ; @@ -279,7 +279,7 @@ public: } } - const size_t nwork = + const size_t nwork = std::max( m_recv_node.dimension_0() , std::max( m_send_node.dimension_0() , std::max( m_send_node_id.dimension_0() , diff --git a/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp index 8beac8880febfa0a5c43a009d932103441bdedab..fb33aef56e8f9eb5e5bd5beb7532d799efaef794 100644 --- a/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp +++ b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -59,7 +59,7 @@ size_t run_serial(unsigned num_ids, unsigned num_find_iterations) size_t run_threads(unsigned num_ids, unsigned num_find_iterations) { -#ifdef KOKKOS_ENABLE_PTHREAD +#ifdef KOKKOS_ENABLE_THREADS std::cout << "Threads" << std::endl; return run_test<Kokkos::Threads>(num_ids,num_find_iterations); #else @@ -131,7 +131,7 @@ int main(int argc, char *argv[]) Kokkos::HostSpace::execution_space::finalize(); #endif -#ifdef KOKKOS_ENABLE_PTHREAD +#ifdef KOKKOS_ENABLE_THREADS Kokkos::Threads::initialize( threads_count ); num_errors += G2L::run_threads(num_ids,num_find_iterations); Kokkos::Threads::finalize(); diff --git a/lib/kokkos/example/grow_array/grow_array.hpp b/lib/kokkos/example/grow_array/grow_array.hpp index d8555bc4c61a491730f68827129c2b103efadf7c..f9c94ea2ed11b1683ac45490942ca9d5157544be 100644 --- a/lib/kokkos/example/grow_array/grow_array.hpp +++ b/lib/kokkos/example/grow_array/grow_array.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef EXAMPLE_GROW_ARRAY #define EXAMPLE_GROW_ARRAY -#include <stdlib.h> +#include <cstdlib> #include <Kokkos_Core.hpp> diff --git a/lib/kokkos/example/grow_array/main.cpp b/lib/kokkos/example/grow_array/main.cpp index 25c368213fdd069c2de64792ffca1819fc697119..e7438a9bf4e7429b658c5834eece03aeb2f5467e 100644 --- a/lib/kokkos/example/grow_array/main.cpp +++ b/lib/kokkos/example/grow_array/main.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -76,7 +76,7 @@ int main( int argc , char ** argv ) } #endif // defined( KOKKOS_ENABLE_SERIAL ) -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) { std::cout << "Kokkos::Threads" << std::endl ; Kokkos::Threads::initialize( num_threads , use_numa , use_core ); diff --git a/lib/kokkos/example/md_skeleton/main.cpp b/lib/kokkos/example/md_skeleton/main.cpp index 2563863cbdd9ed3ed03823e47a6a789cb88d4dbd..8d67425f9403f4031532bb54d9e7e8852149e0f0 100644 --- a/lib/kokkos/example/md_skeleton/main.cpp +++ b/lib/kokkos/example/md_skeleton/main.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -160,7 +160,7 @@ int main(int argc, char** argv) { Kokkos::Cuda::initialize(select_device); #elif defined( KOKKOS_ENABLE_OPENMP ) Kokkos::OpenMP::initialize(teams*num_threads); -#elif defined( KOKKOS_ENABLE_PTHREAD ) +#elif defined( KOKKOS_ENABLE_THREADS ) Kokkos::Threads::initialize(teams*num_threads); #endif diff --git a/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp b/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp index feea82244f2a57571034a87e31a537c530b0062e..889f0caa0d9447eacd0f50727e9fc15dbfb6c7f7 100644 --- a/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp +++ b/lib/kokkos/example/multi_fem/ExplicitFunctors.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_EXPLICITFUNCTORS_HPP #define KOKKOS_EXPLICITFUNCTORS_HPP -#include <math.h> +#include <cmath> #include <Kokkos_Core.hpp> #include <FEMesh.hpp> @@ -461,8 +461,8 @@ struct grad const typename Fields::elem_node_ids_type elem_node_connectivity ; const typename Fields::node_coords_type model_coords ; - const typename Fields::geom_state_array_type displacement ; - const typename Fields::geom_state_array_type velocity ; + const typename Fields::geom_state_array_type displacement ; + const typename Fields::geom_state_array_type velocity ; const typename Fields::elem_tensor_type vel_grad ; const typename Fields::scalar_type dt ; @@ -1171,14 +1171,14 @@ struct internal_force dot8<Scalar,execution_space>( grad_y , grad_y ) + dot8<Scalar,execution_space>( grad_z , grad_z ) ); - const Scalar dtrial = sqrt(elem_mass(ielem) * aspect / dil); + const Scalar dtrial = std::sqrt(elem_mass(ielem) * aspect / dil); const Scalar traced = (rot_stretch(ielem, 0) + rot_stretch(ielem, 1) + rot_stretch(ielem, 2)); const Scalar eps = traced < 0 ? (lin_bulk_visc - quad_bulk_visc * traced * dtrial) : lin_bulk_visc ; const Scalar bulkq = eps * dil * dtrial * traced; - Scalar cur_time_step = dtrial * ( sqrt( 1.0 + eps * eps) - eps); + Scalar cur_time_step = dtrial * ( std::sqrt( 1.0 + eps * eps) - eps); // force fixed time step if input diff --git a/lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp b/lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp index 3c4ca582245f687c360eb8812d899ff33a6a1644..5de8a51fe885b587d693fd92cfa165fde43806c4 100644 --- a/lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp +++ b/lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef KOKKOS_HEXEXPLICITFUNCTIONS_HPP #define KOKKOS_HEXEXPLICITFUNCTIONS_HPP -#include <math.h> +#include <cmath> namespace Explicit { diff --git a/lib/kokkos/example/multi_fem/Nonlinear.hpp b/lib/kokkos/example/multi_fem/Nonlinear.hpp index ad25131fa748e030ee9a1ba9288ec632b42f141d..58b9661170717d9f67795a4d8a52f1ad5cad8e74 100644 --- a/lib/kokkos/example/multi_fem/Nonlinear.hpp +++ b/lib/kokkos/example/multi_fem/Nonlinear.hpp @@ -141,8 +141,8 @@ public: , zmax( arg_zmax ) , T_zmin( arg_T_zmin ) , T_zmax( arg_T_zmax ) - , a( ( 1.0 / sqrt(T_zmax) - 1.0 / sqrt(T_zmin) ) / ( zmax - zmin ) ) - , b( 1.0 / sqrt(T_zmin) ) + , a( ( 1.0 / std::sqrt(T_zmax) - 1.0 / std::sqrt(T_zmin) ) / ( zmax - zmin ) ) + , b( 1.0 / std::sqrt(T_zmin) ) , K( 1.0 / ( 6.0 * a * a ) ) {} diff --git a/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp b/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp index b2adc2adab302ec05f4ca2218e0321583f52a044..295a971e9d921773ddf2da4de97555b18d44ab83 100644 --- a/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp +++ b/lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. 
Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,11 +35,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER -#include <stdio.h> +#include <cstdio> #include <iostream> #include <fstream> @@ -131,7 +131,7 @@ public: , coeff_K( arg_coeff_K ) , elem_count( arg_mesh.elem_node_ids.dimension_0() ) { - const unsigned jInvJ[9][4] = + const unsigned jInvJ[9][4] = { { j22 , j33 , j23 , j32 } , { j13 , j32 , j12 , j33 } , { j12 , j23 , j13 , j22 } , @@ -164,7 +164,7 @@ public: public: //------------------------------------ - // Sum among the threadIdx.x + // Sum among the threadIdx.x template< typename Type > __device__ inline static @@ -320,8 +320,8 @@ public: sum_x_clear(); // Make sure summation scratch is zero - // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$ - // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ + // $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$ + // $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$ const unsigned iInt = threadIdx.x ; @@ -354,7 +354,7 @@ public: for ( unsigned iCol = 0 ; iCol < FunctionCount ; ++iCol ) { - const float jac_del = + const float jac_del = dpsidx_row * work_data->dpsidx[iCol][iInt] + dpsidy_row * work_data->dpsidy[iCol][iInt] + dpsidz_row * work_data->dpsidz[iCol][iInt] ; @@ -367,7 +367,7 @@ public: } } - __syncthreads(); // All warps finish before refilling shared data + __syncthreads(); // All warps finish before refilling shared data } __device__ inline 
diff --git a/lib/kokkos/example/multi_fem/ParallelMachine.cpp b/lib/kokkos/example/multi_fem/ParallelMachine.cpp index 07ceccb4999709dfb3e3ec6ce6bbcd69890cf94c..f9a05754b67d59c168828a3479ca42b52b35b45a 100644 --- a/lib/kokkos/example/multi_fem/ParallelMachine.cpp +++ b/lib/kokkos/example/multi_fem/ParallelMachine.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #if 0 -#include <stdlib.h> -#include <string.h> +#include <cstdlib> +#include <cstring> #include <ParallelMachine.hpp> @@ -122,7 +122,7 @@ Machine::Machine( int * argc , char *** argv ) unsigned team_count = Kokkos::hwloc::get_available_numa_count(); unsigned threads_per_team = Kokkos::hwloc::get_available_cores_per_numa() * Kokkos::hwloc::get_available_threads_per_core(); - + if ( i + 2 < *argc ) { team_count = atoi( (*argv)[i+1] ); threads_per_team = atoi( (*argv)[i+2] ); diff --git a/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp b/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp index b13528bdacbd03bef3dcc04cbd44285cf6aa1568..e50b088a8c514a6cfaca52b9308fc124c5c59fec 100644 --- a/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp +++ b/lib/kokkos/example/multi_fem/SparseLinearSystem.hpp @@ -240,7 +240,7 @@ void cgsolve( double old_rdot = dot( count_owned , r , data_map.machine ); - normr = sqrt( old_rdot ); + normr = std::sqrt( 
old_rdot ); iteration = 0 ; Kokkos::Timer wall_clock ; @@ -262,7 +262,7 @@ void cgsolve( /* p = r + beta * p ; */ xpby( count_owned , r , beta , p ); - normr = sqrt( old_rdot = r_dot ); + normr = std::sqrt( old_rdot = r_dot ); ++iteration ; } diff --git a/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp b/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp index 3c37331b38523d8764dc931bc053aa6f6c4f492f..d01aceb28847fda5de02407f0f24389e1ddfc483 100644 --- a/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp +++ b/lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef TESTFEMESHBOXFIXTURE_HPP #define TESTFEMESHBOXFIXTURE_HPP -#include <stdio.h> +#include <cstdio> #include <iostream> #include <stdexcept> #include <limits> diff --git a/lib/kokkos/example/sort_array/main.cpp b/lib/kokkos/example/sort_array/main.cpp index 61e96cc6af2f8ff1341203300ff1662af3c07b89..b091a55d705d2672f0b549f70b4bba4f9fa23f47 100644 --- a/lib/kokkos/example/sort_array/main.cpp +++ b/lib/kokkos/example/sort_array/main.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,13 +36,13 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include <string.h> -#include <stdlib.h> +#include <cstring> +#include <cstdlib> #include <iostream> #include <sstream> @@ -53,7 +53,7 @@ int main( int argc , char ** argv ) { -#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_PTHREAD ) || defined( KOKKOS_ENABLE_OPENMP ) +#if defined( KOKKOS_ENABLE_CUDA ) || defined( KOKKOS_ENABLE_THREADS ) || defined( KOKKOS_ENABLE_OPENMP ) Kokkos::initialize( argc , argv ); int length_array = 100000 ; @@ -73,7 +73,7 @@ int main( int argc , char ** argv ) } #endif -#if defined( KOKKOS_ENABLE_PTHREAD ) +#if defined( KOKKOS_ENABLE_THREADS ) if ( Kokkos::Threads::is_initialized() ) { std::cout << "Kokkos::Threads" << std::endl ; Example::sort_array< Kokkos::Threads >( length_array , length_total_array ); diff --git a/lib/kokkos/example/sort_array/sort_array.hpp b/lib/kokkos/example/sort_array/sort_array.hpp index 5dff64259ff0e80fb23b2cc5e9515315d4c25d9c..fc607b81dbf368547cd3bffcc0d47b3059609ee8 100644 --- a/lib/kokkos/example/sort_array/sort_array.hpp +++ b/lib/kokkos/example/sort_array/sort_array.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,7 +44,7 @@ #ifndef EXAMPLE_SORT_ARRAY #define EXAMPLE_SORT_ARRAY -#include <stdlib.h> +#include <cstdlib> #include <algorithm> #include <Kokkos_Core.hpp> diff --git a/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp b/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp index de9c219d5b0b9576b042caaefcc2f7b149901647..8acd2464c7c0d96beeaa951ab10cf3c83c8b7cec 100644 --- a/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp +++ b/lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp @@ -82,7 +82,7 @@ struct findprimes { // Test all numbers from 3 to ceiling(sqrt(data(i))), to see if // they are factors of data(i). It's not the most efficient prime // test, but it works. 
- const int upper_bound = sqrt(1.0*number)+1; + const int upper_bound = std::sqrt(1.0*number)+1; bool is_prime = !(number%2 == 0); int k = 3; while (k < upper_bound && is_prime) { diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index e671293ff11ad8120766ed014128b25fb39089bc..5f2442102d0117a28cea52183e74d3fca08cc3c8 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -3,6 +3,8 @@ KOKKOS_DEVICES="" MAKE_J_OPTION="32" +KOKKOS_DO_EXAMPLES="1" + while [[ $# > 0 ]] do key="$1" @@ -70,6 +72,9 @@ do --make-j*) MAKE_J_OPTION="${key#*=}" ;; + --no-examples) + KOKKOS_DO_EXAMPLES="0" + ;; --compiler*) COMPILER="${key#*=}" CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` @@ -85,9 +90,9 @@ do if [ ${CNUM} -eq 0 ]; then echo "Invalid compiler by --compiler command: '${COMPILER}'" exit - fi + fi ;; - --with-options*) + --with-options*) KOKKOS_OPT="${key#*=}" ;; --help) @@ -164,7 +169,7 @@ fi if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" - exit + exit fi KOKKOS_SRC_PATH=${KOKKOS_PATH} @@ -232,7 +237,6 @@ else KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} fi - mkdir install echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/Makefile.kokkos echo "kokkos-clean:" >> install/Makefile.kokkos @@ -360,7 +364,6 @@ echo "" >> example/tutorial/Makefile echo "clean:" >> example/tutorial/Makefile echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile - if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/ichol/Makefile echo "" >> example/ichol/Makefile @@ -402,10 +405,12 @@ echo -e "\tmake -C core/perf_test" >> Makefile echo -e "\tmake -C containers/unit_tests" >> 
Makefile echo -e "\tmake -C containers/performance_tests" >> Makefile echo -e "\tmake -C algorithms/unit_tests" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then echo -e "\tmake -C example/fixture" >> Makefile echo -e "\tmake -C example/feint" >> Makefile echo -e "\tmake -C example/fenl" >> Makefile echo -e "\tmake -C example/tutorial build" >> Makefile +fi echo "" >> Makefile echo "test: build-test" >> Makefile echo -e "\tmake -C core/unit_test test" >> Makefile @@ -413,10 +418,12 @@ echo -e "\tmake -C core/perf_test test" >> Makefile echo -e "\tmake -C containers/unit_tests test" >> Makefile echo -e "\tmake -C containers/performance_tests test" >> Makefile echo -e "\tmake -C algorithms/unit_tests test" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then echo -e "\tmake -C example/fixture test" >> Makefile echo -e "\tmake -C example/feint test" >> Makefile echo -e "\tmake -C example/fenl test" >> Makefile echo -e "\tmake -C example/tutorial test" >> Makefile +fi echo "" >> Makefile echo "unit-tests-only:" >> Makefile echo -e "\tmake -C core/unit_test test" >> Makefile @@ -429,9 +436,11 @@ echo -e "\tmake -C core/perf_test clean" >> Makefile echo -e "\tmake -C containers/unit_tests clean" >> Makefile echo -e "\tmake -C containers/performance_tests clean" >> Makefile echo -e "\tmake -C algorithms/unit_tests clean" >> Makefile +if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then echo -e "\tmake -C example/fixture clean" >> Makefile echo -e "\tmake -C example/feint clean" >> Makefile echo -e "\tmake -C example/fenl clean" >> Makefile echo -e "\tmake -C example/tutorial clean" >> Makefile +fi echo -e "\tcd core; \\" >> Makefile echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} clean" >> Makefile diff --git a/lib/meam/meam_dens_final.F b/lib/meam/meam_dens_final.F index 92195dcaf4321f4cad5f0b3ba03014cf8c576a5b..cdc47067e24217b1585fc429dde1ec98aa70ea5c 100644 --- a/lib/meam/meam_dens_final.F +++ b/lib/meam/meam_dens_final.F @@ -98,13 +98,14 @@ c Complete 
the calculation of density call G_gam(Gamma(i),ibar_meam(elti), $ gsmooth_factor,G,errorflag) if (errorflag.ne.0) return + call get_shpfcn(shp,lattce_meam(elti,elti)) if (ibar_meam(elti).le.0) then Gbar = 1.d0 + dGbar = 0.d0 else - call get_shpfcn(shp,lattce_meam(elti,elti)) if (mix_ref_t.eq.1) then - gam = (t_ave(1,i)*shpi(1)+t_ave(2,i)*shpi(2) - $ +t_ave(3,i)*shpi(3))/(Z*Z) + gam = (t_ave(1,i)*shp(1)+t_ave(2,i)*shp(2) + $ +t_ave(3,i)*shp(3))/(Z*Z) else gam = (t1_meam(elti)*shp(1)+t2_meam(elti)*shp(2) $ +t3_meam(elti)*shp(3))/(Z*Z) @@ -119,9 +120,8 @@ c Complete the calculation of density Gbar = 1.d0 dGbar = 0.d0 else - call get_shpfcn(shpi,lattce_meam(elti,elti)) - gam = (t_ave(1,i)*shpi(1)+t_ave(2,i)*shpi(2) - $ +t_ave(3,i)*shpi(3))/(Z*Z) + gam = (t_ave(1,i)*shp(1)+t_ave(2,i)*shp(2) + $ +t_ave(3,i)*shp(3))/(Z*Z) call dG_gam(gam,ibar_meam(elti),gsmooth_factor, $ Gbar,dGbar) endif diff --git a/src/.gitignore b/src/.gitignore index 0cddfa6951c76e832ed1de043ff8b54c118fd85b..3bdf64a4cb5c2c30b508d76fd6a11bd2c2cd5eed 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -177,8 +177,8 @@ /compute_basal_atom.h /compute_body_local.cpp /compute_body_local.h -/compute_cna_atom2.cpp -/compute_cna_atom2.h +/compute_cnp_atom.cpp +/compute_cnp_atom.h /compute_damage_atom.cpp /compute_damage_atom.h /compute_dilatation_atom.cpp @@ -740,8 +740,6 @@ /pair_lj_sdk_coul_long.h /pair_lj_sdk_coul_msm.cpp /pair_lj_sdk_coul_msm.h -/pair_lj_sf.cpp -/pair_lj_sf.h /pair_lj_sf_dipole_sf.cpp /pair_lj_sf_dipole_sf.h /pair_lubricateU.cpp diff --git a/src/Depend.sh b/src/Depend.sh index 520d9ae2bffd9f25ce364f80618592c0079006bc..0962dace510b8cdbeb2e0c3c93779f15907a6fb0 100644 --- a/src/Depend.sh +++ b/src/Depend.sh @@ -126,4 +126,5 @@ fi if (test $1 = "USER-REAXC") then depend KOKKOS + depend USER-OMP fi diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index ca73adbf82c37dbe2ce2da55f3aa8c81d4163bd9..f4aeaa2706c7e66bce6e04fe91985a80d7f7f1e0 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh 
@@ -113,6 +113,8 @@ action pair_soft_gpu.cpp action pair_soft_gpu.h action pair_sw_gpu.cpp pair_sw.cpp action pair_sw_gpu.h pair_sw.h +action pair_vashishta_gpu.cpp pair_vashishta.cpp +action pair_vashishta_gpu.h pair_vashishta.h action pair_table_gpu.cpp pair_table.cpp action pair_table_gpu.h pair_table.cpp action pair_tersoff_gpu.cpp pair_tersoff.cpp diff --git a/src/GPU/pair_vashishta_gpu.cpp b/src/GPU/pair_vashishta_gpu.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19e0799671a9a05e2b722d32d2f33319f08e35d1 --- /dev/null +++ b/src/GPU/pair_vashishta_gpu.cpp @@ -0,0 +1,258 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Anders Hafreager (UiO) +------------------------------------------------------------------------- */ +#include <limits> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pair_vashishta_gpu.h" +#include "atom.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "memory.h" +#include "error.h" +#include "domain.h" +#include "gpu_extra.h" + +using namespace LAMMPS_NS; + +// External functions from cuda library for atom decomposition + +int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors, + const double cell_size, int &gpu_mode, FILE *screen, + int* host_map, const int nelements, int*** host_elem2param, const int nparams, + const double* cutsq, const double* r0, + const double* gamma, const double* eta, + const double* lam1inv, const double* lam4inv, + const double* zizj, const double* mbigd, + const double* dvrc, const double* big6w, + const double* heta, const double* bigh, + const double* bigw, const double* c0, + const double* costheta, const double* bigb, + const double* big2b, const double* bigc); +void vashishta_gpu_clear(); +int ** vashishta_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void vashishta_gpu_compute(const int ago, const int nloc, const int nall, const int ln, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, 
const bool vatom, int &host_start, + const double cpu_time, bool &success); +double vashishta_gpu_bytes(); +extern double lmp_gpu_forces(double **f, double **tor, double *eatom, + double **vatom, double *virial, double &ecoul); + +/* ---------------------------------------------------------------------- */ + +PairVashishtaGPU::PairVashishtaGPU(LAMMPS *lmp) : PairVashishta(lmp), gpu_mode(GPU_FORCE) +{ + cpu_time = 0.0; + reinitflag = 0; + gpu_allocated = false; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); + + cutghost = NULL; + ghostneigh = 1; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairVashishtaGPU::~PairVashishtaGPU() +{ + vashishta_gpu_clear(); + if (allocated) + memory->destroy(cutghost); +} + +/* ---------------------------------------------------------------------- */ + +void PairVashishtaGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = vashishta_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + vashishta_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum, + atom->x, atom->type, ilist, numneigh, firstneigh, eflag, + vflag, eflag_atom, vflag_atom, host_start, cpu_time, + success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); +} + +/* 
---------------------------------------------------------------------- */ + +void PairVashishtaGPU::allocate() +{ + if(!allocated) { + PairVashishta::allocate(); + } + int n = atom->ntypes; + + memory->create(cutghost,n+1,n+1,"pair:cutghost"); + gpu_allocated = true; +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairVashishtaGPU::init_style() +{ + double cell_size = cutmax + neighbor->skin; + + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style vashishta/gpu requires atom IDs"); + if (force->newton_pair != 0) + error->all(FLERR,"Pair style vashishta/gpu requires newton pair off"); + + double *cutsq, *r0, *r0eps, *gamma, *eta; + double *lam1inv, *lam4inv, *zizj, *mbigd; + double *dvrc, *big6w, *heta, *bigh; + double *bigw, *c0, *costheta, *bigb; + double *big2b, *bigc; + + cutsq = r0 = gamma = eta = NULL; + lam1inv = lam4inv = zizj = mbigd = NULL; + dvrc = big6w = heta = bigh = NULL; + bigw = c0 = costheta = bigb = NULL; + big2b = bigc = NULL; + + memory->create(cutsq,nparams,"pair:cutsq"); + memory->create(r0,nparams,"pair:r0"); + memory->create(gamma,nparams,"pair:gamma"); + memory->create(eta,nparams,"pair:eta"); + memory->create(lam1inv,nparams,"pair:lam1inv"); + memory->create(lam4inv,nparams,"pair:lam4inv"); + memory->create(zizj,nparams,"pair:zizj"); + memory->create(mbigd,nparams,"pair:mbigd"); + memory->create(dvrc,nparams,"pair:dvrc"); + memory->create(big6w,nparams,"pair:big6w"); + memory->create(heta,nparams,"pair:heta"); + memory->create(bigh,nparams,"pair:bigh"); + memory->create(bigw,nparams,"pair:bigw"); + memory->create(c0,nparams,"pair:c0"); + memory->create(costheta,nparams,"pair:costheta"); + memory->create(bigb,nparams,"pair:bigb"); + memory->create(big2b,nparams,"pair:big2b"); + memory->create(bigc,nparams,"pair:bigc"); + + for (int i = 0; i < nparams; i++) { + cutsq[i] = 
params[i].cutsq; + r0[i] = params[i].r0; + gamma[i] = params[i].gamma; + eta[i] = params[i].eta; + lam1inv[i] = params[i].lam1inv; + lam4inv[i] = params[i].lam4inv; + zizj[i] = params[i].zizj; + mbigd[i] = params[i].mbigd; + dvrc[i] = params[i].dvrc; + big6w[i] = params[i].big6w; + heta[i] = params[i].heta; + bigh[i] = params[i].bigh; + bigw[i] = params[i].bigw; + c0[i] = params[i].c0; + costheta[i] = params[i].costheta; + bigb[i] = params[i].bigb; + big2b[i] = params[i].big2b; + bigc[i] = params[i].bigc; + } + int success = vashishta_gpu_init(atom->ntypes+1, atom->nlocal, atom->nlocal+atom->nghost, 500, + cell_size, gpu_mode, screen, map, nelements, + elem2param, nparams, cutsq, r0, gamma, eta, lam1inv, + lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw, + c0, costheta, bigb, big2b, bigc); + memory->destroy(cutsq); + memory->destroy(r0); + memory->destroy(gamma); + memory->destroy(eta); + memory->destroy(lam1inv); + memory->destroy(lam4inv); + memory->destroy(zizj); + memory->destroy(mbigd); + memory->destroy(dvrc); + memory->destroy(big6w); + memory->destroy(heta); + memory->destroy(bigh); + memory->destroy(bigw); + memory->destroy(c0); + memory->destroy(costheta); + memory->destroy(bigb); + memory->destroy(big2b); + memory->destroy(bigc); + + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->ghost = 1; + } + + if (comm->cutghostuser < (2.0*cutmax + neighbor->skin) ) + comm->cutghostuser=2.0*cutmax + neighbor->skin; + +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairVashishtaGPU::init_one(int i, int j) +{ + if(!gpu_allocated) { + allocate(); + } + if (setflag[i][j] == 0) error->all(FLERR,"All 
pair coeffs are not set"); + cutghost[i][j] = cutmax; + cutghost[j][i] = cutmax; + + return cutmax; +} + diff --git a/src/GPU/pair_vashishta_gpu.h b/src/GPU/pair_vashishta_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..d54ede505bf6b68f8096dbf6fc0913c2763cbfcc --- /dev/null +++ b/src/GPU/pair_vashishta_gpu.h @@ -0,0 +1,69 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(vashishta/gpu,PairVashishtaGPU) + +#else + +#ifndef LMP_PAIR_VASHISHTA_GPU_H +#define LMP_PAIR_VASHISHTA_GPU_H + +#include "pair_vashishta.h" + +namespace LAMMPS_NS { + +class PairVashishtaGPU : public PairVashishta { + public: + PairVashishtaGPU(class LAMMPS *); + ~PairVashishtaGPU(); + void compute(int, int); + double init_one(int, int); + void init_style(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + protected: + void allocate(); + int gpu_allocated; + int gpu_mode; + double cpu_time; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Pair style vashishta/gpu requires atom IDs + +This is a requirement to use this potential. + +E: Pair style vashishta/gpu requires newton pair off + +See the newton command. This is a restriction to use this potential. 
+ +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +*/ diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp index 346077e07119c4b3f5f45eb535f5ca698083ce30..8dd22022d8f7272d342cf6e86f36700e9086adf6 100644 --- a/src/KOKKOS/angle_charmm_kokkos.cpp +++ b/src/KOKKOS/angle_charmm_kokkos.cpp @@ -111,7 +111,6 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagAngleCharmmCompute<0,0> >(0,nanglelist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/bond_class2_kokkos.cpp b/src/KOKKOS/bond_class2_kokkos.cpp index b01af92b5fddfdecf00ab2d00ec709a14c6a3741..b3c11c9a06c468be08f9ce2b2a29919c949ff475 100644 --- a/src/KOKKOS/bond_class2_kokkos.cpp +++ b/src/KOKKOS/bond_class2_kokkos.cpp @@ -110,7 +110,6 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondClass2Compute<0,0> >(0,nbondlist),*this); } } - //DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index cfc37bfa9f6e26afb65d97e985580d485bb65a91..025838340bbb6945b24e51c8bd185c72b1afa7d1 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -125,7 +125,6 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondFENECompute<0,0> >(0,nbondlist),*this); } } - DeviceType::fence(); k_warning_flag.template modify<DeviceType>(); k_warning_flag.template sync<LMPHostType>(); diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index 408f59c5632b87c6177f41d7b623dbc852370eee..da45c70d6c7739f08ad0fb559e6d9d7f768437c4 100644 --- 
a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -111,7 +111,6 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagBondHarmonicCompute<0,0> >(0,nbondlist),*this); } } - //DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/compute_temp_kokkos.cpp b/src/KOKKOS/compute_temp_kokkos.cpp index 6a24591d6cbb66842df6cddefeef873f8f78203b..2ea67a1fb135a695a1e37b3c5d3e0de2ae82762e 100644 --- a/src/KOKKOS/compute_temp_kokkos.cpp +++ b/src/KOKKOS/compute_temp_kokkos.cpp @@ -63,7 +63,6 @@ double ComputeTempKokkos<DeviceType>::compute_scalar() Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempScalar<0> >(0,nlocal),*this,t_kk); - DeviceType::fence(); copymode = 0; t = t_kk.t0; // could make this more efficient @@ -118,7 +117,6 @@ void ComputeTempKokkos<DeviceType>::compute_vector() Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<1> >(0,nlocal),*this,t_kk); else Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagComputeTempVector<0> >(0,nlocal),*this,t_kk); - DeviceType::fence(); copymode = 0; t[0] = t_kk.t0; diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index ee9e3d1244b0c4c22bddee191751471803c4a08e..a8a8aade604a001cc8e2499b2be9c06f74f500af 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -132,7 +132,6 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralCharmmCompute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp index 
edfd1b3395ba909aec568fdd86d5867665efcad1..89e42c6f836a1f3fa9e502875332bffb8bb4cd08 100644 --- a/src/KOKKOS/dihedral_class2_kokkos.cpp +++ b/src/KOKKOS/dihedral_class2_kokkos.cpp @@ -159,7 +159,6 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralClass2Compute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp index 8e222ad86083e762df7e698be81b6e080d42fcbf..e37d4d2ef5636b867e927da9421b06f959d0b093 100644 --- a/src/KOKKOS/dihedral_opls_kokkos.cpp +++ b/src/KOKKOS/dihedral_opls_kokkos.cpp @@ -121,7 +121,6 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralOPLSCompute<0,0> >(0,ndihedrallist),*this); } } - DeviceType::fence(); // error check diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index 0572dcedbecbfab7634285d2dff7b73ac967e008..fb0f329a91bb398bad243493e9d834ac3ab1aa8d 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -506,7 +506,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) Kokkos::parallel_for(nlocal,post_functor); } - DeviceType::fence(); if(tbiasflag == BIAS){ atomKK->sync(temperature->execution_space,temperature->datamask_read); @@ -531,7 +530,6 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) // set total force zero in parallel on the device FixLangevinKokkosZeroForceFunctor<DeviceType> zero_functor(this); Kokkos::parallel_for(nlocal,zero_functor); - DeviceType::fence(); } // f is modified by both post_force and zero_force functors atomKK->modified(execution_space,datamask_modify); @@ -726,7 +724,6 @@ double FixLangevinKokkos<DeviceType>::compute_scalar() k_flangevin.template sync<DeviceType>(); FixLangevinKokkosTallyEnergyFunctor<DeviceType> scalar_functor(this); 
Kokkos::parallel_reduce(nlocal,scalar_functor,energy_onestep); - DeviceType::fence(); energy = 0.5*energy_onestep*update->dt; } @@ -770,7 +767,6 @@ void FixLangevinKokkos<DeviceType>::end_of_step() k_flangevin.template sync<DeviceType>(); FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this); Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep); - DeviceType::fence(); energy += energy_onestep*update->dt; } diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index 2b55259365d5100ced33316ea81f272b49bdb0cc..fb03bf68c6308163a2126a0e9f9a2ec82f9139c2 100644 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -495,7 +495,6 @@ void FixNHKokkos<DeviceType>::nh_v_press() Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<1> >(0,nlocal),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_press<0> >(0,nlocal),*this); - DeviceType::fence(); copymode = 0; atomKK->modified(execution_space,V_MASK); @@ -550,7 +549,6 @@ void FixNHKokkos<DeviceType>::nve_v() Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<1> >(0,nlocal),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_v<0> >(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -595,7 +593,6 @@ void FixNHKokkos<DeviceType>::nve_x() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nve_x>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -631,7 +628,6 @@ void FixNHKokkos<DeviceType>::nh_v_temp() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixNH_nh_v_temp>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; atomKK->modified(execution_space,V_MASK); diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index 4c041f85b0794a2f63ff2d803c27ed84db275b61..eb41443bab6c410044ad6a2f32a0cd7c3ee2feb0 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -76,7 +76,6 @@ 
void FixNVEKokkos<DeviceType>::initial_integrate(int vflag) FixNVEKokkosInitialIntegrateFunctor<DeviceType,0> functor(this); Kokkos::parallel_for(nlocal,functor); } - DeviceType::fence(); } template<class DeviceType> @@ -133,7 +132,6 @@ void FixNVEKokkos<DeviceType>::final_integrate() FixNVEKokkosFinalIntegrateFunctor<DeviceType,0> functor(this); Kokkos::parallel_for(nlocal,functor); } - DeviceType::fence(); // debug //atomKK->sync(Host,datamask_read); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 2e46b85fd2e7800b1883d0925f1285f061edc12e..5cafbd2ef3d2b1cf0626c7f736e33f4c02ec2e8f 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -234,12 +234,10 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag) // compute_H FixQEqReaxKokkosComputeHFunctor<DeviceType> computeH_functor(this); Kokkos::parallel_scan(inum,computeH_functor); - DeviceType::fence(); // init_matvec FixQEqReaxKokkosMatVecFunctor<DeviceType> matvec_functor(this); Kokkos::parallel_for(inum,matvec_functor); - DeviceType::fence(); // comm->forward_comm_fix(this); //Dist_vector( s ); pack_flag = 2; @@ -259,15 +257,12 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int vflag) // 1st cg solve over b_s, s cg_solve1(); - DeviceType::fence(); // 2nd cg solve over b_t, t cg_solve2(); - DeviceType::fence(); // calculate_Q(); calculate_q(); - DeviceType::fence(); copymode = 0; @@ -354,7 +349,6 @@ void FixQEqReaxKokkos<DeviceType>::allocate_array() const int ignum = atom->nlocal + atom->nghost; FixQEqReaxKokkosZeroFunctor<DeviceType> zero_functor(this); Kokkos::parallel_for(ignum,zero_functor); - DeviceType::fence(); } /* ---------------------------------------------------------------------- */ @@ -499,10 +493,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse12Functor<DeviceType> sparse12_functor(this); Kokkos::parallel_for(inum,sparse12_functor); - DeviceType::fence(); if 
(neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse13Functor<DeviceType,HALF> sparse13_functor(this); Kokkos::parallel_for(inum,sparse13_functor); @@ -513,7 +505,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec1> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -529,21 +520,17 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm1Functor<DeviceType> norm1_functor(this); Kokkos::parallel_reduce(inum,norm1_functor,my_norm); - DeviceType::fence(); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); - DeviceType::fence(); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); - DeviceType::fence(); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; - DeviceType::fence(); int loop; const int loopmax = 200; @@ -560,10 +547,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); @@ -574,7 +559,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this); } - 
DeviceType::fence(); if (neighflag != FULL) { @@ -589,7 +573,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; @@ -602,12 +585,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon1Functor<DeviceType> precon1_functor(this); Kokkos::parallel_for(inum,precon1_functor); - DeviceType::fence(); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; @@ -616,7 +597,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1() // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); - DeviceType::fence(); } if (loop >= loopmax && comm->me == 0) { @@ -644,10 +624,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // sparse_matvec( &H, x, q ); FixQEqReaxKokkosSparse32Functor<DeviceType> sparse32_functor(this); Kokkos::parallel_for(inum,sparse32_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse33Functor<DeviceType,HALF> sparse33_functor(this); Kokkos::parallel_for(inum,sparse33_functor); @@ -658,7 +636,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec3> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -674,21 +651,17 @@ void 
FixQEqReaxKokkos<DeviceType>::cg_solve2() F_FLOAT my_norm = 0.0; FixQEqReaxKokkosNorm2Functor<DeviceType> norm2_functor(this); Kokkos::parallel_reduce(inum,norm2_functor,my_norm); - DeviceType::fence(); F_FLOAT norm_sqr = 0.0; MPI_Allreduce( &my_norm, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); b_norm = sqrt(norm_sqr); - DeviceType::fence(); // sig_new = parallel_dot( r, d, nn); F_FLOAT my_dot = 0.0; FixQEqReaxKokkosDot1Functor<DeviceType> dot1_functor(this); Kokkos::parallel_reduce(inum,dot1_functor,my_dot); - DeviceType::fence(); F_FLOAT dot_sqr = 0.0; MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); F_FLOAT sig_new = dot_sqr; - DeviceType::fence(); int loop; const int loopmax = 200; @@ -705,10 +678,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // sparse_matvec( &H, d, q ); FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this); Kokkos::parallel_for(inum,sparse22_functor); - DeviceType::fence(); if (neighflag != FULL) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagZeroQGhosts>(nlocal,nlocal+atom->nghost),*this); - DeviceType::fence(); if (neighflag == HALF) { FixQEqReaxKokkosSparse23Functor<DeviceType,HALF> sparse23_functor(this); Kokkos::parallel_for(inum,sparse23_functor); @@ -719,7 +690,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() } else { Kokkos::parallel_for(Kokkos::TeamPolicy <DeviceType, TagSparseMatvec2> (inum, teamsize), *this); } - DeviceType::fence(); if (neighflag != FULL) { k_o.template modify<DeviceType>(); @@ -733,10 +703,8 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosDot2Functor<DeviceType> dot2_functor(this); Kokkos::parallel_reduce(inum,dot2_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); tmp = dot_sqr; - DeviceType::fence(); alpha = sig_new / tmp; @@ -747,12 +715,10 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() my_dot = dot_sqr = 0.0; FixQEqReaxKokkosPrecon2Functor<DeviceType> 
precon2_functor(this); Kokkos::parallel_for(inum,precon2_functor); - DeviceType::fence(); // preconditioning: p[j] = r[j] * Hdia_inv[j]; // sig_new = parallel_dot( r, p, nn); FixQEqReaxKokkosPreconFunctor<DeviceType> precon_functor(this); Kokkos::parallel_reduce(inum,precon_functor,my_dot); - DeviceType::fence(); MPI_Allreduce( &my_dot, &dot_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); sig_new = dot_sqr; @@ -761,7 +727,6 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2() // vector_sum( d, 1., p, beta, d, nn ); FixQEqReaxKokkosVecSum2Functor<DeviceType> vecsum2_functor(this); Kokkos::parallel_for(inum,vecsum2_functor); - DeviceType::fence(); } if (loop >= loopmax && comm->me == 0) { @@ -786,7 +751,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc1Functor<DeviceType> vecacc1_functor(this); Kokkos::parallel_reduce(inum,vecacc1_functor,sum); - DeviceType::fence(); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT s_sum = sum_all; @@ -794,7 +758,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() sum = sum_all = 0.0; FixQEqReaxKokkosVecAcc2Functor<DeviceType> vecacc2_functor(this); Kokkos::parallel_reduce(inum,vecacc2_functor,sum); - DeviceType::fence(); MPI_Allreduce(&sum, &sum_all, 1, MPI_DOUBLE, MPI_SUM, world ); const F_FLOAT t_sum = sum_all; @@ -804,7 +767,6 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q() // q[i] = s[i] - u * t[i]; FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this); Kokkos::parallel_for(inum,calculateQ_functor); - DeviceType::fence(); pack_flag = 4; //comm->forward_comm_fix( this ); //Dist_vector( atom->q ); diff --git a/src/KOKKOS/fix_reaxc_species_kokkos.cpp b/src/KOKKOS/fix_reaxc_species_kokkos.cpp index ce84de30cb2eaf83cf8230e916f82ef19754de33..8b778ecf650e582b53dd757e460e0e937e115f34 100644 --- a/src/KOKKOS/fix_reaxc_species_kokkos.cpp +++ b/src/KOKKOS/fix_reaxc_species_kokkos.cpp @@ -48,7 +48,7 @@ FixReaxCSpeciesKokkos::FixReaxCSpeciesKokkos(LAMMPS 
*lmp, int narg, char **arg) { kokkosable = 1; atomKK = (AtomKokkos *) atom; - + // NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added datamask_read = X_MASK | V_MASK | Q_MASK | MASK_MASK; @@ -116,35 +116,30 @@ void FixReaxCSpeciesKokkos::FindMolecule() done = 1; for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - if (!(mask[i] & groupbit)) continue; + i = ilist[ii]; + if (!(mask[i] & groupbit)) continue; - itype = atom->type[i]; + itype = atom->type[i]; for (jj = 0; jj < MAXSPECBOND; jj++) { - j = reaxc->tmpid[i][jj]; + j = reaxc->tmpid[i][jj]; - if (j < i) continue; - if (!(mask[j] & groupbit)) continue; + if ((j == 0) && (j < i)) continue; + if (!(mask[j] & groupbit)) continue; - if (clusterID[i] == clusterID[j] && PBCconnected[i] == PBCconnected[j] - && x0[i].x == x0[j].x && x0[i].y == x0[j].y && x0[i].z == x0[j].z) continue; + if (clusterID[i] == clusterID[j] + && x0[i].x == x0[j].x && x0[i].y == x0[j].y && x0[i].z == x0[j].z) continue; jtype = atom->type[j]; - bo_cut = BOCut[itype][jtype]; - bo_tmp = spec_atom[i][jj+7]; + bo_cut = BOCut[itype][jtype]; + bo_tmp = spec_atom[i][jj+7]; - if (bo_tmp > bo_cut) { + if (bo_tmp > bo_cut) { clusterID[i] = clusterID[j] = MIN(clusterID[i], clusterID[j]); - PBCconnected[i] = PBCconnected[j] = MAX(PBCconnected[i], PBCconnected[j]); x0[i] = x0[j] = chAnchor(x0[i], x0[j]); - if ((fabs(spec_atom[i][1] - spec_atom[j][1]) > reaxc->control->bond_cut) - || (fabs(spec_atom[i][2] - spec_atom[j][2]) > reaxc->control->bond_cut) - || (fabs(spec_atom[i][3] - spec_atom[j][3]) > reaxc->control->bond_cut)) - PBCconnected[i] = PBCconnected[j] = 1; - done = 0; - } - } + done = 0; + } + } } if (!done) change = 1; if (done) break; diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index 27f7d100fa57f0a86ddc3c311f6f3b4a98cd6d5d..5e26ef3610535a74b9e24ba2fc9f9c9822589c04 100644 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -108,7 +108,6 @@ void 
FixSetForceKokkos<DeviceType>::post_force(int vflag) if (varflag == CONSTANT) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceConstant>(0,nlocal),*this,foriginal_kk); - DeviceType::fence(); copymode = 0; // variable force, wrap with clear/add @@ -138,7 +137,6 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag) copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixSetForceNonConstant>(0,nlocal),*this,foriginal_kk); - DeviceType::fence(); copymode = 0; } diff --git a/src/KOKKOS/fix_wall_reflect_kokkos.cpp b/src/KOKKOS/fix_wall_reflect_kokkos.cpp index 55be7e5cd7c5c86335a0a8208cbf8ca39005c520..cd7a2c59b78ae84c3c0ae58f34c3998705b442d1 100644 --- a/src/KOKKOS/fix_wall_reflect_kokkos.cpp +++ b/src/KOKKOS/fix_wall_reflect_kokkos.cpp @@ -79,7 +79,6 @@ void FixWallReflectKokkos<DeviceType>::post_integrate() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixWallReflectPostIntegrate>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp index 25bd2c732f6621278661e3fb891338a263992466..c2cb7dfe2bf55df1f64988abe60a152d42984654 100644 --- a/src/KOKKOS/improper_class2_kokkos.cpp +++ b/src/KOKKOS/improper_class2_kokkos.cpp @@ -140,7 +140,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2Compute<0,0> >(0,nimproperlist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; // error check @@ -165,7 +164,6 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperClass2AngleAngle<0,0> >(0,nimproperlist),*this); } } - DeviceType::fence(); if (eflag_global) energy += ev.evdwl; if (vflag_global) { diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp index 
9c99b35bd9ec374953985eeec173dcea58d40c76..1e58e18c5189766dad20958c535f0eb123169950 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -128,7 +128,6 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagImproperHarmonicCompute<0,0> >(0,nimproperlist),*this); } } - //DeviceType::fence(); // error check diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 3614a82cfed29a3e9d55988a0c57c553ad37aeea..b7b550369d26efadaf1d2ff69671d00409f2f6d2 100644 --- a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -287,12 +287,12 @@ int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j, if (nex_mol) { for (m = 0; m < nex_mol; m++) - if (ex_mol_intra[m]) { // intra-chain: exclude i-j pair if on same molecule + if (ex_mol_intra[m]) { // intra-chain: exclude i-j pair if on same molecule if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && - molecule[i] == molecule[j]) return 1; - } else // exclude i-j pair if on different molecules - if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && - molecule[i] != molecule[j]) return 1; + molecule[i] == molecule[j]) return 1; + } else // exclude i-j pair if on different molecules + if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && + molecule[i] != molecule[j]) return 1; } return 0; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 52cdfe0d53bdfde4c5aa2cf6dab82c78209609da..8e81c576186c76130487b2d6d1cb1897826d3c23 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -385,7 +385,7 @@ struct NPairKokkosBuildFunctor { } #ifdef KOKKOS_HAVE_CUDA __device__ inline - + void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const { c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON,TRI>(dev); } diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index 
45c320bc516fcfd13816a6dfe809ec7296d035c5..76c701213d623303f354c3e8536ef85e121e951b 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -409,7 +409,6 @@ int PairEAMAlloyKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_i iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -428,7 +427,6 @@ void PairEAMAlloyKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMAlloyUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b9fa82740aeb0d59bba5e1c5d5d9f6e762292941..9b565f8edee1b7df915f220b78d2e932d353417c 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -133,7 +133,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nall),*this); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSInitialize>(0,nlocal),*this); - DeviceType::fence(); // loop over neighbors of my atoms @@ -156,7 +155,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelA<HALFTHREAD,0> >(0,inum),*this); } } - DeviceType::fence(); // communicate and sum densities (on the host) @@ -174,7 +172,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelB<0> >(0,inum),*this); - DeviceType::fence(); } else if (neighflag == FULL) { @@ -184,7 +181,6 
@@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPairEAMFSKernelAB<0> >(0,inum),*this); - DeviceType::fence(); } if (eflag) { @@ -239,7 +235,6 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } } } - DeviceType::fence(); if (eflag_global) eng_vdwl += ev.evdwl; if (vflag_global) { @@ -414,7 +409,6 @@ int PairEAMFSKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_ iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -433,7 +427,6 @@ void PairEAMFSKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMFSUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index e4128de72265b609e9d86723d8acc9100e446346..7be8e54605fc0443d92eb64f32598b50ef2bf38e 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -409,7 +409,6 @@ int PairEAMKokkos<DeviceType>::pack_forward_comm_kokkos(int n, DAT::tdual_int_2d iswap = iswap_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMPackForwardComm>(0,n),*this); - DeviceType::fence(); return n; } @@ -428,7 +427,6 @@ void PairEAMKokkos<DeviceType>::unpack_forward_comm_kokkos(int n, int first_in, first = first_in; v_buf = buf.view<DeviceType>(); Kokkos::parallel_for(Kokkos::RangePolicy<LMPDeviceType, TagPairEAMUnpackForwardComm>(0,n),*this); - DeviceType::fence(); } template<class DeviceType> diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index 
59369b5e082c3b383012670da52697f98e7a0163..6082c932874f314183370fbb9023b3d091b506e6 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -731,7 +731,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputePolar<HALFTHREAD,0> >(0,inum),*this); } - DeviceType::fence(); ev_all += ev; pvector[13] = ev.ecoul; @@ -771,7 +770,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeLJCoulomb<FULL,0> >(0,inum),*this); } } - DeviceType::fence(); ev_all += ev; pvector[10] = ev.evdwl; pvector[11] = ev.ecoul; @@ -800,7 +798,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // zero Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZero>(0,nmax),*this); - DeviceType::fence(); if (neighflag == HALF) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf<HALF> >(0,ignum),*this); @@ -808,7 +805,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsHalf_LessAtomics<HALFTHREAD> >(0,ignum),*this); else //(neighflag == FULL) Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBuildListsFull>(0,ignum),*this); - DeviceType::fence(); k_resize_bo.modify<DeviceType>(); k_resize_bo.sync<LMPHostType>(); @@ -827,15 +823,11 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Bond order if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1>(0,ignum),*this); - DeviceType::fence(); } else if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder1_LessAtomics>(0,ignum),*this); - DeviceType::fence(); } Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder2>(0,ignum),*this); - DeviceType::fence(); 
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxBondOrder3>(0,ignum),*this); - DeviceType::fence(); // Bond energy if (neighflag == HALF) { @@ -843,7 +835,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] = ev.evdwl; } else { //if (neighflag == HALFTHREAD) { @@ -851,7 +842,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond1<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] = ev.evdwl; } @@ -859,21 +849,17 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Multi-body corrections if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALF,0> >(0,inum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti1<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeMulti2<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[2] = ev.ereax[0]; @@ -887,14 +873,12 @@ void 
PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeAngular<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[4] = ev.ereax[3]; @@ -908,14 +892,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeTorsion<HALFTHREAD,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } pvector[8] = ev.ereax[6]; @@ -929,14 +911,12 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALF,0> >(0,inum),*this); - DeviceType::fence(); ev_all += ev; } else { //if (neighflag == HALFTHREAD) { if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,1> >(0,inum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeHydrogen<HALFTHREAD,0> >(0,inum),*this); - 
DeviceType::fence(); ev_all += ev; } } @@ -946,22 +926,18 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Bond force if (neighflag == HALF) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALF> >(0,ignum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALF,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] += ev.evdwl; } else { //if (neighflag == HALFTHREAD) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxUpdateBond<HALFTHREAD> >(0,ignum),*this); - DeviceType::fence(); if (evflag) Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,1> >(0,ignum),*this,ev); else Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxComputeBond2<HALFTHREAD,0> >(0,ignum),*this); - DeviceType::fence(); ev_all += ev; pvector[0] += ev.evdwl; } @@ -3945,11 +3921,9 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag) if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0; if (eflag_atom) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this); - DeviceType::fence(); } if (vflag_atom) { Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this); - DeviceType::fence(); } // if vflag_global = 2 and pair::compute() calls virial_fdotr_compute() @@ -4002,7 +3976,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondZero>(0,nmax),*this); - DeviceType::fence(); bo_cut_bond = control->bg_cut; @@ -4017,7 +3990,6 @@ void PairReaxCKokkos<DeviceType>::FindBond(int &numbonds) numbonds = 0; PairReaxCKokkosFindBondFunctor<DeviceType> find_bond_functor(this); Kokkos::parallel_reduce(inum,find_bond_functor,numbonds); - 
DeviceType::fence(); copymode = 0; } @@ -4076,7 +4048,6 @@ void PairReaxCKokkos<DeviceType>::PackBondBuffer(DAT::tdual_ffloat_1d k_buf, int nlocal = atomKK->nlocal; PairReaxCKokkosPackBondBufferFunctor<DeviceType> pack_bond_buffer_functor(this); Kokkos::parallel_scan(nlocal,pack_bond_buffer_functor); - DeviceType::fence(); copymode = 0; k_buf.modify<DeviceType>(); @@ -4135,11 +4106,9 @@ void PairReaxCKokkos<DeviceType>::FindBondSpecies() { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpeciesZero>(0,nmax),*this); - DeviceType::fence(); nlocal = atomKK->nlocal; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxFindBondSpecies>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; // NOTE: Could improve performance if a Kokkos version of ComputeSpecAtom is added diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 3ad7334d2f55c982e54e410357c7f3deb445d534..bd3ed3644f53373305a995382144a6d5c27a5432 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -403,17 +403,14 @@ void PPPMKokkos<DeviceType>::setup() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup1>(nxlo_fft,nxhi_fft+1),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup2>(nylo_fft,nyhi_fft+1),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup3>(nzlo_fft,nzhi_fft+1),*this); - DeviceType::fence(); copymode = 0; // merge three outer loops into one for better threading @@ -425,7 +422,6 @@ void PPPMKokkos<DeviceType>::setup() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_setup4>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; compute_gf_ik(); @@ -753,7 +749,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) if (eflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, 
TagPPPM_self1>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; //for (i = nlocal; i < ntotal; i++) d_eatom[i] *= 0.5*qscale; } @@ -761,7 +756,6 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) if (vflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_self2>(0,ntotal),*this); - DeviceType::fence(); copymode = 0; } } @@ -1415,7 +1409,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; } @@ -1495,7 +1488,6 @@ void PPPMKokkos<DeviceType>::compute_gf_ik_triclinic() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_compute_gf_ik_triclinic>(nzlo_fft,nzhi_fft+1),*this); - DeviceType::fence(); copymode = 0; } @@ -1588,7 +1580,6 @@ void PPPMKokkos<DeviceType>::particle_map() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_particle_map>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; k_flag.template modify<DeviceType>(); @@ -1641,7 +1632,6 @@ void PPPMKokkos<DeviceType>::make_rho() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_zero>(0,inum_out),*this); - DeviceType::fence(); copymode = 0; // loop over my charges, add their contribution to nearby grid points @@ -1654,7 +1644,6 @@ void PPPMKokkos<DeviceType>::make_rho() #ifdef KOKKOS_HAVE_CUDA copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_make_rho_atomic>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; #else ix = nxhi_out-nxlo_out + 1; @@ -1663,7 +1652,6 @@ void PPPMKokkos<DeviceType>::make_rho() copymode = 1; Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho> config(lmp->kokkos->num_threads,1); Kokkos::parallel_for(config,*this); - DeviceType::fence(); copymode = 0; #endif } @@ -1794,7 +1782,6 @@ void PPPMKokkos<DeviceType>::brick2fft() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, 
TagPPPM_brick2fft>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; k_density_fft.template modify<DeviceType>(); @@ -1842,7 +1829,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik1>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work1.template modify<DeviceType>(); @@ -1862,14 +1848,12 @@ void PPPMKokkos<DeviceType>::poisson_ik() if (vflag_global) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik2>(0,nfft),*this,ev); - DeviceType::fence(); copymode = 0; for (j = 0; j < 6; j++) virial[j] += ev.v[j]; energy += ev.ecoul; } else { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik3>(0,nfft),*this,ev); - DeviceType::fence(); copymode = 0; energy += ev.ecoul; } @@ -1880,7 +1864,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik4>(0,nfft),*this); - DeviceType::fence(); copymode = 0; // extra FFTs for per-atomKK energy/virial @@ -1914,7 +1897,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik5>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1926,7 +1908,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik6>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; @@ -1934,7 +1915,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik7>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1946,14 +1926,12 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik8>(0,inum_inout),*this); - DeviceType::fence(); 
copymode = 0; // z direction gradient copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik9>(0,inum_fft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -1965,7 +1943,6 @@ void PPPMKokkos<DeviceType>::poisson_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_ik10>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2215,7 +2192,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() if (eflag_atom) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom1>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2227,7 +2203,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom2>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2238,7 +2213,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom3>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2250,13 +2224,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom4>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom5>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2268,13 +2240,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom6>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom7>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template 
modify<DeviceType>(); @@ -2286,12 +2256,10 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom8>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom9>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2303,13 +2271,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom10>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom11>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2321,13 +2287,11 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom12>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom13>(0,nfft),*this); - DeviceType::fence(); copymode = 0; k_work2.template modify<DeviceType>(); @@ -2339,7 +2303,6 @@ void PPPMKokkos<DeviceType>::poisson_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_poisson_peratom14>(0,inum_inout),*this); - DeviceType::fence(); copymode = 0; } @@ -2545,7 +2508,6 @@ void PPPMKokkos<DeviceType>::fieldforce_ik() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_ik>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -2606,7 +2568,6 @@ void PPPMKokkos<DeviceType>::fieldforce_peratom() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_fieldforce_peratom>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -2682,12 +2643,10 @@ void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, 
Kokkos::DualView<FFT_ if (flag == FORWARD_IK) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward1>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } else if (flag == FORWARD_IK_PERATOM) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_forward2>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } } @@ -2740,12 +2699,10 @@ void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FF if (flag == FORWARD_IK) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward1>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } else if (flag == FORWARD_IK_PERATOM) { copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_forward2>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } } @@ -2798,7 +2755,6 @@ void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_ copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_pack_reverse>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } @@ -2829,7 +2785,6 @@ void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FF copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_unpack_reverse>(0,nlist),*this); - DeviceType::fence(); copymode = 0; } @@ -2989,7 +2944,6 @@ void PPPMKokkos<DeviceType>::slabcorr() double dipole = 0.0; copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr1>(0,nlocal),*this,dipole); - DeviceType::fence(); copymode = 0; // sum local contributions to get global dipole moment @@ -3003,7 +2957,6 @@ void PPPMKokkos<DeviceType>::slabcorr() if (eflag_atom || fabs(qsum) > SMALL) { copymode = 1; Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr2>(0,nlocal),*this,dipole_r2); - DeviceType::fence(); copymode = 0; // sum local contributions @@ -3027,7 +2980,6 @@ void PPPMKokkos<DeviceType>::slabcorr() efact = qscale * 
MY_2PI/volume; copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr3>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -3037,7 +2989,6 @@ void PPPMKokkos<DeviceType>::slabcorr() copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_slabcorr4>(0,nlocal),*this); - DeviceType::fence(); copymode = 0; } @@ -3081,7 +3032,6 @@ int PPPMKokkos<DeviceType>::timing_1d(int n, double &time1d) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this); - DeviceType::fence(); copymode = 0; MPI_Barrier(world); @@ -3119,7 +3069,6 @@ int PPPMKokkos<DeviceType>::timing_3d(int n, double &time3d) copymode = 1; Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagPPPM_timing_zero>(0,2*nfft_both),*this); - DeviceType::fence(); copymode = 0; MPI_Barrier(world); diff --git a/src/KSPACE/pair_lj_long_tip4p_long.cpp b/src/KSPACE/pair_lj_long_tip4p_long.cpp index fd318fd75bd56052e23d458e26c7c60f8fd26a7e..d2a6b801fc58d2f515b8fc3cf5c7adf16d0307c4 100644 --- a/src/KSPACE/pair_lj_long_tip4p_long.cpp +++ b/src/KSPACE/pair_lj_long_tip4p_long.cpp @@ -126,7 +126,7 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) int order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); int ni; - double *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double *lj1i, *lj2i, *lj3i, *lj4i, *offseti; double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; inum = list->inum; @@ -158,7 +158,6 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) hneigh[i][0] = iH1; hneigh[i][1] = iH2; hneigh[i][2] = 1; - } else { iH1 = hneigh[i][0]; iH2 = hneigh[i][1]; @@ -191,22 +190,22 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) r2inv = 1.0/rsq; if (order6) { // long-range lj if (!ndisptablebits || rsq <= tabinnerdispsq) { - register double rn = r2inv*r2inv*r2inv; - register double x2 = g2*rsq, a2 = 1.0/x2; - x2 = a2*exp(-x2)*lj4i[jtype]; - if (ni == 0) { - forcelj = - 
(rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; - if (eflag) - evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2; - } - else { // special case - register double f = special_lj[ni], t = rn*(1.0-f); - forcelj = f*(rn *= rn)*lj1i[jtype]- - g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype]; - if (eflag) - evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype]; - } + register double rn = r2inv*r2inv*r2inv; + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[jtype]; + if (ni == 0) { + forcelj = + (rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (eflag) + evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + forcelj = f*(rn *= rn)*lj1i[jtype]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype]; + if (eflag) + evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype]; + } } else { // table real space register union_int_float_t disp_t; @@ -224,31 +223,31 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) if (eflag) evdwl = f*rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]+t*lj4i[jtype]; } } - } - else { // cut lj - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) { - forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - if (eflag) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]; - } - else { // special case - register double f = special_lj[ni]; - forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - if (eflag) - evdwl = f * (rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]); - } + } + else { // cut lj + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + if (eflag) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]; + } + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + if (eflag) + evdwl = f * (rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]); + } } forcelj *= r2inv; - f[i][0] += 
delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; if (evflag) ev_tally(i,j,nlocal,newton_pair, - evdwl,0.0,forcelj,delx,dely,delz); + evdwl,0.0,forcelj,delx,dely,delz); } @@ -257,7 +256,7 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) if (rsq < cut_coulsqplus) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh[j][0] < 0) { jH1 = atom->map(tag[j] + 1); jH2 = atom->map(tag[j] + 2); @@ -272,7 +271,6 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) hneigh[j][0] = jH1; hneigh[j][1] = jH2; hneigh[j][2] = 1; - } else { jH1 = hneigh[j][0]; jH2 = hneigh[j][1]; @@ -282,63 +280,63 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) } } x2 = newsite[j]; - } else x2 = x[j]; - delx = x1[0] - x2[0]; - dely = x1[1] - x2[1]; - delz = x1[2] - x2[2]; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && order1) { - r2inv = 1.0 / rsq; - if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrt(rsq); - grij = g_ewald * r; - expm2 = exp(-grij*grij); - t = 1.0 / (1.0 + EWALD_P*grij); - erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - prefactor = qqrd2e * qtmp*q[j]/r; - forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - if (factor_coul < 1.0) { - forcecoul -= (1.0-factor_coul)*prefactor; - } - } else { - union_int_float_t rsq_lookup; - rsq_lookup.f = rsq; - itable = rsq_lookup.i & ncoulmask; - itable >>= ncoulshiftbits; - fraction = (rsq_lookup.f - rtable[itable]) * 
drtable[itable]; - table = ftable[itable] + fraction*dftable[itable]; - forcecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { - table = ctable[itable] + fraction*dctable[itable]; - prefactor = qtmp*q[j] * table; - forcecoul -= (1.0-factor_coul)*prefactor; - } - } - - cforce = forcecoul * r2inv; - - //if (evflag) ev_tally(i,j,nlocal,newton_pair, - // evdwl,0.0,cforce,delx,dely,delz); + r2inv = 1.0 / rsq; + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + + cforce = forcecoul * r2inv; - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial + //if (evflag) ev_tally(i,j,nlocal,newton_pair, + // evdwl,0.0,cforce,delx,dely,delz); - n = 0; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + 
// f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; key = 0; - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; if (vflag) { v[0] = x[i][0] * delx * cforce; @@ -348,9 +346,9 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) v[4] = x[i][0] * delz * cforce; v[5] = x[i][1] * delz * cforce; } - vlist[n++] = i; + vlist[n++] = i; - } else { + } else { key += 1; fd[0] = delx*cforce; fd[1] = dely*cforce; @@ -376,42 +374,42 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) f[iH2][1] += fH[1]; f[iH2][2] += fH[2]; - if (vflag) { + if (vflag) { xH1 = x[iH1]; xH2 = x[iH2]; - v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; - } - vlist[n++] = i; - vlist[n++] = iH1; - vlist[n++] = iH2; - } - - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * cforce; - - if (vflag) { - v[0] -= x[j][0] * delx * cforce; - v[1] -= x[j][1] * dely * cforce; - v[2] -= x[j][2] * delz * cforce; - v[3] -= x[j][0] * dely * cforce; - v[4] -= x[j][0] * delz * cforce; - v[5] -= x[j][1] * delz * cforce; + v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; + v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; + v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; + v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; + v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; + v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] 
+ xH2[1]*fH[2]; + } + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (vflag) { + v[0] -= x[j][0] * delx * cforce; + v[1] -= x[j][1] * dely * cforce; + v[2] -= x[j][2] * delz * cforce; + v[3] -= x[j][0] * dely * cforce; + v[4] -= x[j][0] * delz * cforce; + v[5] -= x[j][1] * delz * cforce; } - vlist[n++] = j; + vlist[n++] = j; - } else { + } else { key += 2; - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; fO[0] = fd[0]*(1 - alpha); fO[1] = fd[1]*(1 - alpha); @@ -421,45 +419,45 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; - if (vflag) { + if (vflag) { xH1 = x[jH1]; xH2 = x[jH2]; - v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; + v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; + v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; + v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; + v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; + v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; + v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; } vlist[n++] = j; - vlist[n++] = jH1; - vlist[n++] = jH2; - } - - if 
(eflag) { - if (!ncoultablebits || rsq <= tabinnersq) - ecoul = prefactor*erfc; - else { - table = etable[itable] + fraction*detable[itable]; - ecoul = qtmp*q[j] * table; - } - if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + + if (eflag) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; } else ecoul = 0.0; if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha); - } + } } } } @@ -473,7 +471,7 @@ void PairLJLongTIP4PLong::compute_inner() int iH1,iH2,jH1,jH2; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz; double r2inv,forcecoul,forcelj,cforce; - double fO[3],fH[3],fd[3];// f1[3]; + double fO[3],fH[3],fd[3]; double *x1,*x2; int *ilist,*jlist,*numneigh,**firstneigh; double rsq, qri; @@ -534,14 +532,19 @@ void PairLJLongTIP4PLong::compute_inner() itype = type[i]; if (itype == typeO && order1) { if (hneigh[i][0] < 0) { - hneigh[i][0] = iH1 = atom->map(tag[i] + 1); - hneigh[i][1] = iH2 = atom->map(tag[i] + 2); - hneigh[i][2] = 1; + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][0] = iH1; + hneigh[i][1] = iH2; + hneigh[i][2] = 1; } else { iH1 = hneigh[i][0]; iH2 = hneigh[i][1]; @@ -570,12 +573,12 @@ void PairLJLongTIP4PLong::compute_inner() if (rsq < cut_ljsq[itype][jtype] && rsq < cut_out_off_sq ) { // lj r2inv = 1.0/rsq; - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - else { // special case - register double f = 
special_lj[ni]; - forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - } + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } if (rsq > cut_out_on_sq) { // switching register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; @@ -583,12 +586,12 @@ void PairLJLongTIP4PLong::compute_inner() } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; } @@ -597,16 +600,21 @@ void PairLJLongTIP4PLong::compute_inner() if (rsq < cut_coulsqplus && order1) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh[j][0] < 0) { - hneigh[j][0] = jH1 = atom->map(tag[j] + 1); - hneigh[j][1] = jH2 = atom->map(tag[j] + 2); - hneigh[j][2] = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][0] = jH1; + hneigh[j][1] = jH2; + hneigh[j][2] = 1; } else { jH1 = hneigh[j][0]; jH2 = hneigh[j][1]; @@ -616,17 +624,17 @@ void PairLJLongTIP4PLong::compute_inner() } } x2 = newsite[j]; - } else x2 = x[j]; - delx = x1[0] - x2[0]; - dely = x1[1] - x2[1]; - delz = x1[2] - x2[2]; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + 
delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && rsq < cut_out_off_sq) { - r2inv = 1.0 / rsq; + r2inv = 1.0 / rsq; qri = qqrd2e*qtmp; if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); else { @@ -638,25 +646,25 @@ void PairLJLongTIP4PLong::compute_inner() forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); } - cforce = forcecoul * r2inv; + cforce = forcecoul * r2inv; - //if (evflag) ev_tally(i,j,nlocal,newton_pair, + //if (evflag) ev_tally(i,j,nlocal,newton_pair, // evdwl,0.0,cforce,delx,dely,delz); - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; - } else { + } else { fd[0] = delx*cforce; fd[1] = dely*cforce; fd[2] = delz*cforce; @@ -682,15 +690,15 @@ void PairLJLongTIP4PLong::compute_inner() f[iH2][2] += fH[2]; } - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz 
* cforce; + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; - } else { - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + } else { + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; fO[0] = fd[0]*(1 - alpha); fO[1] = fd[1]*(1 - alpha); @@ -700,19 +708,19 @@ void PairLJLongTIP4PLong::compute_inner() fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; } - } + } } } } @@ -777,14 +785,19 @@ void PairLJLongTIP4PLong::compute_middle() itype = type[i]; if (itype == typeO && order1) { if (hneigh[i][0] < 0) { - hneigh[i][0] = iH1 = atom->map(tag[i] + 1); - hneigh[i][1] = iH2 = atom->map(tag[i] + 2); - hneigh[i][2] = 1; + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][0] = iH1; + hneigh[i][1] = iH2; + hneigh[i][2] = 1; } else { iH1 = hneigh[i][0]; iH2 = hneigh[i][1]; @@ -813,12 +826,12 @@ void PairLJLongTIP4PLong::compute_middle() if (rsq < cut_ljsq[itype][jtype] && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq ) { // lj r2inv = 1.0/rsq; - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - else { // special case - register double f = special_lj[ni]; - forcelj = 
f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - } + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } if (rsq < cut_in_on_sq) { // switching register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; @@ -830,12 +843,12 @@ void PairLJLongTIP4PLong::compute_middle() } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; } @@ -844,16 +857,21 @@ void PairLJLongTIP4PLong::compute_middle() if (rsq < cut_coulsqplus && order1) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh[j][0] < 0) { - hneigh[j][0] = jH1 = atom->map(tag[j] + 1); - hneigh[j][1] = jH2 = atom->map(tag[j] + 2); - hneigh[j][2] = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][0] = jH1; + hneigh[j][1] = jH2; + hneigh[j][2] = 1; } else { jH1 = hneigh[j][0]; jH2 = hneigh[j][1]; @@ -863,17 +881,17 @@ void PairLJLongTIP4PLong::compute_middle() } } x2 = newsite[j]; - } else x2 = x[j]; - delx = x1[0] - x2[0]; - dely = x1[1] - x2[1]; - delz = x1[2] - x2[2]; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; } - // test current 
rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq) { - r2inv = 1.0 / rsq; + r2inv = 1.0 / rsq; qri = qqrd2e*qtmp; if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); else { @@ -889,25 +907,25 @@ void PairLJLongTIP4PLong::compute_middle() forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); } - cforce = forcecoul * r2inv; + cforce = forcecoul * r2inv; - //if (evflag) ev_tally(i,j,nlocal,newton_pair, + //if (evflag) ev_tally(i,j,nlocal,newton_pair, // evdwl,0.0,cforce,delx,dely,delz); - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; - } else { + } else { fd[0] = delx*cforce; fd[1] = dely*cforce; fd[2] = delz*cforce; @@ -933,15 +951,15 @@ void PairLJLongTIP4PLong::compute_middle() f[iH2][2] += fH[2]; } - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * 
cforce; + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; - } else { - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + } else { + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; fO[0] = fd[0]*(1 - alpha); fO[1] = fd[1]*(1 - alpha); @@ -951,19 +969,19 @@ void PairLJLongTIP4PLong::compute_middle() fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; } - } + } } } } @@ -979,8 +997,8 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) int iH1,iH2,jH1,jH2; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; double r2inv,forcecoul,forcelj,cforce, respa_coul, respa_lj, frespa,fvirial; - double fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];// f1[3]; - double *x1,*x2; + double fO[3],fH[3],fd[3],v[6]; + double *x1,*x2,*xH1,*xH2; int *ilist,*jlist,*numneigh,**firstneigh; double rsq,qri; int respa_flag; @@ -1048,14 +1066,19 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) itype = type[i]; if (itype == typeO) { if (hneigh[i][0] < 0) { - hneigh[i][0] = iH1 = atom->map(tag[i] + 1); - hneigh[i][1] = iH2 = atom->map(tag[i] + 2); - hneigh[i][2] = 1; + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + hneigh[i][0] = 
iH1; + hneigh[i][1] = iH2; + hneigh[i][2] = 1; } else { iH1 = hneigh[i][0]; iH2 = hneigh[i][1]; @@ -1096,8 +1119,8 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) r2inv = 1.0/rsq; register double rn = r2inv*r2inv*r2inv; if (respa_flag) respa_lj = ni == 0 ? // correct for respa - frespa*rn*(rn*lj1i[jtype]-lj2i[jtype]) : - frespa*rn*(rn*lj1i[jtype]-lj2i[jtype])*special_lj[ni]; + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype]) : + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype])*special_lj[ni]; if (order6) { // long-range form if (!ndisptablebits || rsq <= tabinnerdispsq) { register double x2 = g2*rsq, a2 = 1.0/x2; @@ -1145,17 +1168,17 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; if (evflag) { fvirial = forcelj + respa_lj*r2inv; ev_tally(i,j,nlocal,newton_pair, - evdwl,0.0,fvirial,delx,dely,delz); + evdwl,0.0,fvirial,delx,dely,delz); } } @@ -1165,16 +1188,21 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) if (rsq < cut_coulsqplus) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh[j][0] < 0) { - hneigh[j][0] = jH1 = atom->map(tag[j] + 1); - hneigh[j][1] = jH2 = atom->map(tag[j] + 2); - hneigh[j][2] = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + hneigh[j][0] = jH1; + 
hneigh[j][1] = jH2; + hneigh[j][2] = 1; } else { jH1 = hneigh[j][0]; jH2 = hneigh[j][1]; @@ -1184,14 +1212,14 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) } } x2 = newsite[j]; - } else x2 = x[j]; - delx = x1[0] - x2[0]; - dely = x1[1] - x2[1]; - delz = x1[2] - x2[2]; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if ((rsq < cut_coulsq) && order1) { frespa = 1.0; // check whether and how to compute respa corrections @@ -1245,20 +1273,20 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) fvirial = (forcecoul + respa_coul) * r2inv; // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - n = 0; + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; key = 0; - if (itype != typeO) { - f[i][0] += delx * cforce; + if (itype != typeO) { + f[i][0] += delx * cforce; f[i][1] += dely * cforce; - f[i][2] += delz * cforce; + f[i][2] += delz * cforce; if (vflag) { v[0] = x[i][0] * delx * fvirial; @@ -1268,9 +1296,9 @@ void 
PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) v[4] = x[i][0] * delz * fvirial; v[5] = x[i][1] * delz * fvirial; } - vlist[n++] = i; + vlist[n++] = i; - } else { + } else { key += 1; fd[0] = delx*cforce; fd[1] = dely*cforce; @@ -1296,7 +1324,7 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) f[iH2][1] += fH[1]; f[iH2][2] += fH[2]; - if (vflag) { + if (vflag) { fd[0] = delx*fvirial; fd[1] = dely*fvirial; fd[2] = delz*fvirial; @@ -1309,42 +1337,41 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - domain->closest_image(x[i],x[iH1],xH1); - domain->closest_image(x[i],x[iH2],xH2); - - v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; - } - vlist[n++] = i; - vlist[n++] = iH1; - vlist[n++] = iH2; - } - - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * cforce; - - if (vflag) { - v[0] -= x[j][0] * delx * fvirial; - v[1] -= x[j][1] * dely * fvirial; - v[2] -= x[j][2] * delz * fvirial; - v[3] -= x[j][0] * dely * fvirial; - v[4] -= x[j][0] * delz * fvirial; - v[5] -= x[j][1] * delz * fvirial; + xH1 = x[jH1]; + xH2 = x[jH2]; + v[0] = x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; + v[1] = x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; + v[2] = x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; + v[3] = x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; + v[4] = x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; + v[5] = x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; + } + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (vflag) { + v[0] -= x[j][0] * delx * 
fvirial; + v[1] -= x[j][1] * dely * fvirial; + v[2] -= x[j][2] * delz * fvirial; + v[3] -= x[j][0] * dely * fvirial; + v[4] -= x[j][0] * delz * fvirial; + v[5] -= x[j][1] * delz * fvirial; } - vlist[n++] = j; + vlist[n++] = j; - } else { + } else { key += 2; - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; fO[0] = fd[0]*(1 - alpha); fO[1] = fd[1]*(1 - alpha); @@ -1354,23 +1381,23 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; - if (vflag) { + if (vflag) { - fd[0] = -delx*fvirial; - fd[1] = -dely*fvirial; - fd[2] = -delz*fvirial; + fd[0] = -delx*fvirial; + fd[1] = -dely*fvirial; + fd[2] = -delz*fvirial; fO[0] = fd[0]*(1 - alpha); fO[1] = fd[1]*(1 - alpha); @@ -1380,20 +1407,20 @@ void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) fH[1] = 0.5 * alpha * fd[1]; fH[2] = 0.5 * alpha * fd[2]; - domain->closest_image(x[j],x[jH1],xH1); - domain->closest_image(x[j],x[jH2],xH2); + xH1 = x[jH1]; + xH2 = x[jH2]; - v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] += x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; + v[0] += x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; + v[1] += x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; + v[2] += x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; + v[3] += x[j][0]*fO[1] + 
xH1[0]*fH[1] + xH2[0]*fH[1]; + v[4] += x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; + v[5] += x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; } vlist[n++] = j; - vlist[n++] = jH1; - vlist[n++] = jH2; - } + vlist[n++] = jH1; + vlist[n++] = jH2; + } if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha); } diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi index 2cb37ed9fed4d36207ac86052a26f6d1b9cea4c1..ac8279949a8abd41aecda963df99bd907b9e629f 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi +++ b/src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi @@ -8,7 +8,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ +CCFLAGS = -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \ -fno-alias -ansi-alias -restrict $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor index b7f3cd68466b5d9601c70e4d8e345b5f2378803c..db5de83a06af6963e9c05a2ad02ce20def04a9f0 100644 --- a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor +++ b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor @@ -8,7 +8,7 @@ SHELL = /bin/sh CC = mpiicpc MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2 -CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ +CCFLAGS = -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \ -xHost -fno-alias -ansi-alias -restrict \ -qoverride-limits $(MIC_OPT) SHFLAGS = -fPIC diff --git a/src/MAKE/OPTIONS/Makefile.knl b/src/MAKE/OPTIONS/Makefile.knl index 3bc777592e0164b4d5df1cd6eb0e95bca210d705..881c51f0e4de91654743c024f0af146459206e4a 100644 --- a/src/MAKE/OPTIONS/Makefile.knl +++ b/src/MAKE/OPTIONS/Makefile.knl @@ -8,7 +8,7 @@ SHELL = /bin/sh CC = mpiicpc OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ +CCFLAGS = -qopenmp 
-DLAMMPS_MEMALIGN=64 -qno-offload \ -fno-alias -ansi-alias -restrict $(OPTFLAGS) SHFLAGS = -fPIC DEPFLAGS = -M diff --git a/src/MANYBODY/pair_airebo.cpp b/src/MANYBODY/pair_airebo.cpp index cc7efbcaa62ec76848d247a4f51eab5401c1798c..d83f5a39a8414e7358a68cb4d8d9a25d47602201 100644 --- a/src/MANYBODY/pair_airebo.cpp +++ b/src/MANYBODY/pair_airebo.cpp @@ -1615,6 +1615,10 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -1678,6 +1682,10 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -1960,6 +1968,10 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -2023,6 +2035,10 @@ double PairAIREBO::bondorder(int i, int j, double rij[3], if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -2560,6 +2576,10 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI 
and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -2623,6 +2643,10 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -2895,6 +2919,10 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2(atomi,atomk,-tmp2,rik); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -2958,6 +2986,10 @@ double PairAIREBO::bondorderLJ(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2(atomj,atoml,-tmp2,rjl); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; diff --git a/src/MC/fix_gcmc.cpp b/src/MC/fix_gcmc.cpp index e0a269b88ccd39af7bb38e80340c8ecff56dd2cd..fbe6b6bb6249c4dcdb6adfd06365b3e90e433908 100644 --- a/src/MC/fix_gcmc.cpp +++ b/src/MC/fix_gcmc.cpp @@ -501,7 +501,7 @@ void FixGCMC::init() if (ifix < 0) error->all(FLERR,"Fix gcmc rigid fix does not exist"); fixrigid = modify->fix[ifix]; int tmp; - if (onemols != (Molecule **) fixrigid->extract("onemol",tmp)) + if (&onemols[imol] != (Molecule **) fixrigid->extract("onemol",tmp)) error->all(FLERR, "Fix gcmc and fix rigid/small not using " "same molecule template ID"); @@ -516,7 +516,7 @@ void FixGCMC::init() if (ifix < 0) error->all(FLERR,"Fix gcmc shake fix does not exist"); fixshake = modify->fix[ifix]; 
int tmp; - if (onemols != (Molecule **) fixshake->extract("onemol",tmp)) + if (&onemols[imol] != (Molecule **) fixshake->extract("onemol",tmp)) error->all(FLERR,"Fix gcmc and fix shake not using " "same molecule template ID"); } @@ -1397,12 +1397,13 @@ void FixGCMC::attempt_molecule_insertion() // FixRigidSmall::set_molecule stores rigid body attributes // FixShake::set_molecule stores shake info for molecule - - if (rigidflag) - fixrigid->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat); - else if (shakeflag) - fixshake->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat); + for (int submol = 0; submol < nmol; ++submol) { + if (rigidflag) + fixrigid->set_molecule(nlocalprev,maxtag_all,submol,com_coord,vnew,quat); + else if (shakeflag) + fixshake->set_molecule(nlocalprev,maxtag_all,submol,com_coord,vnew,quat); + } atom->natoms += natoms_per_molecule; if (atom->natoms < 0) error->all(FLERR,"Too many total atoms"); @@ -2058,11 +2059,12 @@ void FixGCMC::attempt_molecule_insertion_full() // FixRigidSmall::set_molecule stores rigid body attributes // FixShake::set_molecule stores shake info for molecule - if (rigidflag) - fixrigid->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat); - else if (shakeflag) - fixshake->set_molecule(nlocalprev,maxtag_all,imol,com_coord,vnew,quat); - + for (int submol = 0; submol < nmol; ++submol) { + if (rigidflag) + fixrigid->set_molecule(nlocalprev,maxtag_all,submol,com_coord,vnew,quat); + else if (shakeflag) + fixshake->set_molecule(nlocalprev,maxtag_all,submol,com_coord,vnew,quat); + } atom->natoms += natoms_per_molecule; if (atom->natoms < 0) error->all(FLERR,"Too many total atoms"); diff --git a/src/Purge.list b/src/Purge.list index 3bdfed3ed0ca505954a8069f81b0b19f8cf0cede..340b210b20e4ed325fd87badec04372795c1fe35 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -16,6 +16,11 @@ style_region.h style_neigh_bin.h style_neigh_pair.h style_neigh_stencil.h +# deleted on 6 June 2017 +pair_lj_sf.cpp 
+pair_lj_sf.h +pair_lj_sf_omp.cpp +pair_lj_sf_omp.h # deleted on 4 May 2017 dump_custom_vtk.cpp dump_custom_vtk.h diff --git a/src/REPLICA/fix_neb.cpp b/src/REPLICA/fix_neb.cpp index 249339191a26f3cf59ad24a9c93aaf762c24f61c..6daaf9471091e22b521dfeafd5b88cbc99d8d409 100644 --- a/src/REPLICA/fix_neb.cpp +++ b/src/REPLICA/fix_neb.cpp @@ -27,25 +27,91 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "math_const.h" using namespace LAMMPS_NS; using namespace FixConst; +using namespace MathConst; enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC}; +#define BUFSIZE 8 + /* ---------------------------------------------------------------------- */ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg), id_pe(NULL), pe(NULL), xprev(NULL), xnext(NULL), - tangent(NULL), xsend(NULL), xrecv(NULL), tagsend(NULL), tagrecv(NULL), - xsendall(NULL), xrecvall(NULL), tagsendall(NULL), tagrecvall(NULL), - counts(NULL), displacements(NULL) + Fix(lmp, narg, arg), + id_pe(NULL), pe(NULL), nlenall(NULL), xprev(NULL), xnext(NULL), + fnext(NULL), springF(NULL), tangent(NULL), xsend(NULL), xrecv(NULL), + fsend(NULL), frecv(NULL), tagsend(NULL), tagrecv(NULL), + xsendall(NULL), xrecvall(NULL), fsendall(NULL), frecvall(NULL), + tagsendall(NULL), tagrecvall(NULL), counts(NULL), + displacements(NULL) { - if (narg != 4) error->all(FLERR,"Illegal fix neb command"); + + if (narg < 4) error->all(FLERR,"Illegal fix neb command"); kspring = force->numeric(FLERR,arg[3]); if (kspring <= 0.0) error->all(FLERR,"Illegal fix neb command"); + // optional params + + NEBLongRange = false; + StandardNEB = true; + PerpSpring = FreeEndIni = FreeEndFinal = false; + FreeEndFinalWithRespToEIni = FinalAndInterWithRespToEIni = false; + kspringPerp = 0.0; + kspringIni = 1.0; + kspringFinal = 1.0; + + int iarg = 4; + while (iarg < narg) { + if (strcmp(arg[iarg],"parallel") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command"); + if (strcmp(arg[iarg+1],"ideal") == 
0) { + NEBLongRange = true; + StandardNEB = false; + } else if (strcmp(arg[iarg+1],"neigh") == 0) { + NEBLongRange = false; + StandardNEB = true; + } else error->all(FLERR,"Illegal fix neb command"); + iarg += 2; + + } else if (strcmp(arg[iarg],"perp") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix neb command"); + PerpSpring = true; + kspringPerp = force->numeric(FLERR,arg[iarg+1]); + if (kspringPerp == 0.0) PerpSpring = false; + if (kspringPerp < 0.0) error->all(FLERR,"Illegal fix neb command"); + iarg += 2; + + } else if (strcmp (arg[iarg],"end") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal fix neb command"); + if (strcmp(arg[iarg+1],"first") == 0) { + FreeEndIni = true; + kspringIni = force->numeric(FLERR,arg[iarg+2]); + } else if (strcmp(arg[iarg+1],"last") == 0) { + FreeEndFinal = true; + FinalAndInterWithRespToEIni = false; + FreeEndFinalWithRespToEIni = false; + kspringFinal = force->numeric(FLERR,arg[iarg+2]); + } else if (strcmp(arg[iarg+1],"last/efirst") == 0) { + FreeEndFinal = false; + FinalAndInterWithRespToEIni = false; + FreeEndFinalWithRespToEIni = true; + kspringFinal = force->numeric(FLERR,arg[iarg+2]); + } else if (strcmp(arg[iarg+1],"last/efirst/middle") == 0) { + FreeEndFinal = false; + FinalAndInterWithRespToEIni = true; + FreeEndFinalWithRespToEIni = true; + kspringFinal = force->numeric(FLERR,arg[iarg+2]); + } else error->all(FLERR,"Illegal fix neb command"); + + iarg += 3; + + } else error->all(FLERR,"Illegal fix neb command"); + } + // nreplica = number of partitions // ireplica = which world I am in universe // nprocs_universe = # of procs in all replicase @@ -62,7 +128,18 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : else procprev = -1; if (ireplica < nreplica-1) procnext = universe->root_proc[ireplica+1]; else procnext = -1; + uworld = universe->uworld; + int *iroots = new int[nreplica]; + MPI_Group uworldgroup,rootgroup; + if (NEBLongRange) { + for (int i=0; i<nreplica; i++) + iroots[i] = 
universe->root_proc[i]; + MPI_Comm_group(uworld, &uworldgroup); + MPI_Group_incl(uworldgroup, nreplica, iroots, &rootgroup); + MPI_Comm_create(uworld, rootgroup, &rootworld); + } + delete [] iroots; // create a new compute pe style // id = fix-ID + pe, compute group = all @@ -81,15 +158,8 @@ FixNEB::FixNEB(LAMMPS *lmp, int narg, char **arg) : // initialize local storage - maxlocal = 0; - ntotal = 0; - - xprev = xnext = tangent = NULL; - xsend = xrecv = NULL; - tagsend = tagrecv = NULL; - xsendall = xrecvall = NULL; - tagsendall = tagrecvall = NULL; - counts = displacements = NULL; + maxlocal = -1; + ntotal = -1; } /* ---------------------------------------------------------------------- */ @@ -102,19 +172,29 @@ FixNEB::~FixNEB() memory->destroy(xprev); memory->destroy(xnext); memory->destroy(tangent); - + memory->destroy(fnext); + memory->destroy(springF); memory->destroy(xsend); memory->destroy(xrecv); + memory->destroy(fsend); + memory->destroy(frecv); memory->destroy(tagsend); memory->destroy(tagrecv); memory->destroy(xsendall); memory->destroy(xrecvall); + memory->destroy(fsendall); + memory->destroy(frecvall); memory->destroy(tagsendall); memory->destroy(tagrecvall); memory->destroy(counts); memory->destroy(displacements); + + if (NEBLongRange) { + if (rootworld != MPI_COMM_NULL) MPI_Comm_free(&rootworld); + memory->destroy(nlenall); + } } /* ---------------------------------------------------------------------- */ @@ -148,7 +228,7 @@ void FixNEB::init() // comm mode for inter-replica exchange of coords if (nreplica == nprocs_universe && - nebatoms == atom->natoms && atom->sortfreq == 0) + nebatoms == atom->natoms && atom->sortfreq == 0) cmode = SINGLE_PROC_DIRECT; else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP; else cmode = MULTI_PROC; @@ -158,11 +238,13 @@ void FixNEB::init() if (atom->natoms > MAXSMALLINT) error->all(FLERR,"Too many atoms for NEB"); ntotal = atom->natoms; - if (atom->nlocal > maxlocal) reallocate(); + if (atom->nmax > 
maxlocal) reallocate(); if (MULTI_PROC && counts == NULL) { memory->create(xsendall,ntotal,3,"neb:xsendall"); memory->create(xrecvall,ntotal,3,"neb:xrecvall"); + memory->create(fsendall,ntotal,3,"neb:fsendall"); + memory->create(frecvall,ntotal,3,"neb:frecvall"); memory->create(tagsendall,ntotal,"neb:tagsendall"); memory->create(tagrecvall,ntotal,"neb:tagrecvall"); memory->create(counts,nprocs,"neb:counts"); @@ -185,19 +267,15 @@ void FixNEB::min_setup(int vflag) void FixNEB::min_post_force(int vflag) { - double vprev,vnext,vmax,vmin; - double delx,dely,delz; - double delta1[3],delta2[3]; - - // veng = PE of this replica - // vprev,vnext = PEs of adjacent replicas - // only proc 0 in each replica communicates + double vprev,vnext; + double delxp,delyp,delzp,delxn,delyn,delzn; + double vIni=0.0; vprev = vnext = veng = pe->compute_scalar(); - if (ireplica < nreplica-1 && me == 0) + if (ireplica < nreplica-1 && me == 0) MPI_Send(&veng,1,MPI_DOUBLE,procnext,0,uworld); - if (ireplica > 0 && me == 0) + if (ireplica > 0 && me == 0) MPI_Recv(&vprev,1,MPI_DOUBLE,procprev,0,uworld,MPI_STATUS_IGNORE); if (ireplica > 0 && me == 0) @@ -210,6 +288,31 @@ void FixNEB::min_post_force(int vflag) MPI_Bcast(&vnext,1,MPI_DOUBLE,0,world); } + if (FreeEndFinal && ireplica == nreplica-1 && (update->ntimestep == 0)) EFinalIni = veng; + + if (ireplica == 0) vIni=veng; + + if (FreeEndFinalWithRespToEIni) { + if (me == 0) { + int procFirst; + procFirst=universe->root_proc[0]; + MPI_Bcast(&vIni,1,MPI_DOUBLE,procFirst,uworld); + } + if (cmode == MULTI_PROC) { + MPI_Bcast(&vIni,1,MPI_DOUBLE,0,world); + } + } + + if (FreeEndIni && ireplica == 0 && (update->ntimestep == 0)) EIniIni = veng; + /* if (FreeEndIni && ireplica == 0) { + // if (me == 0 ) + if (update->ntimestep == 0) { + EIniIni = veng; + // if (cmode == MULTI_PROC) + // MPI_Bcast(&EIniIni,1,MPI_DOUBLE,0,world); + } + }*/ + // communicate atoms to/from adjacent replicas to fill xprev,xnext inter_replica_comm(); @@ -218,108 +321,146 @@ 
void FixNEB::min_post_force(int vflag) pe->addstep(update->ntimestep+1); - // compute norm of GradV for log output + double **x = atom->x; + int *mask = atom->mask; + double dot = 0.0; + double prefactor = 0.0; double **f = atom->f; int nlocal = atom->nlocal; - double fsq = 0.0; - for (int i = 0; i < nlocal; i++) - fsq += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2]; - - MPI_Allreduce(&fsq,&gradvnorm,1,MPI_DOUBLE,MPI_SUM,world); - gradvnorm = sqrt(gradvnorm); - - // first or last replica has no change to forces, just return + //calculating separation between images - if (ireplica == 0 || ireplica == nreplica-1) { - plen = nlen = 0.0; - return; - } + plen = 0.0; + nlen = 0.0; + double tlen = 0.0; + double gradnextlen = 0.0; - // tangent = unit tangent vector in 3N space - // based on delta vectors between atoms and their images in adjacent replicas - // use one or two delta vecs to compute tangent, - // depending on relative PEs of 3 replicas - // see Henkelman & Jonsson 2000 paper, eqs 8-11 + dotgrad = gradlen = dotpath = dottangrad = 0.0; - double **x = atom->x; - int *mask = atom->mask; + if (ireplica == nreplica-1) { - if (vnext > veng && veng > vprev) { for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - tangent[i][0] = xnext[i][0] - x[i][0]; - tangent[i][1] = xnext[i][1] - x[i][1]; - tangent[i][2] = xnext[i][2] - x[i][2]; - domain->minimum_image(tangent[i]); + delxp = x[i][0] - xprev[i][0]; + delyp = x[i][1] - xprev[i][1]; + delzp = x[i][2] - xprev[i][2]; + domain->minimum_image(delxp,delyp,delzp); + plen += delxp*delxp + delyp*delyp + delzp*delzp; + dottangrad += delxp* f[i][0]+ delyp*f[i][1]+delzp*f[i][2]; + gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2]; + if (FreeEndFinal||FreeEndFinalWithRespToEIni) { + tangent[i][0]=delxp; + tangent[i][1]=delyp; + tangent[i][2]=delzp; + tlen += tangent[i][0]*tangent[i][0] + + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2]; + dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] + 
+ f[i][2]*tangent[i][2]; + } } - } else if (vnext < veng && veng < vprev) { + + } else if (ireplica == 0) { for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - tangent[i][0] = x[i][0] - xprev[i][0]; - tangent[i][1] = x[i][1] - xprev[i][1]; - tangent[i][2] = x[i][2] - xprev[i][2]; - domain->minimum_image(tangent[i]); + delxn = xnext[i][0] - x[i][0]; + delyn = xnext[i][1] - x[i][1]; + delzn = xnext[i][2] - x[i][2]; + domain->minimum_image(delxn,delyn,delzn); + nlen += delxn*delxn + delyn*delyn + delzn*delzn; + gradnextlen += fnext[i][0]*fnext[i][0] + + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2]; + dotgrad += f[i][0]*fnext[i][0] + + f[i][1]*fnext[i][1] + f[i][2]*fnext[i][2]; + dottangrad += delxn*f[i][0]+ delyn*f[i][1] + delzn*f[i][2]; + gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2]; + if (FreeEndIni) { + tangent[i][0]=delxn; + tangent[i][1]=delyn; + tangent[i][2]=delzn; + tlen += tangent[i][0]*tangent[i][0] + + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2]; + dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] + + f[i][2]*tangent[i][2]; + } } } else { - vmax = MAX(fabs(vnext-veng),fabs(vprev-veng)); - vmin = MIN(fabs(vnext-veng),fabs(vprev-veng)); + + // not the first or last replica + + double vmax = MAX(fabs(vnext-veng),fabs(vprev-veng)); + double vmin = MIN(fabs(vnext-veng),fabs(vprev-veng)); + + for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - delta1[0] = xnext[i][0] - x[i][0]; - delta1[1] = xnext[i][1] - x[i][1]; - delta1[2] = xnext[i][2] - x[i][2]; - domain->minimum_image(delta1); - delta2[0] = x[i][0] - xprev[i][0]; - delta2[1] = x[i][1] - xprev[i][1]; - delta2[2] = x[i][2] - xprev[i][2]; - domain->minimum_image(delta2); - if (vnext > vprev) { - tangent[i][0] = vmax*delta1[0] + vmin*delta2[0]; - tangent[i][1] = vmax*delta1[1] + vmin*delta2[1]; - tangent[i][2] = vmax*delta1[2] + vmin*delta2[2]; + delxp = x[i][0] - xprev[i][0]; + delyp = x[i][1] - xprev[i][1]; + delzp = x[i][2] - xprev[i][2]; + 
domain->minimum_image(delxp,delyp,delzp); + plen += delxp*delxp + delyp*delyp + delzp*delzp; + + delxn = xnext[i][0] - x[i][0]; + delyn = xnext[i][1] - x[i][1]; + delzn = xnext[i][2] - x[i][2]; + domain->minimum_image(delxn,delyn,delzn); + + if (vnext > veng && veng > vprev) { + tangent[i][0] = delxn; + tangent[i][1] = delyn; + tangent[i][2] = delzn; + } else if (vnext < veng && veng < vprev) { + tangent[i][0] = delxp; + tangent[i][1] = delyp; + tangent[i][2] = delzp; } else { - tangent[i][0] = vmin*delta1[0] + vmax*delta2[0]; - tangent[i][1] = vmin*delta1[1] + vmax*delta2[1]; - tangent[i][2] = vmin*delta1[2] + vmax*delta2[2]; + if (vnext > vprev) { + tangent[i][0] = vmax*delxn + vmin*delxp; + tangent[i][1] = vmax*delyn + vmin*delyp; + tangent[i][2] = vmax*delzn + vmin*delzp; + } else { + tangent[i][0] = vmin*delxn + vmax*delxp; + tangent[i][1] = vmin*delyn + vmax*delyp; + tangent[i][2] = vmin*delzn + vmax*delzp; + } } + + nlen += delxn*delxn + delyn*delyn + delzn*delzn; + tlen += tangent[i][0]*tangent[i][0] + + tangent[i][1]*tangent[i][1] + tangent[i][2]*tangent[i][2]; + gradlen += f[i][0]*f[i][0] + f[i][1]*f[i][1] + f[i][2]*f[i][2]; + dotpath += delxp*delxn + delyp*delyn + delzp*delzn; + dottangrad += tangent[i][0]*f[i][0] + + tangent[i][1]*f[i][1] + tangent[i][2]*f[i][2]; + gradnextlen += fnext[i][0]*fnext[i][0] + + fnext[i][1]*fnext[i][1] +fnext[i][2] * fnext[i][2]; + dotgrad += f[i][0]*fnext[i][0] + f[i][1]*fnext[i][1] + + f[i][2]*fnext[i][2]; + + springF[i][0] = kspringPerp*(delxn-delxp); + springF[i][1] = kspringPerp*(delyn-delyp); + springF[i][2] = kspringPerp*(delzn-delzp); } } - // tlen,plen,nlen = lengths of tangent, prev, next vectors - - double tlen = 0.0; - plen = 0.0; - nlen = 0.0; - - for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - tlen += tangent[i][0]*tangent[i][0] + tangent[i][1]*tangent[i][1] + - tangent[i][2]*tangent[i][2]; - - delx = x[i][0] - xprev[i][0]; - dely = x[i][1] - xprev[i][1]; - delz = x[i][2] - xprev[i][2]; - 
domain->minimum_image(delx,dely,delz); - plen += delx*delx + dely*dely + delz*delz; - - delx = xnext[i][0] - x[i][0]; - dely = xnext[i][1] - x[i][1]; - delz = xnext[i][2] - x[i][2]; - domain->minimum_image(delx,dely,delz); - nlen += delx*delx + dely*dely + delz*delz; - } - - double lenall; - MPI_Allreduce(&tlen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); - tlen = sqrt(lenall); - - MPI_Allreduce(&plen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); - plen = sqrt(lenall); - - MPI_Allreduce(&nlen,&lenall,1,MPI_DOUBLE,MPI_SUM,world); - nlen = sqrt(lenall); + double bufin[BUFSIZE], bufout[BUFSIZE]; + bufin[0] = nlen; + bufin[1] = plen; + bufin[2] = tlen; + bufin[3] = gradlen; + bufin[4] = gradnextlen; + bufin[5] = dotpath; + bufin[6] = dottangrad; + bufin[7] = dotgrad; + MPI_Allreduce(bufin,bufout,BUFSIZE,MPI_DOUBLE,MPI_SUM,world); + nlen = sqrt(bufout[0]); + plen = sqrt(bufout[1]); + tlen = sqrt(bufout[2]); + gradlen = sqrt(bufout[3]); + gradnextlen = sqrt(bufout[4]); + dotpath = bufout[5]; + dottangrad = bufout[6]; + dotgrad = bufout[7]; // normalize tangent vector @@ -333,37 +474,158 @@ void FixNEB::min_post_force(int vflag) } } - // reset force on each atom in this replica - // regular NEB for all replicas except rclimber does hill-climbing NEB - // currently have F = -Grad(V) = -Grad(V)_perp - Grad(V)_parallel - // want F = -Grad(V)_perp + Fspring for regular NEB - // thus Fdelta = Grad(V)_parallel + Fspring for regular NEB - // want F = -Grad(V) + 2 Grad(V)_parallel for hill-climbing NEB - // thus Fdelta = 2 Grad(V)_parallel for hill-climbing NEB - // Grad(V)_parallel = (Grad(V) . utan) * utangent = -(F . 
utan) * utangent - // Fspring = k (nlen - plen) * utangent - // see Henkelman & Jonsson 2000 paper, eqs 3,4,12 - // see Henkelman & Jonsson 2000a paper, eq 5 + // first or last replica has no change to forces, just return + + if (ireplica > 0 && ireplica < nreplica-1) + dottangrad = dottangrad/(tlen*gradlen); + if (ireplica == 0) + dottangrad = dottangrad/(nlen*gradlen); + if (ireplica == nreplica-1) + dottangrad = dottangrad/(plen*gradlen); + if (ireplica < nreplica-1) + dotgrad = dotgrad /(gradlen*gradnextlen); + + if (FreeEndIni && ireplica == 0) { + if (tlen > 0.0) { + double dotall; + MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); + dot=dotall/tlen; + + if (dot<0) prefactor = -dot - kspringIni*(veng-EIniIni); + else prefactor = -dot + kspringIni*(veng-EIniIni); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + f[i][0] += prefactor *tangent[i][0]; + f[i][1] += prefactor *tangent[i][1]; + f[i][2] += prefactor *tangent[i][2]; + } + } + } + + if (FreeEndFinal && ireplica == nreplica -1) { + if (tlen > 0.0) { + double dotall; + MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); + dot=dotall/tlen; + + if (dot<0) prefactor = -dot - kspringFinal*(veng-EFinalIni); + else prefactor = -dot + kspringFinal*(veng-EFinalIni); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + f[i][0] += prefactor *tangent[i][0]; + f[i][1] += prefactor *tangent[i][1]; + f[i][2] += prefactor *tangent[i][2]; + } + } + } + + if (FreeEndFinalWithRespToEIni&&ireplica == nreplica -1) { + if (tlen > 0.0) { + double dotall; + MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); + dot=dotall/tlen; + + if (dot<0) prefactor = -dot - kspringFinal*(veng-vIni); + else prefactor = -dot + kspringFinal*(veng-vIni); + + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + f[i][0] += prefactor *tangent[i][0]; + f[i][1] += prefactor *tangent[i][1]; + f[i][2] += prefactor *tangent[i][2]; + } + } + } + + double lentot = 0; + double 
meanDist,idealPos,lenuntilIm,lenuntilClimber; + lenuntilClimber=0; + if (NEBLongRange) { + if (cmode == SINGLE_PROC_DIRECT || cmode == SINGLE_PROC_MAP) { + MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,uworld); + } else { + if (me == 0) + MPI_Allgather(&nlen,1,MPI_DOUBLE,&nlenall[0],1,MPI_DOUBLE,rootworld); + MPI_Bcast(nlenall,nreplica,MPI_DOUBLE,0,world); + } + + lenuntilIm = 0; + for (int i = 0; i < ireplica; i++) + lenuntilIm += nlenall[i]; + + for (int i = 0; i < nreplica; i++) + lentot += nlenall[i]; + + meanDist = lentot/(nreplica -1); + + if (rclimber>0) { + for (int i = 0; i < rclimber; i++) + lenuntilClimber += nlenall[i]; + double meanDistBeforeClimber = lenuntilClimber/rclimber; + double meanDistAfterClimber = + (lentot-lenuntilClimber)/(nreplica-rclimber-1); + if (ireplica<rclimber) + idealPos = ireplica * meanDistBeforeClimber; + else + idealPos = lenuntilClimber+ (ireplica-rclimber)*meanDistAfterClimber; + } else idealPos = ireplica * meanDist; + } + + if (ireplica == 0 || ireplica == nreplica-1) return ; + + double AngularContr; + dotpath = dotpath/(plen*nlen); + AngularContr = 0.5 *(1+cos(MY_PI * dotpath)); + + double dotSpringTangent; + dotSpringTangent=0; - double dot = 0.0; for (int i = 0; i < nlocal; i++) { - if (mask[i] & groupbit) - dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] + + if (mask[i] & groupbit) { + dot += f[i][0]*tangent[i][0] + f[i][1]*tangent[i][1] + f[i][2]*tangent[i][2]; + dotSpringTangent += springF[i][0]*tangent[i][0] + + springF[i][1]*tangent[i][1] + springF[i][2]*tangent[i][2];} } + double dotSpringTangentall; + MPI_Allreduce(&dotSpringTangent,&dotSpringTangentall,1, + MPI_DOUBLE,MPI_SUM,world); + dotSpringTangent=dotSpringTangentall; double dotall; MPI_Allreduce(&dot,&dotall,1,MPI_DOUBLE,MPI_SUM,world); + dot=dotall; + + if (ireplica == rclimber) prefactor = -2.0*dot; + else { + if (NEBLongRange) { + prefactor = -dot - kspring*(lenuntilIm-idealPos)/(2*meanDist); + } else if (StandardNEB) { + prefactor = 
-dot + kspring*(nlen-plen); + } - double prefactor; - if (ireplica == rclimber) prefactor = -2.0*dotall; - else prefactor = -dotall + kspring*(nlen-plen); + if (FinalAndInterWithRespToEIni&& veng<vIni) { + for (int i = 0; i < nlocal; i++) + if (mask[i] & groupbit) { + f[i][0] = 0; + f[i][1] = 0; + f[i][2] = 0; + } + prefactor = kspring*(nlen-plen); + AngularContr=0; + } + } for (int i = 0; i < nlocal; i++) if (mask[i] & groupbit) { - f[i][0] += prefactor*tangent[i][0]; - f[i][1] += prefactor*tangent[i][1]; - f[i][2] += prefactor*tangent[i][2]; + f[i][0] += prefactor*tangent[i][0] + + AngularContr*(springF[i][0] - dotSpringTangent*tangent[i][0]); + f[i][1] += prefactor*tangent[i][1] + + AngularContr*(springF[i][1] - dotSpringTangent*tangent[i][1]); + f[i][2] += prefactor*tangent[i][2] + + AngularContr*(springF[i][2] - dotSpringTangent*tangent[i][2]); } } @@ -373,6 +635,7 @@ void FixNEB::min_post_force(int vflag) replicas 0 and N-1 send but do not receive any atoms ------------------------------------------------------------------------- */ + void FixNEB::inter_replica_comm() { int i,m; @@ -382,9 +645,10 @@ void FixNEB::inter_replica_comm() // reallocate memory if necessary - if (atom->nlocal > maxlocal) reallocate(); + if (atom->nmax > maxlocal) reallocate(); double **x = atom->x; + double **f = atom->f; tagint *tag = atom->tag; int *mask = atom->mask; int nlocal = atom->nlocal; @@ -395,7 +659,7 @@ void FixNEB::inter_replica_comm() // ----------------------------------------------------- // single proc per replica - // all atoms are NEB atoms and no atom sorting is enabled + // all atoms are NEB atoms and no atom sorting // direct comm of x -> xprev and x -> xnext if (cmode == SINGLE_PROC_DIRECT) { @@ -404,13 +668,18 @@ void FixNEB::inter_replica_comm() if (ireplica < nreplica-1) MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld); if (ireplica > 0) MPI_Wait(&request,MPI_STATUS_IGNORE); - if (ireplica < nreplica-1) 
MPI_Irecv(xnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request); if (ireplica > 0) MPI_Send(x[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld); if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE); + if (ireplica < nreplica-1) + MPI_Irecv(fnext[0],3*nlocal,MPI_DOUBLE,procnext,0,uworld,&request); + if (ireplica > 0) + MPI_Send(f[0],3*nlocal,MPI_DOUBLE,procprev,0,uworld); + if (ireplica < nreplica-1) MPI_Wait(&request,MPI_STATUS_IGNORE); + return; } @@ -427,6 +696,9 @@ void FixNEB::inter_replica_comm() xsend[m][0] = x[i][0]; xsend[m][1] = x[i][1]; xsend[m][2] = x[i][2]; + fsend[m][0] = f[i][0]; + fsend[m][1] = f[i][1]; + fsend[m][2] = f[i][2]; m++; } @@ -448,13 +720,14 @@ void FixNEB::inter_replica_comm() xprev[m][2] = xrecv[i][2]; } } - if (ireplica < nreplica-1) { MPI_Irecv(xrecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); + MPI_Irecv(frecv[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); MPI_Irecv(tagrecv,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld,&requests[1]); } if (ireplica > 0) { MPI_Send(xsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); + MPI_Send(fsend[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); MPI_Send(tagsend,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld); } @@ -465,6 +738,9 @@ void FixNEB::inter_replica_comm() xnext[m][0] = xrecv[i][0]; xnext[m][1] = xrecv[i][1]; xnext[m][2] = xrecv[i][2]; + fnext[m][0] = frecv[i][0]; + fnext[m][1] = frecv[i][1]; + fnext[m][2] = frecv[i][2]; } } @@ -484,6 +760,9 @@ void FixNEB::inter_replica_comm() xsend[m][0] = x[i][0]; xsend[m][1] = x[i][1]; xsend[m][2] = x[i][2]; + fsend[m][0] = f[i][0]; + fsend[m][1] = f[i][1]; + fsend[m][2] = f[i][2]; m++; } @@ -496,12 +775,17 @@ void FixNEB::inter_replica_comm() for (i = 0; i < nprocs; i++) counts[i] *= 3; for (i = 0; i < nprocs-1; i++) displacements[i+1] = displacements[i] + counts[i]; - if (xsend) + if (xsend) { MPI_Gatherv(xsend[0],3*m,MPI_DOUBLE, xsendall[0],counts,displacements,MPI_DOUBLE,0,world); - else + MPI_Gatherv(fsend[0],3*m,MPI_DOUBLE, + 
fsendall[0],counts,displacements,MPI_DOUBLE,0,world); + } else { MPI_Gatherv(NULL,3*m,MPI_DOUBLE, xsendall[0],counts,displacements,MPI_DOUBLE,0,world); + MPI_Gatherv(NULL,3*m,MPI_DOUBLE, + fsendall[0],counts,displacements,MPI_DOUBLE,0,world); + } if (ireplica > 0 && me == 0) { MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld,&requests[0]); @@ -530,11 +814,13 @@ void FixNEB::inter_replica_comm() if (ireplica < nreplica-1 && me == 0) { MPI_Irecv(xrecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); + MPI_Irecv(frecvall[0],3*nebatoms,MPI_DOUBLE,procnext,0,uworld,&requests[0]); MPI_Irecv(tagrecvall,nebatoms,MPI_LMP_TAGINT,procnext,0,uworld, &requests[1]); } if (ireplica > 0 && me == 0) { MPI_Send(xsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); + MPI_Send(fsendall[0],3*nebatoms,MPI_DOUBLE,procprev,0,uworld); MPI_Send(tagsendall,nebatoms,MPI_LMP_TAGINT,procprev,0,uworld); } @@ -543,6 +829,7 @@ void FixNEB::inter_replica_comm() MPI_Bcast(tagrecvall,nebatoms,MPI_INT,0,world); MPI_Bcast(xrecvall[0],3*nebatoms,MPI_DOUBLE,0,world); + MPI_Bcast(frecvall[0],3*nebatoms,MPI_DOUBLE,0,world); for (i = 0; i < nebatoms; i++) { m = atom->map(tagrecvall[i]); @@ -550,6 +837,9 @@ void FixNEB::inter_replica_comm() xnext[m][0] = xrecvall[i][0]; xnext[m][1] = xrecvall[i][1]; xnext[m][2] = xrecvall[i][2]; + fnext[m][0] = frecvall[i][0]; + fnext[m][1] = frecvall[i][1]; + fnext[m][2] = frecvall[i][2]; } } } @@ -561,27 +851,37 @@ void FixNEB::inter_replica_comm() void FixNEB::reallocate() { + maxlocal = atom->nmax; + memory->destroy(xprev); memory->destroy(xnext); memory->destroy(tangent); - - if (cmode != SINGLE_PROC_DIRECT) { - memory->destroy(xsend); - memory->destroy(xrecv); - memory->destroy(tagsend); - memory->destroy(tagrecv); - } - - maxlocal = atom->nmax; + memory->destroy(fnext); + memory->destroy(springF); memory->create(xprev,maxlocal,3,"neb:xprev"); memory->create(xnext,maxlocal,3,"neb:xnext"); memory->create(tangent,maxlocal,3,"neb:tangent"); + 
memory->create(fnext,maxlocal,3,"neb:fnext"); + memory->create(springF,maxlocal,3,"neb:springF"); if (cmode != SINGLE_PROC_DIRECT) { + memory->destroy(xsend); + memory->destroy(fsend); + memory->destroy(xrecv); + memory->destroy(frecv); + memory->destroy(tagsend); + memory->destroy(tagrecv); memory->create(xsend,maxlocal,3,"neb:xsend"); + memory->create(fsend,maxlocal,3,"neb:fsend"); memory->create(xrecv,maxlocal,3,"neb:xrecv"); + memory->create(frecv,maxlocal,3,"neb:frecv"); memory->create(tagsend,maxlocal,"neb:tagsend"); memory->create(tagrecv,maxlocal,"neb:tagrecv"); } + + if (NEBLongRange) { + memory->destroy(nlenall); + memory->create(nlenall,nreplica,"neb:nlenall"); + } } diff --git a/src/REPLICA/fix_neb.h b/src/REPLICA/fix_neb.h index 4026f84f1541aab1255c77f8178b23411ab88b5b..232790a1f00323ddec943962bef4bf1b9589b9d6 100644 --- a/src/REPLICA/fix_neb.h +++ b/src/REPLICA/fix_neb.h @@ -26,9 +26,8 @@ namespace LAMMPS_NS { class FixNEB : public Fix { public: - double veng,plen,nlen; + double veng,plen,nlen,dotpath,dottangrad,gradlen,dotgrad; int rclimber; - double gradvnorm; FixNEB(class LAMMPS *, int, char **); ~FixNEB(); @@ -39,27 +38,32 @@ class FixNEB : public Fix { private: int me,nprocs,nprocs_universe; - double kspring; + double kspring,kspringIni,kspringFinal,kspringPerp,EIniIni,EFinalIni; + bool StandardNEB,NEBLongRange,PerpSpring,FreeEndIni,FreeEndFinal; + bool FreeEndFinalWithRespToEIni,FinalAndInterWithRespToEIni; int ireplica,nreplica; int procnext,procprev; int cmode; MPI_Comm uworld; + MPI_Comm rootworld; + char *id_pe; class Compute *pe; - int nebatoms; // # of active NEB atoms + int nebatoms; int ntotal; // total # of atoms, NEB or not int maxlocal; // size of xprev,xnext,tangent arrays - - double **xprev,**xnext; // coords of my owned atoms in neighbor replicas - double **tangent; // work vector for inter-replica forces - + double *nlenall; + double **xprev,**xnext,**fnext,**springF; + double **tangent; double **xsend,**xrecv; // coords to 
send/recv to/from other replica + double **fsend,**frecv; // forces to send/recv to/from other replica tagint *tagsend,*tagrecv; // ditto for atom IDs // info gathered from all procs in my replica double **xsendall,**xrecvall; // coords to send/recv to/from other replica + double **fsendall,**frecvall; // force to send/recv to/from other replica tagint *tagsendall,*tagrecvall; // ditto for atom IDs int *counts,*displacements; // used for MPI_Gather diff --git a/src/REPLICA/neb.cpp b/src/REPLICA/neb.cpp index 03bd0b61e18b4ec2f15f7bff8c6d11c23710579b..1388a260ea71d4af72b43c0196e7e044e47cf92d 100644 --- a/src/REPLICA/neb.cpp +++ b/src/REPLICA/neb.cpp @@ -37,8 +37,10 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "math_const.h" using namespace LAMMPS_NS; +using namespace MathConst; #define MAXLINE 256 #define CHUNK 1024 @@ -143,17 +145,19 @@ void NEB::command(int narg, char **arg) // process file-style setting to setup initial configs for all replicas if (strcmp(arg[5],"final") == 0) { - if (narg != 7) error->universe_all(FLERR,"Illegal NEB command"); + if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEB command"); infile = arg[6]; readfile(infile,0); } else if (strcmp(arg[5],"each") == 0) { - if (narg != 7) error->universe_all(FLERR,"Illegal NEB command"); + if (narg != 7 && narg !=8) error->universe_all(FLERR,"Illegal NEB command"); infile = arg[6]; readfile(infile,1); } else if (strcmp(arg[5],"none") == 0) { - if (narg != 6) error->universe_all(FLERR,"Illegal NEB command"); + if (narg != 6 && narg !=7) error->universe_all(FLERR,"Illegal NEB command"); } else error->universe_all(FLERR,"Illegal NEB command"); + verbose=false; + if (strcmp(arg[narg-1],"verbose") == 0) verbose=true; // run the NEB calculation run(); @@ -178,8 +182,9 @@ void NEB::run() if (ineb == modify->nfix) error->all(FLERR,"NEB requires use of fix neb"); fneb = (FixNEB *) modify->fix[ineb]; - nall = 4; - memory->create(all,nreplica,nall,"neb:all"); + if 
(verbose) numall =7; + else numall = 4; + memory->create(all,nreplica,numall,"neb:all"); rdist = new double[nreplica]; // initialize LAMMPS @@ -195,9 +200,11 @@ void NEB::run() error->all(FLERR,"NEB requires damped dynamics minimizer"); // setup regular NEB minimization + FILE *uscreen = universe->uscreen; + FILE *ulogfile = universe->ulogfile; - if (me_universe == 0 && universe->uscreen) - fprintf(universe->uscreen,"Setting up regular NEB ...\n"); + if (me_universe == 0 && uscreen) + fprintf(uscreen,"Setting up regular NEB ...\n"); update->beginstep = update->firststep = update->ntimestep; update->endstep = update->laststep = update->firststep + n1steps; @@ -209,16 +216,33 @@ void NEB::run() update->minimize->setup(); if (me_universe == 0) { - if (universe->uscreen) - fprintf(universe->uscreen,"Step MaxReplicaForce MaxAtomForce " - "GradV0 GradV1 GradVc " - "EBF EBR RDT " - "RD1 PE1 RD2 PE2 ... RDN PEN\n"); - if (universe->ulogfile) - fprintf(universe->ulogfile,"Step MaxReplicaForce MaxAtomForce " - "GradV0 GradV1 GradVc " - "EBF EBR RDT " - "RD1 PE1 RD2 PE2 ... RDN PEN\n"); + if (uscreen) { + if (verbose) { + fprintf(uscreen,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... " + "RDN PEN pathangle1 angletangrad1 anglegrad1 gradV1 " + "ReplicaForce1 MaxAtomForce1 pathangle2 angletangrad2 " + "... ReplicaForceN MaxAtomForceN\n"); + } else { + fprintf(uscreen,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... " + "RDN PEN\n"); + } + } + + if (ulogfile) { + if (verbose) { + fprintf(ulogfile,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... " + "RDN PEN pathangle1 angletangrad1 anglegrad1 gradV1 " + "ReplicaForce1 MaxAtomForce1 pathangle2 angletangrad2 " + "... ReplicaForceN MaxAtomForceN\n"); + } else { + fprintf(ulogfile,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... 
" + "RDN PEN\n"); + } + } } print_status(); @@ -257,14 +281,14 @@ void NEB::run() // setup climbing NEB minimization // must reinitialize minimizer so it re-creates its fix MINIMIZE - if (me_universe == 0 && universe->uscreen) - fprintf(universe->uscreen,"Setting up climbing ...\n"); + if (me_universe == 0 && uscreen) + fprintf(uscreen,"Setting up climbing ...\n"); if (me_universe == 0) { - if (universe->uscreen) - fprintf(universe->uscreen,"Climbing replica = %d\n",top+1); - if (universe->ulogfile) - fprintf(universe->ulogfile,"Climbing replica = %d\n",top+1); + if (uscreen) + fprintf(uscreen,"Climbing replica = %d\n",top+1); + if (ulogfile) + fprintf(ulogfile,"Climbing replica = %d\n",top+1); } update->beginstep = update->firststep = update->ntimestep; @@ -279,16 +303,34 @@ void NEB::run() update->minimize->setup(); if (me_universe == 0) { - if (universe->uscreen) - fprintf(universe->uscreen,"Step MaxReplicaForce MaxAtomForce " - "GradV0 GradV1 GradVc " - "EBF EBR RDT " - "RD1 PE1 RD2 PE2 ... RDN PEN\n"); - if (universe->ulogfile) - fprintf(universe->ulogfile,"Step MaxReplicaForce MaxAtomForce " - "GradV0 GradV1 GradVc " - "EBF EBR RDT " - "RD1 PE1 RD2 PE2 ... RDN PEN\n"); + if (uscreen) + if (verbose) { + fprintf(uscreen,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT " + "RD1 PE1 RD2 PE2 ... RDN PEN " + "pathangle1 angletangrad1 anglegrad1 gradV1 " + "ReplicaForce1 MaxAtomForce1 pathangle2 angletangrad2 " + "... ReplicaForceN MaxAtomForceN\n"); + } else { + fprintf(uscreen,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc " + "EBF EBR RDT " + "RD1 PE1 RD2 PE2 ... RDN PEN\n"); + } + if (ulogfile) + if (verbose) { + fprintf(ulogfile,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc EBF EBR RDT " + "RD1 PE1 RD2 PE2 ... RDN PEN " + "pathangle1 angletangrad1 anglegrad1 gradV1 " + "ReplicaForce1 MaxAtomForce1 pathangle2 angletangrad2 " + "... 
ReplicaForceN MaxAtomForceN\n"); + } else { + fprintf(ulogfile,"Step MaxReplicaForce MaxAtomForce " + "GradV0 GradV1 GradVc " + "EBF EBR RDT " + "RD1 PE1 RD2 PE2 ... RDN PEN\n"); + } } print_status(); @@ -321,16 +363,16 @@ void NEB::run() /* ---------------------------------------------------------------------- read initial config atom coords from file flag = 0 - only first replica opens file and reads it - first replica bcasts lines to all replicas - final replica stores coords - intermediate replicas interpolate from coords - new coord = replica fraction between current and final state - initial replica does nothing + only first replica opens file and reads it + first replica bcasts lines to all replicas + final replica stores coords + intermediate replicas interpolate from coords + new coord = replica fraction between current and final state + initial replica does nothing flag = 1 - each replica (except first) opens file and reads it - each replica stores coords - initial replica does nothing + each replica (except first) opens file and reads it + each replica stores coords + initial replica does nothing ------------------------------------------------------------------------- */ void NEB::readfile(char *file, int flag) @@ -539,16 +581,29 @@ void NEB::print_status() double fmaxatom; MPI_Allreduce(&fnorminf,&fmaxatom,1,MPI_DOUBLE,MPI_MAX,roots); - double one[4]; + if (verbose) { + freplica = new double[nreplica]; + MPI_Allgather(&fnorm2,1,MPI_DOUBLE,&freplica[0],1,MPI_DOUBLE,roots); + fmaxatomInRepl = new double[nreplica]; + MPI_Allgather(&fnorminf,1,MPI_DOUBLE,&fmaxatomInRepl[0],1,MPI_DOUBLE,roots); + } + + double one[numall]; one[0] = fneb->veng; one[1] = fneb->plen; one[2] = fneb->nlen; - one[nall-1] = fneb->gradvnorm; + one[3] = fneb->gradlen; + + if (verbose) { + one[4] = fneb->dotpath; + one[5] = fneb->dottangrad; + one[6] = fneb->dotgrad; + } if (output->thermo->normflag) one[0] /= atom->natoms; if (me == 0) - 
MPI_Allgather(one,nall,MPI_DOUBLE,&all[0][0],nall,MPI_DOUBLE,roots); - MPI_Bcast(&all[0][0],nall*nreplica,MPI_DOUBLE,0,world); + MPI_Allgather(one,numall,MPI_DOUBLE,&all[0][0],numall,MPI_DOUBLE,roots); + MPI_Bcast(&all[0][0],numall*nreplica,MPI_DOUBLE,0,world); rdist[0] = 0.0; for (int i = 1; i < nreplica; i++) @@ -588,26 +643,56 @@ void NEB::print_status() } if (me_universe == 0) { - if (universe->uscreen) { - fprintf(universe->uscreen,BIGINT_FORMAT " %12.8g %12.8g ", + const double todeg=180.0/MY_PI; + FILE *uscreen = universe->uscreen; + FILE *ulogfile = universe->ulogfile; + if (uscreen) { + fprintf(uscreen,BIGINT_FORMAT " %12.8g %12.8g ", update->ntimestep,fmaxreplica,fmaxatom); - fprintf(universe->uscreen,"%12.8g %12.8g %12.8g ", + fprintf(uscreen,"%12.8g %12.8g %12.8g ", gradvnorm0,gradvnorm1,gradvnormc); - fprintf(universe->uscreen,"%12.8g %12.8g %12.8g ",ebf,ebr,endpt); + fprintf(uscreen,"%12.8g %12.8g %12.8g ",ebf,ebr,endpt); for (int i = 0; i < nreplica; i++) - fprintf(universe->uscreen,"%12.8g %12.8g ",rdist[i],all[i][0]); - fprintf(universe->uscreen,"\n"); + fprintf(uscreen,"%12.8g %12.8g ",rdist[i],all[i][0]); + if (verbose) { + fprintf(uscreen,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + NAN,180-acos(all[0][5])*todeg,180-acos(all[0][6])*todeg, + all[0][3],freplica[0],fmaxatomInRepl[0]); + for (int i = 1; i < nreplica-1; i++) + fprintf(uscreen,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + 180-acos(all[i][4])*todeg,180-acos(all[i][5])*todeg, + 180-acos(all[i][6])*todeg,all[i][3],freplica[i], + fmaxatomInRepl[i]); + fprintf(uscreen,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + NAN,180-acos(all[nreplica-1][5])*todeg,NAN,all[nreplica-1][3], + freplica[nreplica-1],fmaxatomInRepl[nreplica-1]); + } + fprintf(uscreen,"\n"); } - if (universe->ulogfile) { - fprintf(universe->ulogfile,BIGINT_FORMAT " %12.8g %12.8g ", + + if (ulogfile) { + fprintf(ulogfile,BIGINT_FORMAT " %12.8g %12.8g ", update->ntimestep,fmaxreplica,fmaxatom); - 
fprintf(universe->ulogfile,"%12.8g %12.8g %12.8g ", + fprintf(ulogfile,"%12.8g %12.8g %12.8g ", gradvnorm0,gradvnorm1,gradvnormc); - fprintf(universe->ulogfile,"%12.8g %12.8g %12.8g ",ebf,ebr,endpt); + fprintf(ulogfile,"%12.8g %12.8g %12.8g ",ebf,ebr,endpt); for (int i = 0; i < nreplica; i++) - fprintf(universe->ulogfile,"%12.8g %12.8g ",rdist[i],all[i][0]); - fprintf(universe->ulogfile,"\n"); - fflush(universe->ulogfile); + fprintf(ulogfile,"%12.8g %12.8g ",rdist[i],all[i][0]); + if (verbose) { + fprintf(ulogfile,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + NAN,180-acos(all[0][5])*todeg,180-acos(all[0][6])*todeg, + all[0][3],freplica[0],fmaxatomInRepl[0]); + for (int i = 1; i < nreplica-1; i++) + fprintf(ulogfile,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + 180-acos(all[i][4])*todeg,180-acos(all[i][5])*todeg, + 180-acos(all[i][6])*todeg,all[i][3],freplica[i], + fmaxatomInRepl[i]); + fprintf(ulogfile,"%12.5g %12.5g %12.5g %12.5g %12.5g %12.5g", + NAN,180-acos(all[nreplica-1][5])*todeg,NAN,all[nreplica-1][3], + freplica[nreplica-1],fmaxatomInRepl[nreplica-1]); + } + fprintf(ulogfile,"\n"); + fflush(ulogfile); } } } diff --git a/src/REPLICA/neb.h b/src/REPLICA/neb.h index afedf0cdc5cb79ff891978fc4e20a0caa30cdc4c..8c2bcf9b165d459d8220c4ca5a448e9d2f771de5 100644 --- a/src/REPLICA/neb.h +++ b/src/REPLICA/neb.h @@ -38,6 +38,7 @@ class NEB : protected Pointers { private: int me,me_universe; // my proc ID in world and universe int ireplica,nreplica; + bool verbose; MPI_Comm uworld; MPI_Comm roots; // MPI comm with 1 root proc from each world FILE *fp; @@ -49,9 +50,11 @@ class NEB : protected Pointers { char *infile; // name of file containing final state class FixNEB *fneb; - int nall; // per-replica dimension of array all + int numall; // per-replica dimension of array all double **all; // PE,plen,nlen,gradvnorm from each replica double *rdist; // normalize reaction distance, 0 to 1 + double *freplica; // force on an image + double *fmaxatomInRepl; // force on an 
image void readfile(char *, int); void open(char *); diff --git a/src/RIGID/fix_rigid.cpp b/src/RIGID/fix_rigid.cpp index e9c06dee5c42b27806fb9133bfc38abf5bcc6fca..d0c931a46609e7ec103ea351bff5e8f79b0bde45 100644 --- a/src/RIGID/fix_rigid.cpp +++ b/src/RIGID/fix_rigid.cpp @@ -50,7 +50,7 @@ enum{ISO,ANISO,TRICLINIC}; #define EPSILON 1.0e-7 #define SINERTIA 0.4 // moment of inertia prefactor for sphere -#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid +#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid #define LINERTIA (1.0/12.0) // moment of inertia prefactor for line segment /* ---------------------------------------------------------------------- */ @@ -267,6 +267,8 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : int seed; langflag = 0; + reinitflag = 1; + tstat_flag = 0; pstat_flag = 0; allremap = 1; @@ -501,6 +503,14 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) : infile = new char[n]; strcpy(infile,arg[iarg+1]); restart_file = 1; + reinitflag = 0; + iarg += 2; + + } else if (strcmp(arg[iarg],"reinit") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid/small command"); + if (strcmp("yes",arg[iarg+1]) == 0) reinitflag = 1; + else if (strcmp("no",arg[iarg+1]) == 0) reinitflag = 0; + else error->all(FLERR,"Illegal fix rigid command"); iarg += 2; } else error->all(FLERR,"Illegal fix rigid command"); @@ -679,15 +689,15 @@ void FixRigid::init() if (strstr(update->integrate_style,"respa")) step_respa = ((Respa *) update->integrate)->step; - // setup rigid bodies, using current atom info - // only do initialization once, b/c properties may not be re-computable - // especially if overlapping particles - // do not do dynamic init if read body properties from infile - // this is b/c the infile defines the static and dynamic properties - // and may not be computable if contain overlapping particles + // setup rigid bodies, using current atom info. 
if reinitflag is not set, + // do the initialization only once, b/c properties may not be re-computable + // especially if overlapping particles. + // do not do dynamic init if read body properties from infile. + // this is b/c the infile defines the static and dynamic properties and may + // not be computable if contain overlapping particles. // setup_bodies_static() reads infile itself - if (!setupflag) { + if (reinitflag || !setupflag) { setup_bodies_static(); if (!infile) setup_bodies_dynamic(); setupflag = 1; diff --git a/src/RIGID/fix_rigid.h b/src/RIGID/fix_rigid.h index a6d1f65e1c089aa9c76125bc46679259105c64c1..12439d42cfbc452022595dd9f6dfd27285096ea0 100644 --- a/src/RIGID/fix_rigid.h +++ b/src/RIGID/fix_rigid.h @@ -104,6 +104,7 @@ class FixRigid : public Fix { int extended; // 1 if any particles have extended attributes int orientflag; // 1 if particles store spatial orientation int dorientflag; // 1 if particles store dipole orientation + int reinitflag; // 1 if re-initialize rigid bodies between runs imageint *xcmimage; // internal image flags for atoms in rigid bodies // set relative to in-box xcm of each body diff --git a/src/RIGID/fix_rigid_small.cpp b/src/RIGID/fix_rigid_small.cpp index 2d8e736a1eef1ebf811c555259695ace712b8038..b4de4f53bb8d27e65341d03ee955285ff2e33630 100644 --- a/src/RIGID/fix_rigid_small.cpp +++ b/src/RIGID/fix_rigid_small.cpp @@ -41,10 +41,6 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -// allocate space for static class variable - -FixRigidSmall *FixRigidSmall::frsptr; - #define MAXLINE 1024 #define CHUNK 1024 #define ATTRIBUTE_PERBODY 20 @@ -54,7 +50,7 @@ FixRigidSmall *FixRigidSmall::frsptr; #define BIG 1.0e20 #define SINERTIA 0.4 // moment of inertia prefactor for sphere -#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid +#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid #define LINERTIA (1.0/12.0) // moment of inertia prefactor for line segment #define 
DELTA_BODY 10000 @@ -138,6 +134,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : langflag = 0; infile = NULL; onemols = NULL; + reinitflag = 1; tstat_flag = 0; pstat_flag = 0; @@ -173,6 +170,7 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : error->all(FLERR,"Fix rigid/small langevin period must be > 0.0"); if (seed <= 0) error->all(FLERR,"Illegal fix rigid/small command"); iarg += 5; + } else if (strcmp(arg[iarg],"infile") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid/small command"); delete [] infile; @@ -180,7 +178,16 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) : infile = new char[n]; strcpy(infile,arg[iarg+1]); restart_file = 1; + reinitflag = 0; + iarg += 2; + + } else if (strcmp(arg[iarg],"reinit") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid/small command"); + if (strcmp("yes",arg[iarg+1]) == 0) reinitflag = 1; + else if (strcmp("no",arg[iarg+1]) == 0) reinitflag = 0; + else error->all(FLERR,"Illegal fix rigid/small command"); iarg += 2; + } else if (strcmp(arg[iarg],"mol") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal fix rigid/small command"); int imol = atom->find_molecule(arg[iarg+1]); @@ -520,14 +527,15 @@ void FixRigidSmall::init() } /* ---------------------------------------------------------------------- - setup static/dynamic properties of rigid bodies, using current atom info - only do initialization once, b/c properties may not be re-computable - especially if overlapping particles or bodies inserted from mol template - do not do dynamic init if read body properties from infile - this is b/c the infile defines the static and dynamic properties - and may not be computable if contain overlapping particles - setup_bodies_static() reads infile itself - cannot do this until now, b/c requires comm->setup() to have setup stencil + setup static/dynamic properties of rigid bodies, using current atom info. 
+ if reinitflag is not set, do the initialization only once, b/c properties + may not be re-computable especially if overlapping particles or bodies + are inserted from mol template. + do not do dynamic init if read body properties from infile. this + is b/c the infile defines the static and dynamic properties and may not + be computable if contain overlapping particles setup_bodies_static() + reads infile itself. + cannot do this until now, b/c requires comm->setup() to have setup stencil invoke pre_neighbor() to insure body xcmimage flags are reset needed if Verlet::setup::pbc() has remapped/migrated atoms for 2nd run setup_bodies_static() invokes pre_neighbor itself @@ -535,9 +543,13 @@ void FixRigidSmall::init() void FixRigidSmall::setup_pre_neighbor() { - if (!setupflag) setup_bodies_static(); + if (reinitflag || !setupflag) + setup_bodies_static(); else pre_neighbor(); - if (!setupflag && !infile) setup_bodies_dynamic(); + + if ((reinitflag || !setupflag) && !infile) + setup_bodies_dynamic(); + setupflag = 1; } @@ -1031,7 +1043,7 @@ int FixRigidSmall::dof(int tgroup) j = atom2body[i]; counts[j][2]++; if (mask[i] & tgroupbit) { - if (extended && eflags[i]) counts[j][1]++; + if (extended && (eflags[i] & ~(POINT | DIPOLE))) counts[j][1]++; else counts[j][0]++; } } @@ -1499,8 +1511,7 @@ void FixRigidSmall::create_bodies() // func = update bbox with atom coords from every proc // when done, have full bbox for every rigid body my atoms are part of - frsptr = this; - comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL); + comm->ring(m,sizeof(double),buf,1,ring_bbox,NULL,(void *)this); // check if any bbox is size 0.0, meaning rigid body is a single particle @@ -1549,8 +1560,7 @@ void FixRigidSmall::create_bodies() // func = update idclose,rsqclose with atom IDs from every proc // when done, have idclose for every rigid body my atoms are part of - frsptr = this; - comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL); + 
comm->ring(m,sizeof(double),buf,2,ring_nearest,NULL,(void *)this); // set bodytag of all owned atoms, based on idclose // find max value of rsqclose across all procs @@ -1581,8 +1591,7 @@ void FixRigidSmall::create_bodies() // when done, have rsqfar for all atoms in bodies I own rsqfar = 0.0; - frsptr = this; - comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL); + comm->ring(m,sizeof(double),buf,3,ring_farthest,NULL,(void *)this); // find maxextent of rsqfar across all procs // if defined, include molecule->maxextent @@ -1609,8 +1618,9 @@ void FixRigidSmall::create_bodies() update bounding box for rigid bodies my atoms are part of ------------------------------------------------------------------------- */ -void FixRigidSmall::ring_bbox(int n, char *cbuf) +void FixRigidSmall::ring_bbox(int n, char *cbuf, void *ptr) { + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; std::map<tagint,int> *hash = frsptr->hash; double **bbox = frsptr->bbox; @@ -1641,8 +1651,9 @@ void FixRigidSmall::ring_bbox(int n, char *cbuf) update nearest atom to body center for rigid bodies my atoms are part of ------------------------------------------------------------------------- */ -void FixRigidSmall::ring_nearest(int n, char *cbuf) +void FixRigidSmall::ring_nearest(int n, char *cbuf, void *ptr) { + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; std::map<tagint,int> *hash = frsptr->hash; double **ctr = frsptr->ctr; tagint *idclose = frsptr->idclose; @@ -1681,8 +1692,9 @@ void FixRigidSmall::ring_nearest(int n, char *cbuf) update rsqfar = distance from owning atom to other atom ------------------------------------------------------------------------- */ -void FixRigidSmall::ring_farthest(int n, char *cbuf) +void FixRigidSmall::ring_farthest(int n, char *cbuf, void *ptr) { + FixRigidSmall *frsptr = (FixRigidSmall *) ptr; double **x = frsptr->atom->x; imageint *image = frsptr->atom->image; int nlocal = frsptr->atom->nlocal; diff --git a/src/RIGID/fix_rigid_small.h 
b/src/RIGID/fix_rigid_small.h index 9c89bab885cf2365a24351b56e3eafc41510d853..b07dea4f333ea3f27b7919fb74a4b2c52fc9585e 100644 --- a/src/RIGID/fix_rigid_small.h +++ b/src/RIGID/fix_rigid_small.h @@ -31,10 +31,6 @@ class FixRigidSmall : public Fix { friend class ComputeRigidLocal; public: - // static variable for ring communication callback to access class data - - static FixRigidSmall *frsptr; - FixRigidSmall(class LAMMPS *, int, char **); virtual ~FixRigidSmall(); virtual int setmask(); @@ -131,6 +127,7 @@ class FixRigidSmall : public Fix { int extended; // 1 if any particles have extended attributes int orientflag; // 1 if particles store spatial orientation int dorientflag; // 1 if particles store dipole orientation + int reinitflag; // 1 if re-initialize rigid bodies between runs int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE; // bitmasks for eflags int OMEGA,ANGMOM,TORQUE; @@ -199,9 +196,9 @@ class FixRigidSmall : public Fix { // callback functions for ring communication - static void ring_bbox(int, char *); - static void ring_nearest(int, char *); - static void ring_farthest(int, char *); + static void ring_bbox(int, char *, void *); + static void ring_nearest(int, char *, void *); + static void ring_farthest(int, char *, void *); // debug diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp index 5c993ee85933ff85b47710252be968e6e7905792..36f56aa2957cb7c8dc0de6b12a40ca9378f4505a 100644 --- a/src/RIGID/fix_shake.cpp +++ b/src/RIGID/fix_shake.cpp @@ -39,10 +39,6 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -// allocate space for static class variable - -FixShake *FixShake::fsptr; - #define BIG 1.0e20 #define MASSDELTA 0.1 @@ -844,8 +840,7 @@ void FixShake::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf); + comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this); // store partner info returned to me @@ -970,8 +965,7 
@@ void FixShake::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf); + comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this); // store partner info returned to me @@ -1123,8 +1117,7 @@ void FixShake::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL); + comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this); memory->destroy(buf); @@ -1211,8 +1204,9 @@ void FixShake::find_clusters() search for bond with 1st atom and fill in bondtype ------------------------------------------------------------------------- */ -void FixShake::ring_bonds(int ndatum, char *cbuf) +void FixShake::ring_bonds(int ndatum, char *cbuf, void *ptr) { + FixShake *fsptr = (FixShake *)ptr; Atom *atom = fsptr->atom; double *rmass = atom->rmass; double *mass = atom->mass; @@ -1248,8 +1242,9 @@ void FixShake::ring_bonds(int ndatum, char *cbuf) if I own partner, fill in nshake value ------------------------------------------------------------------------- */ -void FixShake::ring_nshake(int ndatum, char *cbuf) +void FixShake::ring_nshake(int ndatum, char *cbuf, void *ptr) { + FixShake *fsptr = (FixShake *)ptr; Atom *atom = fsptr->atom; int nlocal = atom->nlocal; @@ -1269,8 +1264,9 @@ void FixShake::ring_nshake(int ndatum, char *cbuf) if I own partner, fill in nshake value ------------------------------------------------------------------------- */ -void FixShake::ring_shake(int ndatum, char *cbuf) +void FixShake::ring_shake(int ndatum, char *cbuf, void *ptr) { + FixShake *fsptr = (FixShake *)ptr; Atom *atom = fsptr->atom; int nlocal = atom->nlocal; diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h index ff12b9e120cb54f17111e7df618fc971dba39507..12809a254a064524de4043af5d3fdccaa3646ff1 100644 --- a/src/RIGID/fix_shake.h +++ b/src/RIGID/fix_shake.h @@ -134,10 +134,9 @@ class FixShake : public Fix { 
// static variable for ring communication callback to access class data // callback functions for ring communication - static FixShake *fsptr; - static void ring_bonds(int, char *); - static void ring_nshake(int, char *); - static void ring_shake(int, char *); + static void ring_bonds(int, char *, void *); + static void ring_nshake(int, char *, void *); + static void ring_shake(int, char *, void *); }; } diff --git a/src/USER-CGDNA/README b/src/USER-CGDNA/README index 516f18c36cc67c1dec308bf8cd5114a0a1e7e3d9..735f005f54e03e5024bdc51172453d986876f7c0 100644 --- a/src/USER-CGDNA/README +++ b/src/USER-CGDNA/README @@ -39,10 +39,10 @@ of the LAMMPS manual). The creator of this package is: Dr Oliver Henrich -University of Strathclyde, UK +University of Strathclyde, Glasgow, UK +oliver.henrich at strath.ac.uk University of Edinburgh, UK -ohenrich@ph.ed.ac.uk -o.henrich@epcc.ed.ac.uk +ohenrich at staffmail.ed.ac.uk -------------------------------------------------------------------------- diff --git a/src/USER-DPD/fix_shardlow.cpp b/src/USER-DPD/fix_shardlow.cpp index e85d1e9c8e9ab139ad776e5abe399d9c6f3a9a76..d14c1e0ddbdfe7eaf25548229e467550cc455f88 100644 --- a/src/USER-DPD/fix_shardlow.cpp +++ b/src/USER-DPD/fix_shardlow.cpp @@ -184,12 +184,12 @@ void FixShardlow::setup(int vflag) bool fixShardlow = false; for (int i = 0; i < modify->nfix; i++) - if (strcmp(modify->fix[i]->style,"nvt") == 0 || strcmp(modify->fix[i]->style,"npt") == 0) + if (strncmp(modify->fix[i]->style,"nvt",3) == 0 || strncmp(modify->fix[i]->style,"npt",3) == 0) error->all(FLERR,"Cannot use constant temperature integration routines with DPD."); for (int i = 0; i < modify->nfix; i++){ - if (strcmp(modify->fix[i]->style,"shardlow") == 0) fixShardlow = true; - if (strcmp(modify->fix[i]->style,"nve") == 0 || (strcmp(modify->fix[i]->style,"nph") == 0)){ + if (strncmp(modify->fix[i]->style,"shardlow",3) == 0) fixShardlow = true; + if (strncmp(modify->fix[i]->style,"nve",3) == 0 || 
(strncmp(modify->fix[i]->style,"nph",3) == 0)){ if(fixShardlow) break; else error->all(FLERR,"The deterministic integrator must follow fix shardlow in the input file."); } diff --git a/src/USER-DRUDE/fix_drude.cpp b/src/USER-DRUDE/fix_drude.cpp index 77253d1bb19e634132d358ef20844451dbc5426c..894757baa087e55c8eca84444ac17ea61478ae3b 100644 --- a/src/USER-DRUDE/fix_drude.cpp +++ b/src/USER-DRUDE/fix_drude.cpp @@ -28,8 +28,6 @@ using namespace LAMMPS_NS; using namespace FixConst; -FixDrude *FixDrude::sptr = NULL; - /* ---------------------------------------------------------------------- */ FixDrude::FixDrude(LAMMPS *lmp, int narg, char **arg) : @@ -111,7 +109,6 @@ void FixDrude::build_drudeid(){ std::vector<tagint> core_drude_vec; partner_set = new std::set<tagint>[nlocal]; // Temporary sets of bond partner tags - sptr = this; if (atom->molecular == 1) { // Build list of my atoms' bond partners @@ -149,7 +146,7 @@ void FixDrude::build_drudeid(){ // Loop on procs to fill my atoms' sets of bond partners comm->ring(core_drude_vec.size(), sizeof(tagint), (char *) core_drude_vec.data(), - 4, ring_build_partner, NULL, 1); + 4, ring_build_partner, NULL, (void *)this, 1); // Build the list of my Drudes' tags // The only bond partners of a Drude particle is its core, @@ -165,7 +162,7 @@ void FixDrude::build_drudeid(){ // so that each core finds its Drude. comm->ring(drude_vec.size(), sizeof(tagint), (char *) drude_vec.data(), - 3, ring_search_drudeid, NULL, 1); + 3, ring_search_drudeid, NULL, (void *)this, 1); delete [] partner_set; } @@ -174,14 +171,15 @@ void FixDrude::build_drudeid(){ * Look in my cores' bond partner tags if there is a Drude tag. * If so fill this core's dureid. 
------------------------------------------------------------------------- */ -void FixDrude::ring_search_drudeid(int size, char *cbuf){ +void FixDrude::ring_search_drudeid(int size, char *cbuf, void *ptr){ // Search for the drude partner of my cores - Atom *atom = sptr->atom; + FixDrude *fdptr = (FixDrude *) ptr; + Atom *atom = fdptr->atom; int nlocal = atom->nlocal; int *type = atom->type; - std::set<tagint> *partner_set = sptr->partner_set; - tagint *drudeid = sptr->drudeid; - int *drudetype = sptr->drudetype; + std::set<tagint> *partner_set = fdptr->partner_set; + tagint *drudeid = fdptr->drudeid; + int *drudetype = fdptr->drudetype; tagint *first = (tagint *) cbuf; tagint *last = first + size; @@ -203,11 +201,12 @@ void FixDrude::ring_search_drudeid(int size, char *cbuf){ * buffer contains bond partners. Look for my atoms and add their partner's * tag in its set of bond partners. ------------------------------------------------------------------------- */ -void FixDrude::ring_build_partner(int size, char *cbuf){ +void FixDrude::ring_build_partner(int size, char *cbuf, void *ptr){ // Add partners from incoming list - Atom *atom = sptr->atom; + FixDrude *fdptr = (FixDrude *) ptr; + Atom *atom = fdptr->atom; int nlocal = atom->nlocal; - std::set<tagint> *partner_set = sptr->partner_set; + std::set<tagint> *partner_set = fdptr->partner_set; tagint *it = (tagint *) cbuf; tagint *last = it + size; @@ -338,11 +337,11 @@ void FixDrude::rebuild_special(){ // Remove Drude particles from the special lists of each proc comm->ring(drude_vec.size(), sizeof(tagint), (char *) drude_vec.data(), - 9, ring_remove_drude, NULL, 1); + 9, ring_remove_drude, NULL, (void *)this, 1); // Add back Drude particles in the lists just after their core comm->ring(core_drude_vec.size(), sizeof(tagint), (char *) core_drude_vec.data(), - 10, ring_add_drude, NULL, 1); + 10, ring_add_drude, NULL, (void *)this, 1); // Check size of special list nspecmax_loc = 0; @@ -373,16 +372,17 @@ void 
FixDrude::rebuild_special(){ // Copy core's list into their drude list comm->ring(core_special_vec.size(), sizeof(tagint), (char *) core_special_vec.data(), - 11, ring_copy_drude, NULL, 1); + 11, ring_copy_drude, NULL, (void *)this, 1); } /* ---------------------------------------------------------------------- * When receive buffer, build a set of drude tags, look into my atoms' * special list if some tags are drude particles. If so, remove it. ------------------------------------------------------------------------- */ -void FixDrude::ring_remove_drude(int size, char *cbuf){ +void FixDrude::ring_remove_drude(int size, char *cbuf, void *ptr){ // Remove all drude particles from special list - Atom *atom = sptr->atom; + FixDrude *fdptr = (FixDrude *) ptr; + Atom *atom = fdptr->atom; int nlocal = atom->nlocal; int **nspecial = atom->nspecial; tagint **special = atom->special; @@ -390,7 +390,7 @@ void FixDrude::ring_remove_drude(int size, char *cbuf){ tagint *first = (tagint *) cbuf; tagint *last = first + size; std::set<tagint> drude_set(first, last); - int *drudetype = sptr->drudetype; + int *drudetype = fdptr->drudetype; for (int i=0; i<nlocal; i++) { if (drudetype[type[i]] == DRUDE_TYPE) continue; @@ -415,16 +415,17 @@ void FixDrude::ring_remove_drude(int size, char *cbuf){ * Loop on my atoms' special list to find core tags. Insert their Drude * particle if they have one. 
------------------------------------------------------------------------- */ -void FixDrude::ring_add_drude(int size, char *cbuf){ +void FixDrude::ring_add_drude(int size, char *cbuf, void *ptr){ // Assume special array size is big enough // Add all particle just after their core in the special list - Atom *atom = sptr->atom; + FixDrude *fdptr = (FixDrude *) ptr; + Atom *atom = fdptr->atom; int nlocal = atom->nlocal; int **nspecial = atom->nspecial; tagint **special = atom->special; int *type = atom->type; - tagint *drudeid = sptr->drudeid; - int *drudetype = sptr->drudetype; + tagint *drudeid = fdptr->drudeid; + int *drudetype = fdptr->drudetype; tagint *first = (tagint *) cbuf; tagint *last = first + size; @@ -471,15 +472,16 @@ void FixDrude::ring_add_drude(int size, char *cbuf){ * in the buffer. Loop on my Drude particles and copy their special * info from that of their core if the latter is found in the map. ------------------------------------------------------------------------- */ -void FixDrude::ring_copy_drude(int size, char *cbuf){ +void FixDrude::ring_copy_drude(int size, char *cbuf, void *ptr){ // Copy special list of drude from its core (except itself) - Atom *atom = sptr->atom; + FixDrude *fdptr = (FixDrude *) ptr; + Atom *atom = fdptr->atom; int nlocal = atom->nlocal; int **nspecial = atom->nspecial; tagint **special = atom->special; int *type = atom->type; - tagint *drudeid = sptr->drudeid; - int *drudetype = sptr->drudetype; + tagint *drudeid = fdptr->drudeid; + int *drudetype = fdptr->drudetype; tagint *first = (tagint *) cbuf; tagint *last = first + size; diff --git a/src/USER-DRUDE/fix_drude.h b/src/USER-DRUDE/fix_drude.h index ca2fc9fdb9a25f0c6e344f62282371e2622bb180..6775dcee6f5e25d150fa976fa8e8e2280ad8299f 100644 --- a/src/USER-DRUDE/fix_drude.h +++ b/src/USER-DRUDE/fix_drude.h @@ -50,16 +50,15 @@ class FixDrude : public Fix { private: int rebuildflag; - static FixDrude *sptr; std::set<tagint> * partner_set; void build_drudeid(); - static 
void ring_search_drudeid(int size, char *cbuf); - static void ring_build_partner(int size, char *cbuf); + static void ring_search_drudeid(int size, char *cbuf, void *ptr); + static void ring_build_partner(int size, char *cbuf, void *ptr); void rebuild_special(); - static void ring_remove_drude(int size, char *cbuf); - static void ring_add_drude(int size, char *cbuf); - static void ring_copy_drude(int size, char *cbuf); + static void ring_remove_drude(int size, char *cbuf, void *ptr); + static void ring_add_drude(int size, char *cbuf, void *ptr); + static void ring_copy_drude(int size, char *cbuf, void *ptr); }; } diff --git a/src/USER-INTEL/README b/src/USER-INTEL/README index e32a09c45c76251f74ab5e0437a8fc84463e7a93..c02014d0ce9b5e9ea16948b97a78b081dd84fefb 100644 --- a/src/USER-INTEL/README +++ b/src/USER-INTEL/README @@ -4,6 +4,7 @@ -------------------------------- W. Michael Brown (Intel) michael.w.brown at intel.com + William McDoniel (RWTH Aachen University) Rodrigo Canales (RWTH Aachen University) Markus Höhnerbach (RWTH Aachen University) Stan Moore (Sandia) @@ -14,15 +15,25 @@ ----------------------------------------------------------------------------- -This package is based on the USER-OMP package and provides LAMMPS styles that: +This package provides LAMMPS styles that: 1. include support for single and mixed precision in addition to double. 2. include modifications to support vectorization for key routines + 3. include modifications for data layouts to improve cache efficiency 3. 
include modifications to support offload to Intel(R) Xeon Phi(TM) coprocessors ----------------------------------------------------------------------------- +For Intel server processors codenamed "Skylake", the following flags should +be added or changed in the Makefile depending on the version: + +2017 update 2 - No changes needed +2017 updates 3 or 4 - Use -xCOMMON-AVX512 and not -xHost or -xCORE-AVX512 +2018 or newer - Use -xHost or -xCORE-AVX512 and -qopt-zmm-usage=high + +----------------------------------------------------------------------------- + When using the suffix command with "intel", intel styles will be used if they exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP USER-OMP styles will be used whenever USER-INTEL styles are not available. This diff --git a/src/USER-INTEL/TEST/README b/src/USER-INTEL/TEST/README index cf14fb323787d065a4c2a9a75ccbf330ff603279..758c37bf56edd69e7d618ef9d68345a3c9057dfa 100644 --- a/src/USER-INTEL/TEST/README +++ b/src/USER-INTEL/TEST/README @@ -4,6 +4,7 @@ # in.intel.lj - Atomic fluid (LJ Benchmark) # in.intel.rhodo - Protein (Rhodopsin Benchmark) # in.intel.lc - Liquid Crystal w/ Gay-Berne potential +# in.intel.eam - Copper benchmark with Embedded Atom Method # in.intel.sw - Silicon benchmark with Stillinger-Weber # in.intel.tersoff - Silicon benchmark with Tersoff # in.intel.water - Coarse-grain water benchmark using Stillinger-Weber @@ -11,19 +12,26 @@ ############################################################################# ############################################################################# -# Expected Timesteps/second with turbo on and HT enabled, LAMMPS 18-Jun-2016 +# Expected Timesteps/second with turbo on and HT enabled, LAMMPS June-2017 +# - Compiled w/ Intel Parallel Studio 2017u2 and Makefile.intel_cpu_intelmpi # # Xeon E5-2697v4 Xeon Phi 7250 # -# in.intel.lj - 162.764 179.148 -# in.intel.rhodo - 11.633 13.668 -# in.intel.lc - 19.136 24.863 -# in.intel.sw - 139.048 
152.026 -# in.intel.tersoff - 82.663 92.985 -# in.intel.water - 59.838 85.704 +# in.intel.lj - 199.5 282.3 +# in.intel.rhodo - 12.4 17.5 +# in.intel.lc - 19.0 25.7 +# in.intel.eam - 59.4 92.8 +# in.intel.sw - 132.4 161.9 +# in.intel.tersoff - 83.3 101.1 +# in.intel.water - 53.4 90.3 # ############################################################################# +############################################################################# +# For Skylake server (Xeon) architectures, see notes in the USER-INTEL/README +# for build flags that should be used. +############################################################################# + ############################################################################# # For Haswell (Xeon v3) architectures, depending on the compiler version, # it may give better performance to compile for an AVX target (with -xAVX @@ -42,7 +50,18 @@ # -v m 0.5 # Run for half as long ############################################################################# -# Example for running benchmarks: +############################################################################# +# The LAMMPS newton setting can be controlled from the commandline for the +# benchmarks with the N variable: +# +# -v N on # newton on +# -v N off # newton off +# +# The default is on for all of the benchmarks except for LJ where the off +# setting performs best with the USER-INTEL package +############################################################################# + +# Example for running benchmarks (see run_benchmarks.sh for script): # Number of physical cores per node not including hyperthreads export LMP_CORES=28 @@ -57,26 +76,35 @@ export LMP_BIN=../../lmp_intel_cpu # LAMMPS root directory export LMP_ROOT=../../../ -source /opt/intel/parallel_studio_xe_2016.2.062/psxevars.sh +source /opt/intel/parallel_studio_xe_2017.2.050/psxevars.sh +export KMP_BLOCKTIME=0 export I_MPI_PIN_DOMAIN=core export I_MPI_FABRICS=shm # For single node +# ONLY FOR INTEL XEON PHI 
x200 SERIES PROCESSORS +export I_MPI_SHM_LMT=shm + # Generate the restart file for use with liquid crystal benchmark mpirun -np $LMP_CORES $LMP_BIN -in in.lc_generate_restart -log none # Benchmark to run export bench=in.intel.lj +############################################################################# +# For Intel Xeon Phi x200 series processors best performance is achieved by +# using MCDRAM. In flat mode, this can be achieved with numactl, +# MPI environment variables, or other options provided by batch schedulers +############################################################################# ############################################################################# # To run without a optimization package ############################################################################# -mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none +mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -v N on ############################################################################# # To run with USER-OMP package ############################################################################# -mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk omp 0 -sf omp +mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk omp 0 -sf omp -v N on ############################################################################# # To run with USER-INTEL package and no coprocessor @@ -89,6 +117,9 @@ mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 -sf intel mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 1 -sf intel ############################################################################# -# If using PPPM (in.intel.rhodo) on Intel Xeon Phi x200 series processors +# If using PPPM (e.g. 
in.intel.rhodo) on Intel Xeon Phi x200 series +# or Skylake processors ############################################################################# -mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 omp 3 lrt yes -sf intel +export KMP_AFFINITY=none +rthreads=$((OMP_NUM_THREADS-1)) +mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 omp $rthreads lrt yes -sf intel diff --git a/src/USER-INTEL/TEST/in.intel.eam b/src/USER-INTEL/TEST/in.intel.eam index e9523a5dd1d062f0ba56c417f3c0cb23c9843541..5a3b3064afae85b831b00b333a726d242b6e105f 100644 --- a/src/USER-INTEL/TEST/in.intel.eam +++ b/src/USER-INTEL/TEST/in.intel.eam @@ -1,4 +1,6 @@ # bulk Cu lattice + +variable N index on # Newton Setting variable w index 10 # Warmup Timesteps variable t index 3100 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -13,6 +15,7 @@ variable z index 2 variable rr equal floor($t*$m) variable root getenv LMP_ROOT +newton $N if "$n > 0" then "processors * * * grid numa" variable xx equal 20*$x diff --git a/src/USER-INTEL/TEST/in.intel.lc b/src/USER-INTEL/TEST/in.intel.lc index 0172ba3b4dbe4f1bf01ac8c1ec3455e3b87689d0..411f5d830df60b966675adc65399984690f347f7 100644 --- a/src/USER-INTEL/TEST/in.intel.lc +++ b/src/USER-INTEL/TEST/in.intel.lc @@ -3,6 +3,7 @@ # shape: 2 1.5 1 # cutoff 4.0 with skin 0.8 +variable N index on # Newton Setting variable w index 10 # Warmup Timesteps variable t index 840 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -15,6 +16,7 @@ variable z index 2 variable rr equal floor($t*$m) +newton $N if "$n > 0" then "processors * * * grid numa" units lj diff --git a/src/USER-INTEL/TEST/in.intel.lj b/src/USER-INTEL/TEST/in.intel.lj index 8931ca24bcf41ffc8868bfa03e6fc7daed6e5340..2b724f6014a6068b63653e5b30da870b03160bc9 100644 --- a/src/USER-INTEL/TEST/in.intel.lj +++ b/src/USER-INTEL/TEST/in.intel.lj @@ -1,5 +1,6 @@ # 3d Lennard-Jones melt +variable N index off # Newton Setting variable w index 10 # 
Warmup Timesteps variable t index 7900 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -15,6 +16,7 @@ variable yy equal 20*$y variable zz equal 20*$z variable rr equal floor($t*$m) +newton $N if "$n > 0" then "processors * * * grid numa" units lj diff --git a/src/USER-INTEL/TEST/in.intel.rhodo b/src/USER-INTEL/TEST/in.intel.rhodo index 7b3b09260790b662fc6ef47e06d95d32d8a369c8..05145d79c0d507819b2946f74a59d7c645b99459 100644 --- a/src/USER-INTEL/TEST/in.intel.rhodo +++ b/src/USER-INTEL/TEST/in.intel.rhodo @@ -1,5 +1,6 @@ # Rhodopsin model +variable N index on # Newton Setting variable w index 10 # Warmup Timesteps variable t index 520 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -16,10 +17,11 @@ variable z index 2 variable rr equal floor($t*$m) variable root getenv LMP_ROOT +newton $N if "$n > 0" then "processors * * * grid numa" units real -neigh_modify delay 5 every 1 binsize $b +neigh_modify delay 5 every 1 atom_style full bond_style harmonic diff --git a/src/USER-INTEL/TEST/in.intel.sw b/src/USER-INTEL/TEST/in.intel.sw index 077c9bb4fb8831e5e2fdc50bec479267f1135e17..494f58dea39c4d33fa4076c7ac4f241926bdd40e 100644 --- a/src/USER-INTEL/TEST/in.intel.sw +++ b/src/USER-INTEL/TEST/in.intel.sw @@ -1,5 +1,6 @@ # bulk Si via Stillinger-Weber +variable N index on # Newton Setting variable w index 10 # Warmup Timesteps variable t index 6200 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -16,6 +17,7 @@ variable zz equal 10*$z variable rr equal floor($t*$m) variable root getenv LMP_ROOT +newton $N if "$n > 0" then "processors * * * grid numa" units metal diff --git a/src/USER-INTEL/TEST/in.intel.tersoff b/src/USER-INTEL/TEST/in.intel.tersoff index f0c6a88f75c254eac2bb38e44edd976ad1dd7636..574b29f674b27067cc8bffe07af3dd2d755960bd 100644 --- a/src/USER-INTEL/TEST/in.intel.tersoff +++ b/src/USER-INTEL/TEST/in.intel.tersoff @@ -1,5 +1,6 @@ # bulk Si via Tersoff +variable N index on # Newton 
Setting variable w index 10 # Warmup Timesteps variable t index 2420 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -16,6 +17,7 @@ variable zz equal 10*$z variable rr equal floor($t*$m) variable root getenv LMP_ROOT +newton $N if "$n > 0" then "processors * * * grid numa" units metal diff --git a/src/USER-INTEL/TEST/in.intel.water b/src/USER-INTEL/TEST/in.intel.water index 1c1fca311fa2b75ec090acf5eb1b4afdb100f08c..0643def19ed933119d1040243e4af1d427158e1f 100644 --- a/src/USER-INTEL/TEST/in.intel.water +++ b/src/USER-INTEL/TEST/in.intel.water @@ -1,5 +1,6 @@ # Coarse-grain water simulation using Stillinger-Weber +variable N index on # Newton Setting variable w index 10 # Warmup Timesteps variable t index 2600 # Main Run Timesteps variable m index 1 # Main Run Timestep Multiplier @@ -11,6 +12,7 @@ variable y index 2 variable z index 2 variable rr equal floor($t*$m) +newton $N if "$n > 0" then "processors * * * grid numa" units real diff --git a/src/USER-INTEL/TEST/in.lc_generate_restart b/src/USER-INTEL/TEST/in.lc_generate_restart index 8ae53c5c8e332a30c2b30216f28ed01d0ce532ad..30d593f2cdb8ac039f029b2321e3caf73a2a7216 100644 --- a/src/USER-INTEL/TEST/in.lc_generate_restart +++ b/src/USER-INTEL/TEST/in.lc_generate_restart @@ -4,13 +4,13 @@ # cutoff 4.0 with skin 0.8 # NPT, T=2.4, P=8.0 -variable x index 1 -variable y index 1 -variable z index 1 +variable xt index 1 +variable yt index 1 +variable zt index 1 -variable i equal $x*32 -variable j equal $y*32 -variable k equal $z*32 +variable i equal ${xt}*32 +variable j equal ${yt}*32 +variable k equal ${zt}*32 units lj atom_style ellipsoid diff --git a/src/USER-INTEL/TEST/run_benchmarks.sh b/src/USER-INTEL/TEST/run_benchmarks.sh new file mode 100755 index 0000000000000000000000000000000000000000..10bd79e0d1fd6bd0197c41b555266e7a70fa2815 --- /dev/null +++ b/src/USER-INTEL/TEST/run_benchmarks.sh @@ -0,0 +1,86 @@ +#!/bin/bash + 
+######################################################################### +# Adjust settings below for your system +######################################################################### + +# --------------------- MPI Launch Command + +export MPI="mpirun" +#export MPI="numactl -p 1 mpirun" # -- Systems w/ MCDRAM in flat mode + +# ------------- Name and location of the LAMMPS binary + +export LMP_BIN=../../lmp_intel_cpu_intelmpi +#export LMP_BIN=../../lmp_knl + +# ------------- Directory containing the LAMMPS installation + +export LMP_ROOT=../../../ + +# ------------- Number of physical cores (not HW threads) + +export LMP_CORES=36 # -- For Intel Xeon E5-2697v4 SKU +#export LMP_CORES=68 # -- For Intel Xeon Phi x200 7250 SKU + +# ------------- Number of HW threads to use in tests + +export LMP_THREAD_LIST="2" # -- For 2 threads per core w/ HT enabled +#export LMP_THREAD_LIST="2 4" # -- For 2 threads per core w/ HT enabled + +# ------------- MPI Tuning Parameters + +#export I_MPI_SHM_LMT=shm # -- Uncomment for Xeon Phi x200 series + +# ------------- Library locations for build + +#source /opt/intel/parallel_studio_xe_2017.2.050/psxevars.sh + +######################################################################### +# End settings for your system +######################################################################### + +export WORKLOADS="lj rhodo rhodo_lrt lc sw water eam" +export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1" +export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1" + +export LOG_DIR_HEADER=`echo $LMP_BIN | sed 's/\.\.\///g' | sed 's/\.\///g'` +export LOG_DIR_HOST=`hostname` +export DATE_STRING=`date +%s` +export LOG_DIR=$LOG_DIR_HOST"_"$LOG_DIR_HEADER"_"$DATE_STRING +mkdir $LOG_DIR + +export I_MPI_PIN_DOMAIN=core +export I_MPI_FABRICS=shm +export KMP_BLOCKTIME=0 + +echo -n "Creating restart file...." +$MPI -np $LMP_CORES $LMP_BIN -in in.lc_generate_restart -log none $LMP_ARGS +echo "Done." 
+for threads in $LMP_THREAD_LIST +do + export OMP_NUM_THREADS=$threads + for workload in $WORKLOADS + do + export LOGFILE=$LOG_DIR/$workload.$LMP_CORES"c"$threads"t".log + echo "Running $LOGFILE" + cmd="$MPI -np $LMP_CORES $LMP_BIN -in in.intel.$workload -log $LOGFILE $LMP_ARGS"; + rthreads=$threads + unset KMP_AFFINITY + $cmd + + # - For benchmarks with PPPM, also try LRT mode + if [ $workload = "rhodo" ]; then + export LOGFILE=$LOG_DIR/$workload"_lrt".$LMP_CORES"c"$threads"t".log + cmd="$MPI -np $LMP_CORES $LMP_BIN -in in.intel.$workload -log $LOGFILE $RLMP_ARGS"; + rthreads=$((threads-1)) + export KMP_AFFINITY=none + export OMP_NUM_THREADS=$rthreads + echo " $cmd" >> $LOG_DIR/commands.info + $cmd + fi + done +done + +# Performance reported by LAMMPS (Timesteps/second ignoring warm-up run) +grep Perf $LOG_DIR/*.log | awk 'BEGIN{n=1}n%2==0{print $0}{n++}' | sed 's/\/day//g' | sed 's/steps\/s/steps_s/g' | sed 's/hours\/ns//g' | sed 's/.*\///g' | sed 's/\.log:Performance://g' | awk '{c=NF-1; print $1,$c}' diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp index aafc765c6be9e76216299dacc97996dec219061c..d55afd47427302169ec649f8a12bc1a9e6012681 100644 --- a/src/USER-INTEL/angle_charmm_intel.cpp +++ b/src/USER-INTEL/angle_charmm_intel.cpp @@ -37,7 +37,7 @@ typedef struct { int a,b,c,t; } int4_t; /* ---------------------------------------------------------------------- */ -AngleCharmmIntel::AngleCharmmIntel(LAMMPS *lmp) : AngleCharmm(lmp) +AngleCharmmIntel::AngleCharmmIntel(LAMMPS *lmp) : AngleCharmm(lmp) { suffix_flag |= Suffix::INTEL; } @@ -74,23 +74,23 @@ void AngleCharmmIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void AngleCharmmIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if 
(vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -102,10 +102,10 @@ void AngleCharmmIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void AngleCharmmIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void AngleCharmmIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nanglelist; @@ -126,31 +126,42 @@ void AngleCharmmIntel::eval(const int vflag, const int nthreads = tc; acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oeangle = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oeangle = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int4_t * _noalias const anglelist = + const int4_t * _noalias const anglelist = (int4_t *) neighbor->anglelist[0]; - for (int n 
= nfrom; n < nto; n++) { + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) seangle = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = anglelist[n].a; const int i2 = anglelist[n].b; const int i3 = anglelist[n].c; @@ -229,40 +240,58 @@ void AngleCharmmIntel::eval(const int vflag, // apply force to each of 3 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x -= f1x + f3x; + f[i2].y -= f1y + f3y; + f[i2].z -= f1z + f3z; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; + } } - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x -= f1x + f3x; - f[i2].y -= f1y + f3y; - f[i2].z -= f1z + f3z; - } - - if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; - } - - if (EVFLAG) { - IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x, - f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1, - delx2, dely2, delz2, oeangle, f, NEWTON_BOND, - nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, + i3, f1x, f1y, f1z, f3x, f3y, f3z, delx1, + dely1, delz1, delx2, dely2, delz2, seangle, + f, NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, + sv4, sv5); + #else + IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, + i3, f1x, f1y, f1z, f3x, f3y, f3z, delx1, + dely1, delz1, delx2, dely2, delz2, oeangle, + f, NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, + ov4, ov5); + #endif } } // for n + #ifdef 
LMP_INTEL_USE_SIMDOFF + if (EFLAG) oeangle += seangle; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; + } + #endif } // omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oeangle; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oeangle; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -319,11 +348,11 @@ void AngleCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void AngleCharmmIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes, - Memory *memory) { + Memory *memory) { if (nangletypes != _nangletypes) { if (_nangletypes > 0) _memory->destroy(fc); - + if (nangletypes > 0) _memory->create(fc,nangletypes,"anglecharmmintel.fc"); } diff --git a/src/USER-INTEL/angle_charmm_intel.h b/src/USER-INTEL/angle_charmm_intel.h index a98007b3ef5c2c43e3b2c55d282ae56c5a4ee984..342af31b8c122a937ab156e71c5ef12a7290bd11 100644 --- a/src/USER-INTEL/angle_charmm_intel.h +++ b/src/USER-INTEL/angle_charmm_intel.h @@ -45,8 +45,8 @@ class AngleCharmmIntel : public AngleCharmm { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp index f101fd9e1f09226134e127a7b7d4565d5d33800b..47e0add6906836e8549472b9018dd1d6cf6fd487 100644 --- 
a/src/USER-INTEL/angle_harmonic_intel.cpp +++ b/src/USER-INTEL/angle_harmonic_intel.cpp @@ -37,7 +37,7 @@ typedef struct { int a,b,c,t; } int4_t; /* ---------------------------------------------------------------------- */ -AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp) +AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp) { suffix_flag |= Suffix::INTEL; } @@ -74,23 +74,23 @@ void AngleHarmonicIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void AngleHarmonicIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -102,10 +102,10 @@ void AngleHarmonicIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void AngleHarmonicIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void AngleHarmonicIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nanglelist; @@ -126,31 +126,42 @@ void AngleHarmonicIntel::eval(const int vflag, const int nthreads = tc; acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oeangle = (acc_t)0.0; - if (vflag) { - 
ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oeangle = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int4_t * _noalias const anglelist = + const int4_t * _noalias const anglelist = (int4_t *) neighbor->anglelist[0]; - for (int n = nfrom; n < nto; n++) { + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t seangle, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) seangle = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:seangle, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = anglelist[n].a; const int i2 = anglelist[n].b; const int i3 = anglelist[n].c; @@ -211,40 +222,58 @@ void AngleHarmonicIntel::eval(const int vflag, // apply force to each of 3 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x -= f1x + f3x; + f[i2].y -= f1y + f3y; + f[i2].z -= f1z + f3z; + } + + if (NEWTON_BOND || i3 < nlocal) { + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; + } } - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x -= f1x + f3x; - f[i2].y -= f1y + f3y; - f[i2].z -= f1z + f3z; - } - - if (NEWTON_BOND || i3 < nlocal) { 
- f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; - } - - if (EVFLAG) { - IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x, - f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1, - delx2, dely2, delz2, oeangle, f, NEWTON_BOND, - nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3, + f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1, + delz1, delx2, dely2, delz2, seangle, f, + NEWTON_BOND, nlocal, sv0, sv1, sv2, sv3, sv4, + sv5); + #else + IP_PRE_ev_tally_angle(EFLAG, VFLAG, eatom, vflag, eangle, i1, i2, i3, + f1x, f1y, f1z, f3x, f3y, f3z, delx1, dely1, + delz1, delx2, dely2, delz2, oeangle, f, + NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4, + ov5); + #endif } } // for n + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oeangle += seangle; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; + } + #endif } // omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oeangle; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oeangle; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -299,11 +328,11 @@ void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void AngleHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes, - Memory *memory) { + Memory *memory) { if (nangletypes != _nangletypes) { if (_nangletypes > 0) _memory->destroy(fc); - + if (nangletypes > 0) _memory->create(fc,nangletypes,"anglecharmmintel.fc"); } diff --git a/src/USER-INTEL/angle_harmonic_intel.h b/src/USER-INTEL/angle_harmonic_intel.h index 340ea4b974165a24a9d09d1fd027e17d6fbbef7f..301fc7cc066aabbb484f0c55b5065acbb93fd9b8 100644 --- a/src/USER-INTEL/angle_harmonic_intel.h +++ 
b/src/USER-INTEL/angle_harmonic_intel.h @@ -45,8 +45,8 @@ class AngleHarmonicIntel : public AngleHarmonic { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp index e61ab9be84f4b1a5dbf727f064066ce6cb772d36..bb96135b2d7abeaf1e9a2b7990fae1fb17a314b0 100644 --- a/src/USER-INTEL/bond_fene_intel.cpp +++ b/src/USER-INTEL/bond_fene_intel.cpp @@ -33,7 +33,7 @@ typedef struct { int a,b,t; } int3_t; /* ---------------------------------------------------------------------- */ -BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp) +BondFENEIntel::BondFENEIntel(LAMMPS *lmp) : BondFENE(lmp) { suffix_flag |= Suffix::INTEL; } @@ -70,23 +70,23 @@ void BondFENEIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void BondFENEIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -96,10 +96,10 @@ void BondFENEIntel::compute(int eflag, 
int vflag, } } -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void BondFENEIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void BondFENEIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nbondlist; if (inum == 0) return; @@ -119,32 +119,42 @@ void BondFENEIntel::eval(const int vflag, const int nthreads = tc; acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oebond = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oebond = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } - #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int3_t * _noalias const bondlist = + const int3_t * _noalias const bondlist = (int3_t *) neighbor->bondlist[0]; - for (int n = nfrom; n < nto; n++) { + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) sebond = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = bondlist[n].a; const int i2 = bondlist[n].b; const int type = bondlist[n].t; @@ -166,7 +176,7 @@ void BondFENEIntel::eval(const int vflag, 
// if r -> r0, then rlogarg < 0.0 which is an error // issue a warning and reset rlogarg = epsilon // if r > 2*r0 something serious is wrong, abort - + if (rlogarg < (flt_t)0.1) { char str[128]; sprintf(str,"FENE bond too long: " BIGINT_FORMAT " " @@ -176,18 +186,18 @@ void BondFENEIntel::eval(const int vflag, if (rlogarg <= (flt_t)-3.0) error->one(FLERR,"Bad FENE bond"); rlogarg = (flt_t)0.1; } - + flt_t fbond = -k/rlogarg; - + // force from LJ term - + flt_t sr2,sr6; if (rsq < (flt_t)TWO_1_3*sigmasq) { - sr2 = sigmasq * irsq; + sr2 = sigmasq * irsq; sr6 = sr2 * sr2 * sr2; fbond += (flt_t)48.0 * epsilon * sr6 * (sr6 - (flt_t)0.5) * irsq; } - + // energy flt_t ebond; @@ -199,33 +209,48 @@ void BondFENEIntel::eval(const int vflag, // apply force to each of 2 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += delx*fbond; - f[i1].y += dely*fbond; - f[i1].z += delz*fbond; - } - - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x -= delx*fbond; - f[i2].y -= dely*fbond; - f[i2].z -= delz*fbond; + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += delx*fbond; + f[i1].y += dely*fbond; + f[i1].z += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x -= delx*fbond; + f[i2].y -= dely*fbond; + f[i2].z -= delz*fbond; + } } - if (EVFLAG) { - IP_PRE_ev_tally_bond(EFLAG, eatom, vflag, ebond, i1, i2, fbond, - delx, dely, delz, oebond, f, NEWTON_BOND, + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond, + delx, dely, delz, sebond, f, NEWTON_BOND, + nlocal, sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, fbond, + delx, dely, delz, oebond, f, NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + #endif } } // for n + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oebond += sebond; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; + } + #endif } 
// omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oebond; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oebond; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -282,11 +307,11 @@ void BondFENEIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void BondFENEIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes, - Memory *memory) { + Memory *memory) { if (nbondtypes != _nbondtypes) { if (_nbondtypes > 0) _memory->destroy(fc); - + if (nbondtypes > 0) _memory->create(fc,nbondtypes,"bondfeneintel.fc"); } diff --git a/src/USER-INTEL/bond_fene_intel.h b/src/USER-INTEL/bond_fene_intel.h index d64f1e72545ea513471d34f6c5b85b057cb1ea6c..89c3033096e43eba27065220e2b77769cc831591 100644 --- a/src/USER-INTEL/bond_fene_intel.h +++ b/src/USER-INTEL/bond_fene_intel.h @@ -45,8 +45,8 @@ class BondFENEIntel : public BondFENE { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp index 51a33b1cc341f54185c8cdbff857a73a11c138bc..beb0ebcddaf52e1277859dad00fab1ce25a8aea3 100644 --- a/src/USER-INTEL/bond_harmonic_intel.cpp +++ b/src/USER-INTEL/bond_harmonic_intel.cpp @@ -33,7 +33,7 @@ typedef struct { int a,b,t; } int3_t; /* ---------------------------------------------------------------------- */ 
-BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp) +BondHarmonicIntel::BondHarmonicIntel(LAMMPS *lmp) : BondHarmonic(lmp) { suffix_flag |= Suffix::INTEL; } @@ -70,23 +70,23 @@ void BondHarmonicIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void BondHarmonicIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -96,10 +96,10 @@ void BondHarmonicIntel::compute(int eflag, int vflag, } } -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void BondHarmonicIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void BondHarmonicIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nbondlist; if (inum == 0) return; @@ -119,31 +119,42 @@ void BondHarmonicIntel::eval(const int vflag, const int nthreads = tc; acc_t oebond, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oebond = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oebond = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ 
reduction(+:oebond,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int3_t * _noalias const bondlist = + const int3_t * _noalias const bondlist = (int3_t *) neighbor->bondlist[0]; - for (int n = nfrom; n < nto; n++) { + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t sebond, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) sebond = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:sebond, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = bondlist[n].a; const int i2 = bondlist[n].b; const int type = bondlist[n].t; @@ -167,33 +178,50 @@ void BondHarmonicIntel::eval(const int vflag, if (EFLAG) ebond = rk*dr; // apply force to each of 2 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += delx*fbond; - f[i1].y += dely*fbond; - f[i1].z += delz*fbond; - } - - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x -= delx*fbond; - f[i2].y -= dely*fbond; - f[i2].z -= delz*fbond; + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += delx*fbond; + f[i1].y += dely*fbond; + f[i1].z += delz*fbond; + } + + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x -= delx*fbond; + f[i2].y -= dely*fbond; + f[i2].z -= delz*fbond; + } } - if (EVFLAG) { - IP_PRE_ev_tally_bond(EFLAG, eatom, vflag, ebond, i1, i2, fbond, - delx, dely, delz, oebond, f, NEWTON_BOND, - nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, + fbond, delx, dely, delz, sebond, f, + NEWTON_BOND, nlocal, 
sv0, sv1, sv2, sv3, + sv4, sv5); + #else + IP_PRE_ev_tally_bond(EFLAG, VFLAG, eatom, vflag, ebond, i1, i2, + fbond, delx, dely, delz, oebond, f, + NEWTON_BOND, nlocal, ov0, ov1, ov2, ov3, + ov4, ov5); + #endif } } // for n + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oebond += sebond; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; + } + #endif } // omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oebond; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oebond; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -248,11 +276,11 @@ void BondHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void BondHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes, - Memory *memory) { + Memory *memory) { if (nbondtypes != _nbondtypes) { if (_nbondtypes > 0) _memory->destroy(fc); - + if (nbondtypes > 0) _memory->create(fc,nbondtypes,"bondharmonicintel.fc"); } diff --git a/src/USER-INTEL/bond_harmonic_intel.h b/src/USER-INTEL/bond_harmonic_intel.h index 0de844cddfaf7b70304594446158d631e12d5dec..8fc04f432a2a5994b08e9807f42161d36b933e62 100644 --- a/src/USER-INTEL/bond_harmonic_intel.h +++ b/src/USER-INTEL/bond_harmonic_intel.h @@ -45,8 +45,8 @@ class BondHarmonicIntel : public BondHarmonic { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); 
diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp index c07c22661198ec9dd48673e150e4fda2cda34a57..715cef4d37c6a7b0f0e7afcfac57004ff02c9394 100644 --- a/src/USER-INTEL/dihedral_charmm_intel.cpp +++ b/src/USER-INTEL/dihedral_charmm_intel.cpp @@ -80,8 +80,8 @@ void DihedralCharmmIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void DihedralCharmmIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -93,16 +93,16 @@ void DihedralCharmmIntel::compute(int eflag, int vflag, force->pair->vflag_either = force->pair->vflag_global = 1; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -114,10 +114,10 @@ void DihedralCharmmIntel::compute(int eflag, int vflag, #ifndef LMP_USE_AVXCD_DHC -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void DihedralCharmmIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void DihedralCharmmIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->ndihedrallist; @@ -140,50 +140,50 @@ void DihedralCharmmIntel::eval(const int vflag, acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5; acc_t oevdwl, oecoul, opv0, opv1, opv2, opv3, opv4, opv5; - if (EVFLAG) { - if (EFLAG) - oevdwl = oecoul = oedihedral = (acc_t)0.0; - 
if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0; - } + if (EFLAG) oevdwl = oecoul = oedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; + opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \ - opv0,opv1,opv2,opv3,opv4,opv5) + opv0,opv1,opv2,opv3,opv4,opv5) #endif { + #if defined(LMP_SIMD_COMPILER_TEST) int nfrom, nto, tid; IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + int nfrom, npl, nto, tid; + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int5_t * _noalias const dihedrallist = + const int5_t * _noalias const dihedrallist = (int5_t *) neighbor->dihedrallist[0]; const flt_t qqrd2e = force->qqrd2e; acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5; acc_t sevdwl, secoul, spv0, spv1, spv2, spv3, spv4, spv5; - if (EVFLAG) { - if (EFLAG) - sevdwl = secoul = sedihedral = (acc_t)0.0; - if (vflag) { - sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; - spv0 = spv1 = spv2 = spv3 = spv4 = spv5 = (acc_t)0.0; - } + if (EFLAG) sevdwl = secoul = sedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + spv0 = spv1 = spv2 = spv3 = spv4 = spv5 = (acc_t)0.0; } #if defined(LMP_SIMD_COMPILER_TEST) #pragma vector aligned #pragma simd reduction(+:sedihedral, sevdwl, secoul, sv0, sv1, sv2, \ - sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5) - #endif + sv3, sv4, sv5, spv0, spv1, spv2, spv3, spv4, spv5) for (int n = nfrom; n < nto; n++) { + #endif + for (int n = nfrom; n < nto; n += npl) { const int i1 = dihedrallist[n].a; const int i2 = dihedrallist[n].b; const int i3 = 
dihedrallist[n].c; @@ -204,7 +204,7 @@ void DihedralCharmmIntel::eval(const int vflag, const flt_t vb2zm = x[i2].z - x[i3].z; // 3rd bond - + const flt_t vb3x = x[i4].x - x[i3].x; const flt_t vb3y = x[i4].y - x[i3].y; const flt_t vb3z = x[i4].z - x[i3].z; @@ -244,25 +244,25 @@ void DihedralCharmmIntel::eval(const int vflag, // error check #ifndef LMP_SIMD_COMPILER_TEST if (c > PTOLERANCE || c < MTOLERANCE) { - int me = comm->me; - - if (screen) { - char str[128]; - sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT, - me,tid,update->ntimestep, - atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); - error->warning(FLERR,str,0); - fprintf(screen," 1st atom: %d %g %g %g\n", - me,x[i1].x,x[i1].y,x[i1].z); - fprintf(screen," 2nd atom: %d %g %g %g\n", - me,x[i2].x,x[i2].y,x[i2].z); - fprintf(screen," 3rd atom: %d %g %g %g\n", - me,x[i3].x,x[i3].y,x[i3].z); - fprintf(screen," 4th atom: %d %g %g %g\n", - me,x[i4].x,x[i4].y,x[i4].z); - } + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT, + me,tid,update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1].x,x[i1].y,x[i1].z); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2].x,x[i2].y,x[i2].z); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3].x,x[i3].y,x[i3].z); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4].x,x[i4].y,x[i4].z); + } } #endif @@ -279,19 +279,19 @@ void DihedralCharmmIntel::eval(const int vflag, ddf1 = df1 = (flt_t)0.0; for (int i = 0; i < m; i++) { - ddf1 = p*c - df1*s; - df1 = p*s + df1*c; - p = ddf1; + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; } p = p*tcos_shift + df1*tsin_shift; df1 = df1*tcos_shift - ddf1*tsin_shift; df1 *= -m; p += (flt_t)1.0; - + if 
(m == 0) { - p = (flt_t)1.0 + tcos_shift; - df1 = (flt_t)0.0; + p = (flt_t)1.0 + tcos_shift; + df1 = (flt_t)0.0; } const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; @@ -333,14 +333,14 @@ void DihedralCharmmIntel::eval(const int vflag, const flt_t f3y = -sy2 - f4y; const flt_t f3z = -sz2 - f4z; - if (EVFLAG) { - flt_t deng; - if (EFLAG) deng = tk * p; - IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, deng, i1, i2, i3, i4, f1x, - f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x, - vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y, - vb3z, sedihedral, f, NEWTON_BOND, nlocal, - sv0, sv1, sv2, sv3, sv4, sv5); + if (EFLAG || VFLAG) { + flt_t deng; + if (EFLAG) deng = tk * p; + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, + i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, + f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, + vb3x, vb3y, vb3z, sedihedral, f, NEWTON_BOND, + nlocal, sv0, sv1, sv2, sv3, sv4, sv5); } @@ -349,15 +349,15 @@ void DihedralCharmmIntel::eval(const int vflag, #endif { if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f2x; - f[i2].y += f2y; - f[i2].z += f2z; + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; } if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; } } @@ -372,54 +372,54 @@ void DihedralCharmmIntel::eval(const int vflag, flt_t forcecoul; if (implicit) forcecoul = qqrd2e * q[i1]*q[i4]*r2inv; else forcecoul = qqrd2e * q[i1]*q[i4]*sqrt(r2inv); - const flt_t forcelj = r6inv * (fc.ljp[itype][jtype].lj1*r6inv - - fc.ljp[itype][jtype].lj2); + const flt_t forcelj = r6inv * (fc.ljp[itype][jtype].lj1*r6inv - + fc.ljp[itype][jtype].lj2); const flt_t fpair = tweight * (forcelj+forcecoul)*r2inv; if (NEWTON_BOND || i1 < nlocal) { - f1x += delx*fpair; - f1y += dely*fpair; - f1z += delz*fpair; + f1x += delx*fpair; + f1y += dely*fpair; + f1z += delz*fpair; } if (NEWTON_BOND || i4 < nlocal) { - f4x -= delx*fpair; - f4y -= dely*fpair; - f4z -= delz*fpair; + f4x 
-= delx*fpair; + f4y -= dely*fpair; + f4z -= delz*fpair; } - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_BOND || i1 < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_BOND || i4 < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - flt_t ecoul, evdwl; - ecoul = tweight * forcecoul; - evdwl = tweight * r6inv * (fc.ljp[itype][jtype].lj3*r6inv - - fc.ljp[itype][jtype].lj4); - secoul += ev_pre * ecoul; - sevdwl += ev_pre * evdwl; - if (eatom) { - evdwl *= (flt_t)0.5; - evdwl += (flt_t)0.5 * ecoul; - if (NEWTON_BOND || i1 < nlocal) - f[i1].w += evdwl; - if (NEWTON_BOND || i4 < nlocal) - f[i4].w += evdwl; - } - } - // IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, - // delx, dely, delz); - if (vflag) { - spv0 += ev_pre * delx * delx * fpair; - spv1 += ev_pre * dely * dely * fpair; - spv2 += ev_pre * delz * delz * fpair; - spv3 += ev_pre * delx * dely * fpair; - spv4 += ev_pre * delx * delz * fpair; - spv5 += ev_pre * dely * delz * fpair; - } + if (EFLAG || VFLAG) { + flt_t ev_pre = (flt_t)0; + if (NEWTON_BOND || i1 < nlocal) + ev_pre += (flt_t)0.5; + if (NEWTON_BOND || i4 < nlocal) + ev_pre += (flt_t)0.5; + + if (EFLAG) { + flt_t ecoul, evdwl; + ecoul = tweight * forcecoul; + evdwl = tweight * r6inv * (fc.ljp[itype][jtype].lj3*r6inv - + fc.ljp[itype][jtype].lj4); + secoul += ev_pre * ecoul; + sevdwl += ev_pre * evdwl; + if (eatom) { + evdwl *= (flt_t)0.5; + evdwl += (flt_t)0.5 * ecoul; + if (NEWTON_BOND || i1 < nlocal) + f[i1].w += evdwl; + if (NEWTON_BOND || i4 < nlocal) + f[i4].w += evdwl; + } + } + // IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, + // delx, dely, delz); + if (VFLAG && vflag) { + spv0 += ev_pre * delx * delx * fpair; + spv1 += ev_pre * dely * dely * fpair; + spv2 += ev_pre * delz * delz * fpair; + spv3 += ev_pre * delx * dely * fpair; + spv4 += ev_pre * delx * delz * fpair; + spv5 += ev_pre * dely * delz * fpair; + } } // apply force to each of 4 atoms @@ -428,48 +428,44 @@ void DihedralCharmmIntel::eval(const int vflag, #endif { if (NEWTON_BOND || i1 < 
nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; } if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4x; - f[i4].y += f4y; - f[i4].z += f4z; + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; } } } // for n - if (EVFLAG) { - if (EFLAG) { - oedihedral += sedihedral; - oecoul += secoul; - oevdwl += sevdwl; - } - if (vflag) { - ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; - opv0 += spv0; opv1 += spv1; opv2 += spv2; - opv3 += spv3; opv4 += spv4; opv5 += spv5; - } - } - } // omp parallel - - if (EVFLAG) { if (EFLAG) { - energy += oedihedral; - force->pair->eng_vdwl += oevdwl; - force->pair->eng_coul += oecoul; + oedihedral += sedihedral; + oecoul += secoul; + oevdwl += sevdwl; } - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - force->pair->virial[0] += opv0; - force->pair->virial[1] += opv1; - force->pair->virial[2] += opv2; - force->pair->virial[3] += opv3; - force->pair->virial[4] += opv4; - force->pair->virial[5] += opv5; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; + opv0 += spv0; opv1 += spv1; opv2 += spv2; + opv3 += spv3; opv4 += spv4; opv5 += spv5; } + } // omp parallel + + if (EFLAG) { + energy += oedihedral; + force->pair->eng_vdwl += oevdwl; + force->pair->eng_coul += oecoul; + } + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; + force->pair->virial[0] += opv0; + force->pair->virial[1] += opv1; + force->pair->virial[2] += opv2; + force->pair->virial[3] += opv3; + force->pair->virial[4] += opv4; + force->pair->virial[5] += opv5; } fix->set_reduce_flag(); @@ -488,10 +484,10 @@ authors for more details. 
------------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void DihedralCharmmIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void DihedralCharmmIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { typedef typename SIMD_type<flt_t>::SIMD_vec SIMD_flt_t; @@ -518,30 +514,28 @@ void DihedralCharmmIntel::eval(const int vflag, acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5; acc_t oevdwl, oecoul, opv0, opv1, opv2, opv3, opv4, opv5; - if (EVFLAG) { - if (EFLAG) - oevdwl = oecoul = oedihedral = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0; - } + if (EFLAG) oevdwl = oecoul = oedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; + opv0 = opv1 = opv2 = opv3 = opv4 = opv5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oevdwl,oecoul,oedihedral,ov0,ov1,ov2,ov3,ov4,ov5, \ - opv0,opv1,opv2,opv3,opv4,opv5) + opv0,opv1,opv2,opv3,opv4,opv5) #endif { - int nfrom, nto, tid; - IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + int nfrom, npl, nto, tid; + IP_PRE_omp_stride_id_vec(nfrom, npl, nto, tid, inum, nthreads, + swidth); FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int * _noalias const dihedrallist = + const int * _noalias const dihedrallist = (int *) neighbor->dihedrallist[0]; const flt_t * _noalias const weight = &(fc.weight[0]); const flt_t * _noalias const x_f = &(x[0].x); @@ -559,36 +553,34 @@ void DihedralCharmmIntel::eval(const int vflag, SIMD_acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, 
sv5; SIMD_acc_t sevdwl, secoul, spv0, spv1, spv2, spv3, spv4, spv5; - if (EVFLAG) { - if (EFLAG) { - sevdwl = SIMD_set((acc_t)0.0); - secoul = SIMD_set((acc_t)0.0); - sedihedral = SIMD_set((acc_t)0.0); - } - if (vflag) { - sv0 = SIMD_set((acc_t)0.0); - sv1 = SIMD_set((acc_t)0.0); - sv2 = SIMD_set((acc_t)0.0); - sv3 = SIMD_set((acc_t)0.0); - sv4 = SIMD_set((acc_t)0.0); - sv5 = SIMD_set((acc_t)0.0); - spv0 = SIMD_set((acc_t)0.0); - spv1 = SIMD_set((acc_t)0.0); - spv2 = SIMD_set((acc_t)0.0); - spv3 = SIMD_set((acc_t)0.0); - spv4 = SIMD_set((acc_t)0.0); - spv5 = SIMD_set((acc_t)0.0); - } + if (EFLAG) { + sevdwl = SIMD_set((acc_t)0.0); + secoul = SIMD_set((acc_t)0.0); + sedihedral = SIMD_set((acc_t)0.0); + } + if (VFLAG && vflag) { + sv0 = SIMD_set((acc_t)0.0); + sv1 = SIMD_set((acc_t)0.0); + sv2 = SIMD_set((acc_t)0.0); + sv3 = SIMD_set((acc_t)0.0); + sv4 = SIMD_set((acc_t)0.0); + sv5 = SIMD_set((acc_t)0.0); + spv0 = SIMD_set((acc_t)0.0); + spv1 = SIMD_set((acc_t)0.0); + spv2 = SIMD_set((acc_t)0.0); + spv3 = SIMD_set((acc_t)0.0); + spv4 = SIMD_set((acc_t)0.0); + spv5 = SIMD_set((acc_t)0.0); } SIMD_int n_offset = SIMD_set(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, - 55, 60, 65, 70, 75) + (nfrom * 5); + 55, 60, 65, 70, 75) + (nfrom * 5); const int nto5 = nto * 5; const int nlocals4 = nlocal << 4; const SIMD_int simd_nlocals4 = SIMD_set(nlocals4); const int ntypes = atom->ntypes + 1; - for (int n = nfrom; n < nto; n += swidth) { + for (int n = nfrom; n < nto; n += npl) { SIMD_mask nmask = n_offset < nto5; SIMD_int i1 = SIMD_gather(nmask, dihedrallist, n_offset); const SIMD_flt_t q1 = SIMD_gather(nmask, q, i1); @@ -601,7 +593,7 @@ void DihedralCharmmIntel::eval(const int vflag, SIMD_int type = SIMD_gather(nmask, dihedrallist+4, n_offset); const SIMD_flt_t tweight = SIMD_gather(nmask, weight, type); type = type << 2; - n_offset = n_offset + swidth * 5; + n_offset = n_offset + npl * 5; // 1st bond @@ -626,7 +618,7 @@ void DihedralCharmmIntel::eval(const int vflag, const 
SIMD_flt_t vb2zm = z2 - z3; // 3rd bond - + SIMD_flt_t x4, y4, z4; SIMD_int jtype; @@ -672,7 +664,7 @@ void DihedralCharmmIntel::eval(const int vflag, const SIMD_flt_t ptol = SIMD_set(PTOLERANCE); const SIMD_flt_t ntol = SIMD_set(MTOLERANCE); if (c > ptol || c < ntol) - if (screen) + if (screen) error->warning(FLERR,"Dihedral problem."); c = SIMD_set(c, c > one, one); @@ -686,14 +678,14 @@ void DihedralCharmmIntel::eval(const int vflag, SIMD_flt_t p(one); SIMD_flt_t ddf1(szero); SIMD_flt_t df1(szero); - + const int m_max = SIMD_max(m); for (int i = 0; i < m_max; i++) { - const SIMD_mask my_m = i < m; - ddf1 = SIMD_set(ddf1, my_m, p*c - df1*s); - df1 = SIMD_set(df1, my_m, p*s + df1*c); - p = SIMD_set(p, my_m, ddf1); + const SIMD_mask my_m = i < m; + ddf1 = SIMD_set(ddf1, my_m, p*c - df1*s); + df1 = SIMD_set(df1, my_m, p*s + df1*c); + p = SIMD_set(p, my_m, ddf1); } SIMD_flt_t multf; @@ -702,7 +694,7 @@ void DihedralCharmmIntel::eval(const int vflag, df1 = df1*tcos_shift - ddf1*tsin_shift; df1 = df1 * multf; p = p + one; - + SIMD_mask mzero = (m == SIMD_set((int)0)); p = SIMD_set(p, mzero, one + tcos_shift); df1 = SIMD_set(df1, mzero, szero); @@ -747,41 +739,41 @@ void DihedralCharmmIntel::eval(const int vflag, SIMD_flt_t f3z = -sz2 - f4z; SIMD_flt_t qdeng; - if (EVFLAG) { - SIMD_flt_t ev_pre; - if (NEWTON_BOND) ev_pre = one; - else { - ev_pre = szero; - const SIMD_flt_t quarter = SIMD_set((flt_t)0.25); - ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4, ev_pre, quarter); - ev_pre = SIMD_add(ev_pre, i2 < simd_nlocals4, ev_pre, quarter); - ev_pre = SIMD_add(ev_pre, i3 < simd_nlocals4, ev_pre, quarter); - ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4, ev_pre, quarter); - } - SIMD_zero_masked(nmask, ev_pre); - if (EFLAG) { - const SIMD_flt_t deng = tk * p; - sedihedral = SIMD_ev_add(sedihedral, ev_pre * deng); - if (eatom) { - qdeng = deng * SIMD_set((flt_t)0.25); - SIMD_mask newton_mask; - if (NEWTON_BOND) newton_mask = nmask; - if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, 
i2, simd_nlocals4); - SIMD_flt_t ieng = qdeng; - SIMD_jeng_update(newton_mask, featom, i2, ieng); - ieng = qdeng; - if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i3, simd_nlocals4); - SIMD_jeng_update(newton_mask, featom, i3, ieng); - } - } - if (vflag) { + if (EFLAG || VFLAG) { + SIMD_flt_t ev_pre; + if (NEWTON_BOND) ev_pre = one; + else { + ev_pre = szero; + const SIMD_flt_t quarter = SIMD_set((flt_t)0.25); + ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4, ev_pre, quarter); + ev_pre = SIMD_add(ev_pre, i2 < simd_nlocals4, ev_pre, quarter); + ev_pre = SIMD_add(ev_pre, i3 < simd_nlocals4, ev_pre, quarter); + ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4, ev_pre, quarter); + } + SIMD_zero_masked(nmask, ev_pre); + if (EFLAG) { + const SIMD_flt_t deng = tk * p; + sedihedral = SIMD_ev_add(sedihedral, ev_pre * deng); + if (eatom) { + qdeng = deng * SIMD_set((flt_t)0.25); + SIMD_mask newton_mask; + if (NEWTON_BOND) newton_mask = nmask; + if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i2, simd_nlocals4); + SIMD_flt_t ieng = qdeng; + SIMD_jeng_update(newton_mask, featom, i2, ieng); + ieng = qdeng; + if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i3, simd_nlocals4); + SIMD_jeng_update(newton_mask, featom, i3, ieng); + } + } + if (VFLAG && vflag) { sv0 = SIMD_ev_add(sv0, ev_pre*(vb1x*f1x-vb2xm*f3x+(vb3x-vb2xm)*f4x)); - sv1 = SIMD_ev_add(sv1, ev_pre*(vb1y*f1y-vb2ym*f3y+(vb3y-vb2ym)*f4y)); - sv2 = SIMD_ev_add(sv2, ev_pre*(vb1z*f1z-vb2zm*f3z+(vb3z-vb2zm)*f4z)); - sv3 = SIMD_ev_add(sv3, ev_pre*(vb1x*f1y-vb2xm*f3y+(vb3x-vb2xm)*f4y)); - sv4 = SIMD_ev_add(sv4, ev_pre*(vb1x*f1z-vb2xm*f3z+(vb3x-vb2xm)*f4z)); - sv5 = SIMD_ev_add(sv5, ev_pre*(vb1y*f1z-vb2ym*f3z+(vb3y-vb2ym)*f4z)); - } + sv1 = SIMD_ev_add(sv1, ev_pre*(vb1y*f1y-vb2ym*f3y+(vb3y-vb2ym)*f4y)); + sv2 = SIMD_ev_add(sv2, ev_pre*(vb1z*f1z-vb2zm*f3z+(vb3z-vb2zm)*f4z)); + sv3 = SIMD_ev_add(sv3, ev_pre*(vb1x*f1y-vb2xm*f3y+(vb3x-vb2xm)*f4y)); + sv4 = SIMD_ev_add(sv4, ev_pre*(vb1x*f1z-vb2xm*f3z+(vb3x-vb2xm)*f4z)); + sv5 = 
SIMD_ev_add(sv5, ev_pre*(vb1y*f1z-vb2ym*f3z+(vb3y-vb2ym)*f4z)); + } } SIMD_mask newton_mask; @@ -816,28 +808,28 @@ void DihedralCharmmIntel::eval(const int vflag, f4y = f4y - dely * fpair; f4z = f4z - delz * fpair; - if (EVFLAG) { - SIMD_flt_t ev_pre; - if (NEWTON_BOND) ev_pre = one; - else { - ev_pre = szero; + if (EFLAG || VFLAG) { + SIMD_flt_t ev_pre; + if (NEWTON_BOND) ev_pre = one; + else { + ev_pre = szero; const SIMD_flt_t half = SIMD_set((flt_t)0.5); ev_pre = SIMD_add(ev_pre, i1 < simd_nlocals4,ev_pre,half); ev_pre = SIMD_add(ev_pre, i4 < simd_nlocals4,ev_pre,half); - } - SIMD_zero_masked(nmask, ev_pre); - - if (EFLAG) { - const SIMD_flt_t ecoul = tweight * forcecoul; - const SIMD_flt_t lj3 = SIMD_gather(nmask, plj3, ijtype); - const SIMD_flt_t lj4 = SIMD_gather(nmask, plj4, ijtype); - SIMD_flt_t evdwl = tweight * r6inv * (lj3 * r6inv - lj4); - secoul = SIMD_ev_add(secoul, ev_pre * ecoul); - sevdwl = SIMD_ev_add(sevdwl, ev_pre * evdwl); - if (eatom) { - const SIMD_flt_t half = SIMD_set((flt_t)0.5); - evdwl = evdwl * half; - evdwl = evdwl + half * ecoul + qdeng; + } + SIMD_zero_masked(nmask, ev_pre); + + if (EFLAG) { + const SIMD_flt_t ecoul = tweight * forcecoul; + const SIMD_flt_t lj3 = SIMD_gather(nmask, plj3, ijtype); + const SIMD_flt_t lj4 = SIMD_gather(nmask, plj4, ijtype); + SIMD_flt_t evdwl = tweight * r6inv * (lj3 * r6inv - lj4); + secoul = SIMD_ev_add(secoul, ev_pre * ecoul); + sevdwl = SIMD_ev_add(sevdwl, ev_pre * evdwl); + if (eatom) { + const SIMD_flt_t half = SIMD_set((flt_t)0.5); + evdwl = evdwl * half; + evdwl = evdwl + half * ecoul + qdeng; if (NEWTON_BOND) newton_mask = nmask; if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i1, simd_nlocals4); @@ -846,16 +838,16 @@ void DihedralCharmmIntel::eval(const int vflag, ieng = evdwl; if (!NEWTON_BOND) newton_mask = SIMD_lt(nmask, i4, simd_nlocals4); SIMD_jeng_update(newton_mask, featom, i4, ieng); - } - } - if (vflag) { + } + } + if (VFLAG && vflag) { spv0 = SIMD_ev_add(spv0, ev_pre * delx * delx * 
fpair); - spv1 = SIMD_ev_add(spv1, ev_pre * dely * dely * fpair); - spv2 = SIMD_ev_add(spv2, ev_pre * delz * delz * fpair); - spv3 = SIMD_ev_add(spv3, ev_pre * delx * dely * fpair); - spv4 = SIMD_ev_add(spv4, ev_pre * delx * delz * fpair); - spv5 = SIMD_ev_add(spv5, ev_pre * dely * delz * fpair); - } + spv1 = SIMD_ev_add(spv1, ev_pre * dely * dely * fpair); + spv2 = SIMD_ev_add(spv2, ev_pre * delz * delz * fpair); + spv3 = SIMD_ev_add(spv3, ev_pre * delx * dely * fpair); + spv4 = SIMD_ev_add(spv4, ev_pre * delx * delz * fpair); + spv5 = SIMD_ev_add(spv5, ev_pre * dely * delz * fpair); + } } if (NEWTON_BOND) newton_mask = nmask; @@ -865,45 +857,41 @@ void DihedralCharmmIntel::eval(const int vflag, SIMD_safe_jforce(newton_mask, pforce, i4, f4x, f4y, f4z); } // for n - if (EVFLAG) { - if (EFLAG) { - oedihedral += SIMD_sum(sedihedral); - oecoul += SIMD_sum(secoul); - oevdwl += SIMD_sum(sevdwl); - } - if (vflag) { - ov0 += SIMD_sum(sv0); - ov1 += SIMD_sum(sv1); - ov2 += SIMD_sum(sv2); - ov3 += SIMD_sum(sv3); - ov4 += SIMD_sum(sv4); - ov5 += SIMD_sum(sv5); - opv0 += SIMD_sum(spv0); - opv1 += SIMD_sum(spv1); - opv2 += SIMD_sum(spv2); - opv3 += SIMD_sum(spv3); - opv4 += SIMD_sum(spv4); - opv5 += SIMD_sum(spv5); - } - } - } // omp parallel - - if (EVFLAG) { if (EFLAG) { - energy += oedihedral; - force->pair->eng_vdwl += oevdwl; - force->pair->eng_coul += oecoul; + oedihedral += SIMD_sum(sedihedral); + oecoul += SIMD_sum(secoul); + oevdwl += SIMD_sum(sevdwl); } - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - force->pair->virial[0] += opv0; - force->pair->virial[1] += opv1; - force->pair->virial[2] += opv2; - force->pair->virial[3] += opv3; - force->pair->virial[4] += opv4; - force->pair->virial[5] += opv5; + if (VFLAG && vflag) { + ov0 += SIMD_sum(sv0); + ov1 += SIMD_sum(sv1); + ov2 += SIMD_sum(sv2); + ov3 += SIMD_sum(sv3); + ov4 += SIMD_sum(sv4); + ov5 += SIMD_sum(sv5); + opv0 += 
SIMD_sum(spv0); + opv1 += SIMD_sum(spv1); + opv2 += SIMD_sum(spv2); + opv3 += SIMD_sum(spv3); + opv4 += SIMD_sum(spv4); + opv5 += SIMD_sum(spv5); } + } // omp parallel + + if (EFLAG) { + energy += oedihedral; + force->pair->eng_vdwl += oevdwl; + force->pair->eng_coul += oecoul; + } + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; + force->pair->virial[0] += opv0; + force->pair->virial[1] += opv1; + force->pair->virial[2] += opv2; + force->pair->virial[3] += opv3; + force->pair->virial[4] += opv4; + force->pair->virial[5] += opv5; } fix->set_reduce_flag(); @@ -945,7 +933,7 @@ void DihedralCharmmIntel::init_style() template <class flt_t, class acc_t> void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { const int tp1 = atom->ntypes + 1; @@ -953,12 +941,14 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, fc.set_ntypes(tp1,bp1,memory); buffers->set_ntypes(tp1); - for (int i = 0; i < tp1; i++) { - for (int j = 0; j < tp1; j++) { - fc.ljp[i][j].lj1 = lj14_1[i][j]; - fc.ljp[i][j].lj2 = lj14_2[i][j]; - fc.ljp[i][j].lj3 = lj14_3[i][j]; - fc.ljp[i][j].lj4 = lj14_4[i][j]; + if (weightflag) { + for (int i = 0; i < tp1; i++) { + for (int j = 0; j < tp1; j++) { + fc.ljp[i][j].lj1 = lj14_1[i][j]; + fc.ljp[i][j].lj2 = lj14_2[i][j]; + fc.ljp[i][j].lj3 = lj14_3[i][j]; + fc.ljp[i][j].lj4 = lj14_4[i][j]; + } } } @@ -975,8 +965,8 @@ void DihedralCharmmIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void DihedralCharmmIntel::ForceConst<flt_t>::set_ntypes(const int npairtypes, - const int nbondtypes, - Memory *memory) { + const int nbondtypes, + Memory *memory) { if (npairtypes != _npairtypes) { if (_npairtypes > 0) _memory->destroy(ljp); @@ -989,7 +979,7 @@ void DihedralCharmmIntel::ForceConst<flt_t>::set_ntypes(const int npairtypes, _memory->destroy(bp); 
_memory->destroy(weight); } - + if (nbondtypes > 0) { _memory->create(bp,nbondtypes,"dihedralcharmmintel.bp"); _memory->create(weight,nbondtypes,"dihedralcharmmintel.weight"); diff --git a/src/USER-INTEL/dihedral_charmm_intel.h b/src/USER-INTEL/dihedral_charmm_intel.h index 292faea9f961b8bcc3a4a98b3949d0fe923a1a68..d80b32c8ac3a731e9b4df20b012315d89b950e88 100644 --- a/src/USER-INTEL/dihedral_charmm_intel.h +++ b/src/USER-INTEL/dihedral_charmm_intel.h @@ -44,8 +44,8 @@ class DihedralCharmmIntel : public DihedralCharmm { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); @@ -58,7 +58,7 @@ class DihedralCharmmIntel : public DihedralCharmm { class ForceConst { public: typedef struct { flt_t lj1, lj2, lj3, lj4; } fc_packed1; - typedef struct { flt_t cos_shift, sin_shift, k; + typedef struct { flt_t cos_shift, sin_shift, k; int multiplicity; } fc_packed3; fc_packed1 **ljp; diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp b/src/USER-INTEL/dihedral_harmonic_intel.cpp index 03ab152f49403146c6be49d19a8b4bb794d16d95..196b024fa62d9ed9e192ea9f4b7395b7173c206c 100644 --- a/src/USER-INTEL/dihedral_harmonic_intel.cpp +++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp @@ -69,24 +69,24 @@ void DihedralHarmonicIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void DihedralHarmonicIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = 0; if 
(evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -96,10 +96,10 @@ void DihedralHarmonicIntel::compute(int eflag, int vflag, } } -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void DihedralHarmonicIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void DihedralHarmonicIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->ndihedrallist; @@ -120,40 +120,42 @@ void DihedralHarmonicIntel::eval(const int vflag, const int nthreads = tc; acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oedihedral = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int5_t * _noalias const dihedrallist = + const int5_t * _noalias const dihedrallist = (int5_t *) neighbor->dihedrallist[0]; + #ifdef 
LMP_INTEL_USE_SIMDOFF acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5; - if (EVFLAG) { - if (EFLAG) - sedihedral = (acc_t)0.0; - if (vflag) { - sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; - } + if (EFLAG) sedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; } - - for (int n = nfrom; n < nto; n++) { + #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = dihedrallist[n].a; const int i2 = dihedrallist[n].b; const int i3 = dihedrallist[n].c; @@ -173,7 +175,7 @@ void DihedralHarmonicIntel::eval(const int vflag, const flt_t vb2zm = x[i2].z - x[i3].z; // 3rd bond - + const flt_t vb3x = x[i4].x - x[i3].x; const flt_t vb3y = x[i4].y - x[i3].y; const flt_t vb3z = x[i4].z - x[i3].z; @@ -203,27 +205,29 @@ void DihedralHarmonicIntel::eval(const int vflag, const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z); // error check + #ifndef LMP_INTEL_USE_SIMDOFF if (c > PTOLERANCE || c < MTOLERANCE) { - int me = comm->me; - - if (screen) { - char str[128]; - sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT, - me,tid,update->ntimestep, - atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); - error->warning(FLERR,str,0); - fprintf(screen," 1st atom: %d %g %g %g\n", - me,x[i1].x,x[i1].y,x[i1].z); - fprintf(screen," 2nd atom: %d %g %g %g\n", - me,x[i2].x,x[i2].y,x[i2].z); - fprintf(screen," 3rd atom: %d %g %g %g\n", - me,x[i3].x,x[i3].y,x[i3].z); - fprintf(screen," 4th atom: %d %g %g %g\n", - me,x[i4].x,x[i4].y,x[i4].z); - } + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT, + me,tid,update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + 
fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1].x,x[i1].y,x[i1].z); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2].x,x[i2].y,x[i2].z); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3].x,x[i3].y,x[i3].z); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4].x,x[i4].y,x[i4].z); + } } + #endif if (c > (flt_t)1.0) c = (flt_t)1.0; if (c < (flt_t)-1.0) c = (flt_t)-1.0; @@ -238,19 +242,19 @@ void DihedralHarmonicIntel::eval(const int vflag, ddf1 = df1 = (flt_t)0.0; for (int i = 0; i < m; i++) { - ddf1 = p*c - df1*s; - df1 = p*s + df1*c; - p = ddf1; + ddf1 = p*c - df1*s; + df1 = p*s + df1*c; + p = ddf1; } p = p*tcos_shift + df1*tsin_shift; df1 = df1*tcos_shift - ddf1*tsin_shift; df1 *= -m; p += (flt_t)1.0; - + if (m == 0) { - p = (flt_t)1.0 + tcos_shift; - df1 = (flt_t)0.0; + p = (flt_t)1.0 + tcos_shift; + df1 = (flt_t)0.0; } const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm; @@ -292,56 +296,66 @@ void DihedralHarmonicIntel::eval(const int vflag, const flt_t f3y = -sy2 - f4y; const flt_t f3z = -sz2 - f4z; - if (EVFLAG) { - flt_t deng; - if (EFLAG) deng = tk * p; - IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, deng, i1, i2, i3, i4, f1x, - f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x, - vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y, - vb3z, sedihedral, f, NEWTON_BOND, nlocal, - sv0, sv1, sv2, sv3, sv4, sv5); + if (EFLAG || VFLAG) { + flt_t deng; + if (EFLAG) deng = tk * p; + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4, + f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + vb3y, vb3z, sedihedral, f, NEWTON_BOND, nlocal, + sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, deng, i1, i2, i3, i4, + f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal, + ov0, ov1, ov2, ov3, ov4, ov5); + #endif } + #ifdef 
LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif { if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; } if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f2x; - f[i2].y += f2y; - f[i2].z += f2z; + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; } if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; } if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4x; - f[i4].y += f4y; - f[i4].z += f4z; + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; } } } // for n - if (EVFLAG) { - if (EFLAG) oedihedral += sedihedral; - if (vflag) { - ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; - } + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oedihedral += sedihedral; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; } + #endif } // omp parallel - if (EVFLAG) { - if (EFLAG) energy += oedihedral; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oedihedral; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -381,7 +395,7 @@ void DihedralHarmonicIntel::init_style() template <class flt_t, class acc_t> void DihedralHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { const int bp1 = atom->ndihedraltypes + 1; fc.set_ntypes(bp1,memory); @@ -398,11 +412,11 @@ void DihedralHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void DihedralHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes, - Memory *memory) { + Memory *memory) { if (nbondtypes != _nbondtypes) { if (_nbondtypes > 0) _memory->destroy(bp); - + if 
(nbondtypes > 0) _memory->create(bp,nbondtypes,"dihedralcharmmintel.bp"); } diff --git a/src/USER-INTEL/dihedral_harmonic_intel.h b/src/USER-INTEL/dihedral_harmonic_intel.h index 41e3d20540875fd68c039ceef339491051076965..0a9cfaa042df4cfb33359bc5a8a99c58d8e6e9a9 100644 --- a/src/USER-INTEL/dihedral_harmonic_intel.h +++ b/src/USER-INTEL/dihedral_harmonic_intel.h @@ -44,8 +44,8 @@ class DihedralHarmonicIntel : public DihedralHarmonic { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); @@ -57,7 +57,7 @@ class DihedralHarmonicIntel : public DihedralHarmonic { template <class flt_t> class ForceConst { public: - typedef struct { flt_t cos_shift, sin_shift, k; + typedef struct { flt_t cos_shift, sin_shift, k; int multiplicity; } fc_packed1; fc_packed1 *bp; diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp index bfd5a539569df52a623adb8f98493b0db552798e..1abeba1d5ea5d71a6ed27033502c233a183c8a71 100644 --- a/src/USER-INTEL/dihedral_opls_intel.cpp +++ b/src/USER-INTEL/dihedral_opls_intel.cpp @@ -73,24 +73,24 @@ void DihedralOPLSIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void DihedralOPLSIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); } else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); 
else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -100,10 +100,10 @@ void DihedralOPLSIntel::compute(int eflag, int vflag, } } -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void DihedralOPLSIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void DihedralOPLSIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->ndihedrallist; @@ -124,40 +124,42 @@ void DihedralOPLSIntel::eval(const int vflag, const int nthreads = tc; acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oedihedral = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) #pragma omp parallel default(none) \ - shared(f_start,f_stride,fc) \ + shared(f_start,f_stride,fc) \ reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int5_t * _noalias const dihedrallist = + const int5_t * _noalias const dihedrallist = (int5_t *) neighbor->dihedrallist[0]; + #ifdef LMP_INTEL_USE_SIMDOFF acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5; - if (EVFLAG) { - if (EFLAG) - sedihedral = (acc_t)0.0; - if (vflag) { - sv0 = sv1 = sv2 = 
sv3 = sv4 = sv5 = (acc_t)0.0; - } + if (EFLAG) sedihedral = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; } - - for (int n = nfrom; n < nto; n++) { + #pragma simd reduction(+:sedihedral, sv0, sv1, sv2, sv3, sv4, sv5) + for (int n = nfrom; n < nto; n ++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = dihedrallist[n].a; const int i2 = dihedrallist[n].b; const int i3 = dihedrallist[n].c; @@ -177,7 +179,7 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t vb2zm = x[i2].z - x[i3].z; // 3rd bond - + const flt_t vb3x = x[i4].x - x[i3].x; const flt_t vb3y = x[i4].y - x[i3].y; const flt_t vb3z = x[i4].z - x[i3].z; @@ -207,7 +209,7 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3; flt_t ctmp = -vb1x*vb2xm - vb1y*vb2ym - vb1z*vb2zm; - const flt_t r12c1 = rb1 * rb2; + const flt_t r12c1 = rb1 * rb2; const flt_t c1mag = ctmp * r12c1; ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z; @@ -236,27 +238,29 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t dx = (cx*vb3x + cy*vb3y + cz*vb3z)*cmag*rb3; // error check + #ifndef LMP_INTEL_USE_SIMDOFF if (c > PTOLERANCE || c < MTOLERANCE) { - int me = comm->me; - - if (screen) { - char str[128]; - sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT " " - TAGINT_FORMAT " " TAGINT_FORMAT, - me,tid,update->ntimestep, - atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); - error->warning(FLERR,str,0); - fprintf(screen," 1st atom: %d %g %g %g\n", - me,x[i1].x,x[i1].y,x[i1].z); - fprintf(screen," 2nd atom: %d %g %g %g\n", - me,x[i2].x,x[i2].y,x[i2].z); - fprintf(screen," 3rd atom: %d %g %g %g\n", - me,x[i3].x,x[i3].y,x[i3].z); - fprintf(screen," 4th atom: %d %g %g %g\n", - me,x[i4].x,x[i4].y,x[i4].z); - } + int me = comm->me; + + if (screen) { + char str[128]; + sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " " + TAGINT_FORMAT " " TAGINT_FORMAT " " 
+ TAGINT_FORMAT " " TAGINT_FORMAT, + me,tid,update->ntimestep, + atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", + me,x[i1].x,x[i1].y,x[i1].z); + fprintf(screen," 2nd atom: %d %g %g %g\n", + me,x[i2].x,x[i2].y,x[i2].z); + fprintf(screen," 3rd atom: %d %g %g %g\n", + me,x[i3].x,x[i3].y,x[i3].z); + fprintf(screen," 4th atom: %d %g %g %g\n", + me,x[i4].x,x[i4].y,x[i4].z); + } } + #endif if (c > (flt_t)1.0) c = (flt_t)1.0; if (c < (flt_t)-1.0) c = (flt_t)-1.0; @@ -279,14 +283,14 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t sin_4phim = (flt_t)2.0 * cos_2phi * sin_2phim; flt_t p, pd; - p = fc.bp[type].k1*((flt_t)1.0 + c) + - fc.bp[type].k2*((flt_t)1.0 - cos_2phi) + - fc.bp[type].k3*((flt_t)1.0 + cos_3phi) + - fc.bp[type].k4*((flt_t)1.0 - cos_4phi) ; - pd = fc.bp[type].k1 - - (flt_t)2.0 * fc.bp[type].k2 * sin_2phim + - (flt_t)3.0 * fc.bp[type].k3 * sin_3phim - - (flt_t)4.0 * fc.bp[type].k4 * sin_4phim; + p = fc.bp[type].k1*((flt_t)1.0 + c) + + fc.bp[type].k2*((flt_t)1.0 - cos_2phi) + + fc.bp[type].k3*((flt_t)1.0 + cos_3phi) + + fc.bp[type].k4*((flt_t)1.0 - cos_4phi) ; + pd = fc.bp[type].k1 - + (flt_t)2.0 * fc.bp[type].k2 * sin_2phim + + (flt_t)3.0 * fc.bp[type].k3 * sin_3phim - + (flt_t)4.0 * fc.bp[type].k4 * sin_4phim; flt_t edihed; if (EFLAG) edihed = p; @@ -321,54 +325,64 @@ void DihedralOPLSIntel::eval(const int vflag, const flt_t f3y = sy2 - f4y; const flt_t f3z = sz2 - f4z; - if (EVFLAG) { - IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, edihed, i1, i2, i3, i4, f1x, - f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x, - vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y, - vb3z, sedihedral, f, NEWTON_BOND, nlocal, - sv0, sv1, sv2, sv3, sv4, sv5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, edihed, i1, i2, i3, + i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + vb3y, 
vb3z, sedihedral, f, NEWTON_BOND, nlocal, + sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, edihed, i1, i2, i3, + i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, + vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, + vb3y, vb3z, oedihedral, f, NEWTON_BOND, nlocal, + ov0, ov1, ov2, ov3, ov4, ov5); + #endif } + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif { if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; } if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f2x; - f[i2].y += f2y; - f[i2].z += f2z; + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; } if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; } if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4x; - f[i4].y += f4y; - f[i4].z += f4z; + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; } } } // for n - if (EVFLAG) { - if (EFLAG) oedihedral += sedihedral; - if (vflag) { - ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5; - } + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oedihedral += sedihedral; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; } + #endif } // omp parallel - if (EVFLAG) { - if (EFLAG) energy += oedihedral; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; - } + if (EFLAG) energy += oedihedral; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -408,7 +422,7 @@ void DihedralOPLSIntel::init_style() template <class flt_t, class acc_t> void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { const int bp1 = atom->ndihedraltypes + 
1; fc.set_ntypes(bp1,memory); @@ -425,11 +439,11 @@ void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void DihedralOPLSIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes, - Memory *memory) { + Memory *memory) { if (nbondtypes != _nbondtypes) { if (_nbondtypes > 0) _memory->destroy(bp); - + if (nbondtypes > 0) _memory->create(bp,nbondtypes,"dihedralcharmmintel.bp"); } diff --git a/src/USER-INTEL/dihedral_opls_intel.h b/src/USER-INTEL/dihedral_opls_intel.h index ea0930f4b8b0344bfa6f93cfceca23c8a45ddc2c..1080bfa6c3045d62b6ee2dda7b8390fd4c948d28 100644 --- a/src/USER-INTEL/dihedral_opls_intel.h +++ b/src/USER-INTEL/dihedral_opls_intel.h @@ -44,8 +44,8 @@ class DihedralOPLSIntel : public DihedralOPLS { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index edd33eb72bde86e57b746c436c51dd0f07c2394f..b06f76c90ddf993a74011180e07bd61f4dcc634a 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -61,6 +61,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) int ncops = force->inumeric(FLERR,arg[3]); _nbor_pack_width = 1; + _three_body_neighbor = 0; _precision_mode = PREC_MODE_MIXED; _offload_balance = -1.0; @@ -178,7 +179,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) _real_space_comm = MPI_COMM_WORLD; if (no_affinity == 0) if (set_host_affinity(nomp) != 0) - error->all(FLERR,"Could not set host affinity for offload tasks"); + 
error->all(FLERR,"Could not set host affinity for offload tasks"); } int max_offload_threads = 0, offload_cores = 0; @@ -263,7 +264,7 @@ FixIntel::~FixIntel() double *time2 = off_watch_neighbor(); int *overflow = get_off_overflow_flag(); if (_offload_balance != 0.0 && time1 != NULL && time2 != NULL && - overflow != NULL) { + overflow != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(time1,time2,overflow:alloc_if(0) free_if(1)) } @@ -319,19 +320,25 @@ void FixIntel::init() if (strstr(hybrid->keywords[i], "/intel") != NULL) nstyles++; else - force->pair->no_virial_fdotr_compute = 1; + force->pair->no_virial_fdotr_compute = 1; } if (nstyles > 1) error->all(FLERR, - "Currently, cannot use more than one intel style with hybrid."); + "Currently, cannot use more than one intel style with hybrid."); check_neighbor_intel(); - if (_precision_mode == PREC_MODE_SINGLE) + int off_mode = 0; + if (_offload_balance != 0.0) off_mode = 1; + if (_precision_mode == PREC_MODE_SINGLE) { _single_buffers->zero_ev(); - else if (_precision_mode == PREC_MODE_MIXED) + _single_buffers->grow_ncache(off_mode,_nthreads); + } else if (_precision_mode == PREC_MODE_MIXED) { _mixed_buffers->zero_ev(); - else + _mixed_buffers->grow_ncache(off_mode,_nthreads); + } else { _double_buffers->zero_ev(); + _double_buffers->grow_ncache(off_mode,_nthreads); + } _need_reduce = 0; } @@ -342,13 +349,13 @@ void FixIntel::setup(int vflag) { if (neighbor->style != BIN) error->all(FLERR, - "Currently, neighbor style BIN must be used with Intel package."); + "Currently, neighbor style BIN must be used with Intel package."); if (neighbor->exclude_setting() != 0) error->all(FLERR, - "Currently, cannot use neigh_modify exclude with Intel package."); + "Currently, cannot use neigh_modify exclude with Intel package."); if (vflag_atom) error->all(FLERR, - "Cannot currently get per-atom virials with Intel package."); + "Cannot currently get per-atom virials with Intel package."); #ifdef _LMP_INTEL_OFFLOAD 
post_force(vflag); #endif @@ -367,8 +374,6 @@ void FixIntel::pair_init_check(const bool cdmessage) { #ifdef INTEL_VMASK atom->sortfreq = 1; - if (neighbor->binsizeflag && atom->userbinsize <= 0.0) - atom->userbinsize = neighbor->binsize_user; #endif _nbor_pack_width = 1; @@ -376,9 +381,8 @@ void FixIntel::pair_init_check(const bool cdmessage) #ifdef _LMP_INTEL_OFFLOAD if (_offload_balance != 0.0) atom->sortfreq = 1; - if (force->newton_pair == 0) - _offload_noghost = 0; - else if (_offload_ghost == 0) + _offload_noghost = 0; + if (force->newton_pair && _offload_ghost == 0) _offload_noghost = 1; set_offload_affinity(); @@ -388,7 +392,7 @@ void FixIntel::pair_init_check(const bool cdmessage) double *time2 = off_watch_neighbor(); int *overflow = get_off_overflow_flag(); if (_offload_balance !=0.0 && time1 != NULL && time2 != NULL && - overflow != NULL) { + overflow != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(time1,time2:length(1) alloc_if(1) free_if(0)) \ in(overflow:length(5) alloc_if(1) free_if(0)) @@ -403,7 +407,7 @@ void FixIntel::pair_init_check(const bool cdmessage) error->warning(FLERR, "Unknown Intel Compiler Version\n"); #else if (__INTEL_COMPILER_BUILD_DATE != 20131008 && - __INTEL_COMPILER_BUILD_DATE < 20141023) + __INTEL_COMPILER_BUILD_DATE < 20141023) error->warning(FLERR, "Unsupported Intel Compiler."); #endif #if !defined(__INTEL_COMPILER) @@ -434,24 +438,24 @@ void FixIntel::pair_init_check(const bool cdmessage) if (comm->me == 0) { if (screen) { fprintf(screen, - "----------------------------------------------------------\n"); + "----------------------------------------------------------\n"); if (_offload_balance != 0.0) { fprintf(screen,"Using Intel Coprocessor with %d threads per core, ", - _offload_tpc); + _offload_tpc); fprintf(screen,"%d threads per task\n",_offload_threads); } else { - fprintf(screen,"Using Intel Package without Coprocessor.\n"); + fprintf(screen,"Using Intel Package without Coprocessor.\n"); } 
fprintf(screen,"Precision: %s\n",kmode); if (cdmessage) { - #ifdef LMP_USE_AVXCD - fprintf(screen,"AVX512 CD Optimizations: Enabled\n"); - #else - fprintf(screen,"AVX512 CD Optimizations: Disabled\n"); - #endif + #ifdef LMP_USE_AVXCD + fprintf(screen,"AVX512 CD Optimizations: Enabled\n"); + #else + fprintf(screen,"AVX512 CD Optimizations: Disabled\n"); + #endif } fprintf(screen, - "----------------------------------------------------------\n"); + "----------------------------------------------------------\n"); } } } @@ -460,7 +464,7 @@ void FixIntel::pair_init_check(const bool cdmessage) void FixIntel::bond_init_check() { - if (_offload_balance != 0.0 && atom->molecular && + if (_offload_balance != 0.0 && atom->molecular && force->newton_pair != force->newton_bond) error->all(FLERR, "USER-INTEL package requires same setting for newton bond and non-bond."); @@ -535,24 +539,24 @@ void FixIntel::pre_reverse(int eflag, int vflag) { if (_force_array_m != 0) { if (_need_reduce) { - reduce_results(_force_array_m); + reduce_results(&_force_array_m[0].x); _need_reduce = 0; } - add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom, 0); + add_results(_force_array_m, _ev_array_d, _results_eatom, _results_vatom,0); _force_array_m = 0; } else if (_force_array_d != 0) { if (_need_reduce) { - reduce_results(_force_array_d); + reduce_results(&_force_array_d[0].x); _need_reduce = 0; } - add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom, 0); + add_results(_force_array_d, _ev_array_d, _results_eatom, _results_vatom,0); _force_array_d = 0; } else if (_force_array_s != 0) { if (_need_reduce) { - reduce_results(_force_array_s); + reduce_results(&_force_array_s[0].x); _need_reduce = 0; } - add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom, 0); + add_results(_force_array_s, _ev_array_s, _results_eatom, _results_vatom,0); _force_array_s = 0; } @@ -563,47 +567,56 @@ void FixIntel::pre_reverse(int eflag, int vflag) /* 
---------------------------------------------------------------------- */ -template <class ft> -void FixIntel::reduce_results(ft * _noalias const f_start) +template <class acc_t> +void FixIntel::reduce_results(acc_t * _noalias const f_scalar) { int o_range, f_stride; if (force->newton_pair) o_range = atom->nlocal + atom->nghost; - else + else o_range = atom->nlocal; - IP_PRE_get_stride(f_stride, o_range, sizeof(ft), lmp->atom->torque); - - #if defined(_OPENMP) - #pragma omp parallel default(none) shared(o_range, f_stride) - #endif - { - int iifrom, iito, tid; - IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, _nthreads, - sizeof(ft)); - - int t_off = f_stride; - if (_results_eatom) { - for (int t = 1; t < _nthreads; t++) { - _use_simd_pragma("vector nontemporal") - _use_simd_pragma("novector") - for (int n = iifrom; n < iito; n++) { - f_start[n].x += f_start[n + t_off].x; - f_start[n].y += f_start[n + t_off].y; - f_start[n].z += f_start[n + t_off].z; - f_start[n].w += f_start[n + t_off].w; - } - t_off += f_stride; - } + IP_PRE_get_stride(f_stride, o_range, (sizeof(acc_t)*4), lmp->atom->torque); + + o_range *= 4; + const int f_stride4 = f_stride * 4; + + if (_nthreads <= INTEL_HTHREADS) { + acc_t *f_scalar2 = f_scalar + f_stride4; + if (_nthreads == 4) { + acc_t *f_scalar3 = f_scalar2 + f_stride4; + acc_t *f_scalar4 = f_scalar3 + f_stride4; + _use_simd_pragma("vector aligned") + _use_simd_pragma("simd") + for (int n = 0; n < o_range; n++) + f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n]; + } else if (_nthreads == 2) { + _use_simd_pragma("vector aligned") + _use_simd_pragma("simd") + for (int n = 0; n < o_range; n++) + f_scalar[n] += f_scalar2[n]; } else { + acc_t *f_scalar3 = f_scalar2 + f_stride4; + _use_simd_pragma("vector aligned") + _use_simd_pragma("simd") + for (int n = 0; n < o_range; n++) + f_scalar[n] += f_scalar2[n] + f_scalar3[n]; + } + } else { + #if defined(_OPENMP) + #pragma omp parallel + #endif + { + int iifrom, iito, tid; + 
IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, _nthreads, + sizeof(acc_t)); + + acc_t *f_scalar2 = f_scalar + f_stride4; for (int t = 1; t < _nthreads; t++) { - _use_simd_pragma("vector nontemporal") - _use_simd_pragma("novector") - for (int n = iifrom; n < iito; n++) { - f_start[n].x += f_start[n + t_off].x; - f_start[n].y += f_start[n + t_off].y; - f_start[n].z += f_start[n + t_off].z; - } - t_off += f_stride; + _use_simd_pragma("vector aligned") + _use_simd_pragma("simd") + for (int n = iifrom; n < iito; n++) + f_scalar[n] += f_scalar2[n]; + f_scalar2 += f_stride4; } } } @@ -635,46 +648,65 @@ template <class ft, class acc_t> void FixIntel::add_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, - const int offload) { + const int offload) { start_watch(TIME_PACK); int f_length; #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) { if (offload) { - add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal); if (force->newton_pair) { - const acc_t * _noalias const enull = 0; - int offset = _offload_nlocal; - if (atom->torque) offset *= 2; - add_oresults(f_in + offset, enull, eatom, vatom, - _offload_min_ghost, _offload_nghost); - } + add_oresults(f_in, ev_global, eatom, vatom, 0, _offload_nlocal); + const acc_t * _noalias const enull = 0; + int offset = _offload_nlocal; + if (atom->torque) offset *= 2; + add_oresults(f_in + offset, enull, eatom, vatom, + _offload_min_ghost, _offload_nghost); + } else + add_oresults(f_in, ev_global, eatom, vatom, 0, offload_end_pair()); } else { - add_oresults(f_in, ev_global, eatom, vatom, - _host_min_local, _host_used_local); if (force->newton_pair) { - const acc_t * _noalias const enull = 0; - int offset = _host_used_local; - if (atom->torque) offset *= 2; - add_oresults(f_in + offset, enull, eatom, - vatom, _host_min_ghost, _host_used_ghost); + add_oresults(f_in, ev_global, eatom, vatom, + _host_min_local, _host_used_local); + const acc_t * _noalias const 
enull = 0; + int offset = _host_used_local; + if (atom->torque) offset *= 2; + add_oresults(f_in + offset, enull, eatom, + vatom, _host_min_ghost, _host_used_ghost); + } else { + int start = host_start_pair(); + add_oresults(f_in, ev_global, eatom, vatom, start, atom->nlocal-start); } } stop_watch(TIME_PACK); return; } - if (force->newton_pair && (_offload_noghost == 0 || offload == 0)) - f_length = atom->nlocal + atom->nghost; - else - f_length = atom->nlocal; + int start; + if (offload) { + start = 0; + if (force->newton_pair) { + if (_offload_noghost == 0) + f_length = atom->nlocal + atom->nghost; + else + f_length = atom->nlocal; + } else + f_length = offload_end_pair(); + } else { + if (force->newton_pair) { + start = 0; + f_length = atom->nlocal + atom->nghost; + } else { + start = host_start_pair(); + f_length = atom->nlocal - start; + } + } + add_oresults(f_in, ev_global, eatom, vatom, start, f_length); #else if (force->newton_pair) f_length = atom->nlocal + atom->nghost; else f_length = atom->nlocal; - #endif - add_oresults(f_in, ev_global, eatom, vatom, 0, f_length); + #endif stop_watch(TIME_PACK); } @@ -682,9 +714,9 @@ void FixIntel::add_results(const ft * _noalias const f_in, template <class ft, class acc_t> void FixIntel::add_oresults(const ft * _noalias const f_in, - const acc_t * _noalias const ev_global, - const int eatom, const int vatom, - const int out_offset, const int nall) { + const acc_t * _noalias const ev_global, + const int eatom, const int vatom, + const int out_offset, const int nall) { lmp_ft * _noalias const f = (lmp_ft *) lmp->atom->f[0] + out_offset; if (atom->torque) { if (f_in[1].w) @@ -695,8 +727,11 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, "Sphere particles not yet supported for gayberne/intel"); } + int packthreads; + if (_nthreads > INTEL_HTHREADS) packthreads = _nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) + #pragma omp parallel if(packthreads > 1) #endif { 
#if defined(_OPENMP) @@ -705,16 +740,16 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, const int tid = 0; #endif int ifrom, ito; - IP_PRE_omp_range_align(ifrom, ito, tid, nall, _nthreads, sizeof(acc_t)); + IP_PRE_omp_range_align(ifrom, ito, tid, nall, packthreads, sizeof(acc_t)); if (atom->torque) { int ii = ifrom * 2; lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] + - out_offset; + out_offset; if (eatom) { - double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + double * _noalias const lmp_eatom = force->pair->eatom + out_offset; #if defined(LMP_SIMD_COMPILER) - #pragma novector - #endif + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; @@ -727,8 +762,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, } } else { #if defined(LMP_SIMD_COMPILER) - #pragma novector - #endif + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[ii].x; f[i].y += f_in[ii].y; @@ -741,10 +776,10 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, } } else { if (eatom) { - double * _noalias const lmp_eatom = force->pair->eatom + out_offset; + double * _noalias const lmp_eatom = force->pair->eatom + out_offset; #if defined(LMP_SIMD_COMPILER) - #pragma novector - #endif + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; @@ -753,8 +788,8 @@ void FixIntel::add_oresults(const ft * _noalias const f_in, } } else { #if defined(LMP_SIMD_COMPILER) - #pragma novector - #endif + #pragma novector + #endif for (int i = ifrom; i < ito; i++) { f[i].x += f_in[i].x; f[i].y += f_in[i].y; @@ -833,6 +868,11 @@ void FixIntel::add_off_results(const ft * _noalias const f_in, _offload_nlocal; } + if (atom->torque) + if (f_in[1].w < 0.0) + error->all(FLERR, "Bad matrix inversion in mldivide3"); + add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1); + // Load balance? 
if (_offload_balance < 0.0) { if (neighbor->ago == 0) @@ -860,10 +900,6 @@ void FixIntel::add_off_results(const ft * _noalias const f_in, stop_watch(TIME_IMBALANCE); #endif acc_timers(); - if (atom->torque) - if (f_in[1].w < 0.0) - error->all(FLERR, "Bad matrix inversion in mldivide3"); - add_results(f_in, ev_global, _off_results_eatom, _off_results_vatom, 1); } /* ---------------------------------------------------------------------- */ @@ -895,7 +931,7 @@ void FixIntel::output_timing_data() { balance_out[0] = _balance_pair; balance_out[1] = _balance_neighbor; MPI_Reduce(balance_out, balance_in, 2, MPI_DOUBLE, MPI_SUM, - 0, _real_space_comm); + 0, _real_space_comm); balance_in[0] /= size; balance_in[1] /= size; @@ -922,25 +958,25 @@ void FixIntel::output_timing_data() { balance_in[1]); fprintf(_tscreen, " Offload Pair Balance %f\n", balance_in[0]); - fprintf(_tscreen, " Offload Ghost Atoms "); - if (_offload_noghost) fprintf(_tscreen,"No\n"); - else fprintf(_tscreen,"Yes\n"); + fprintf(_tscreen, " Offload Ghost Atoms "); + if (_offload_noghost) fprintf(_tscreen,"No\n"); + else fprintf(_tscreen,"Yes\n"); #ifdef TIME_BALANCE fprintf(_tscreen, " Offload Imbalance Seconds %f\n", timers[TIME_IMBALANCE]); - fprintf(_tscreen, " Offload Min/Max Seconds "); - for (int i = 0; i < NUM_ITIMERS; i++) - fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]); - fprintf(_tscreen, "\n"); + fprintf(_tscreen, " Offload Min/Max Seconds "); + for (int i = 0; i < NUM_ITIMERS; i++) + fprintf(_tscreen, "[%f, %f] ",timers_min[i],timers_max[i]); + fprintf(_tscreen, "\n"); #endif - double ht = timers[TIME_HOST_NEIGHBOR] + timers[TIME_HOST_PAIR] + - timers[TIME_OFFLOAD_WAIT]; - double ct = timers[TIME_OFFLOAD_NEIGHBOR] + - timers[TIME_OFFLOAD_PAIR]; - double tt = MAX(ht,ct); - if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0) - error->warning(FLERR, - "Leaving a core free can improve performance for offload"); + double ht = timers[TIME_HOST_NEIGHBOR] + 
timers[TIME_HOST_PAIR] + + timers[TIME_OFFLOAD_WAIT]; + double ct = timers[TIME_OFFLOAD_NEIGHBOR] + + timers[TIME_OFFLOAD_PAIR]; + double tt = MAX(ht,ct); + if (timers[TIME_OFFLOAD_LATENCY] / tt > 0.07 && _separate_coi == 0) + error->warning(FLERR, + "Leaving a core free can improve performance for offload"); } fprintf(_tscreen, "------------------------------------------------\n"); } @@ -963,14 +999,14 @@ int FixIntel::get_ppn(int &node_rank) { node_name[name_length] = '\0'; char *node_names = new char[MPI_MAX_PROCESSOR_NAME*nprocs]; MPI_Allgather(node_name, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, node_names, - MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm); + MPI_MAX_PROCESSOR_NAME, MPI_CHAR, _real_space_comm); int ppn = 0; node_rank = 0; for (int i = 0; i < nprocs; i++) { if (strcmp(node_name, node_names + i * MPI_MAX_PROCESSOR_NAME) == 0) { ppn++; if (i < rank) - node_rank++; + node_rank++; } } @@ -1032,19 +1068,19 @@ void FixIntel::set_offload_affinity() kmp_create_affinity_mask(&mask); int proc = offload_threads * node_rank + tnum; #ifdef __AVX512F__ - proc = (proc / offload_tpc) + (proc % offload_tpc) * - ((offload_cores) / 4); + proc = (proc / offload_tpc) + (proc % offload_tpc) * + ((offload_cores) / 4); proc += 68; #else if (offload_affinity_balanced) - proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1; + proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1; else - proc += (proc / 4) * (4 - offload_tpc) + 1; + proc += (proc / 4) * (4 - offload_tpc) + 1; #endif kmp_set_affinity_mask_proc(proc, &mask); if (kmp_set_affinity(&mask) != 0) - printf("Could not set affinity on rank %d thread %d to %d\n", - node_rank, tnum, proc); + printf("Could not set affinity on rank %d thread %d to %d\n", + node_rank, tnum, proc); } } @@ -1074,7 +1110,7 @@ int FixIntel::set_host_affinity(const int nomp) char cmd[512]; char readbuf[INTEL_MAX_HOST_CORE_COUNT*5]; sprintf(cmd, "lscpu -p | grep -v '#' |" - "sort -t, -k 3,3n -k 2,2n | awk -F, '{print $1}'"); + "sort -t, -k 3,3n -k 
2,2n | awk -F, '{print $1}'"); p = popen(cmd, "r"); if (p == NULL) return -1; ncores = 0; @@ -1111,7 +1147,7 @@ int FixIntel::set_host_affinity(const int nomp) if (subscription > ncores) { if (rank == 0) error->warning(FLERR, - "More MPI tasks/OpenMP threads than available cores"); + "More MPI tasks/OpenMP threads than available cores"); return 0; } if (subscription == ncores) @@ -1137,10 +1173,10 @@ int FixIntel::set_host_affinity(const int nomp) int first = coi_cores + node_rank * mpi_cores; CPU_ZERO(&cpuset); for (int i = first; i < first + mpi_cores; i++) - CPU_SET(proc_list[i], &cpuset); + CPU_SET(proc_list[i], &cpuset); if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) { - fail = 1; - break; + fail = 1; + break; } plwp++; } @@ -1153,13 +1189,13 @@ int FixIntel::set_host_affinity(const int nomp) buf1 = (float*) malloc(sizeof(float)*pragma_size); #pragma offload target (mic:0) mandatory \ - in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \ + in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \ signal(&sig1) { buf1[0] = 0.0; } #pragma offload_wait target(mic:0) wait(&sig1) #pragma offload target (mic:0) mandatory \ - out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \ + out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \ signal(&sig2) { buf1[0] = 1.0; } #pragma offload_wait target(mic:0) wait(&sig2) @@ -1175,11 +1211,11 @@ int FixIntel::set_host_affinity(const int nomp) CPU_ZERO(&cpuset); for(int i=0; i<coi_cores; i++) - CPU_SET(proc_list[i], &cpuset); + CPU_SET(proc_list[i], &cpuset); if (sched_setaffinity(lwp, sizeof(cpu_set_t), &cpuset)) { - fail = 1; - break; + fail = 1; + break; } } pclose(p); @@ -1192,7 +1228,7 @@ int FixIntel::set_host_affinity(const int nomp) if (screen && rank == 0) { if (coi_cores) fprintf(screen,"Intel Package: Affinitizing %d Offload Threads to %d Cores\n", - mlwp, coi_cores); + mlwp, coi_cores); fprintf(screen,"Intel Package: Affinitizing MPI Tasks to %d Cores Each\n",mpi_cores); } if (fail) return -1; diff --git 
a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h index f4c02b37b58443ac515363c5b5ae02bd375ad378..068e5ed890354275a3b6b1b5835c530733711d4a 100644 --- a/src/USER-INTEL/fix_intel.h +++ b/src/USER-INTEL/fix_intel.h @@ -70,23 +70,32 @@ class FixIntel : public Fix { inline int nbor_pack_width() const { return _nbor_pack_width; } inline void nbor_pack_width(const int w) { _nbor_pack_width = w; } + inline int three_body_neighbor() { return _three_body_neighbor; } + inline void three_body_neighbor(const int i) { _three_body_neighbor = 1; } inline int need_zero(const int tid) { if (_need_reduce == 0 && tid > 0) return 1; return 0; } - inline void set_reduce_flag() { _need_reduce = 1; } + inline void set_reduce_flag() { if (_nthreads > 1) _need_reduce = 1; } inline int lrt() { if (force->kspace_match("pppm/intel", 0)) return _lrt; else return 0; } + inline int pppm_table() { + if (force->kspace_match("pppm/intel", 0) || + force->kspace_match("pppm/disp/intel",0)) + return INTEL_P3M_TABLE; + else return 0; + } + protected: IntelBuffers<float,float> *_single_buffers; IntelBuffers<float,double> *_mixed_buffers; IntelBuffers<double,double> *_double_buffers; - int _precision_mode, _nthreads, _nbor_pack_width; + int _precision_mode, _nthreads, _nbor_pack_width, _three_body_neighbor; public: inline int* get_overflow_flag() { return _overflow_flag; } @@ -94,17 +103,17 @@ class FixIntel : public Fix { inline void add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom = 0, const int vatom = 0, - const int rflag = 0); + const int rflag = 0); inline void add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom = 0, const int vatom = 0, - const int rflag = 0); + const int rflag = 0); inline void add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in, float *ev_in, const int offload, const int eatom = 0, const int vatom = 0, - const int rflag = 0); + const int 
rflag = 0); inline void get_buffern(const int offload, int &nlocal, int &nall, - int &minlocal); + int &minlocal); #ifdef _LMP_INTEL_OFFLOAD void post_force(int vflag); @@ -204,13 +213,13 @@ class FixIntel : public Fix { inline void add_results(const ft * _noalias const f_in, const acc_t * _noalias const ev_global, const int eatom, const int vatom, - const int offload); + const int offload); template <class ft, class acc_t> inline void add_oresults(const ft * _noalias const f_in, - const acc_t * _noalias const ev_global, - const int eatom, const int vatom, - const int out_offset, const int nall); + const acc_t * _noalias const ev_global, + const int eatom, const int vatom, + const int out_offset, const int nall); int _offload_affinity_balanced, _offload_threads, _offload_tpc; #ifdef _LMP_INTEL_OFFLOAD @@ -226,22 +235,25 @@ class FixIntel : public Fix { /* ---------------------------------------------------------------------- */ void FixIntel::get_buffern(const int offload, int &nlocal, int &nall, - int &minlocal) { + int &minlocal) { #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) { if (offload) { if (neighbor->ago != 0) { - nlocal = _offload_nlocal; - nall = _offload_nall; + nlocal = _offload_nlocal; + nall = _offload_nall; } else { - nlocal = atom->nlocal; - nall = nlocal + atom->nghost; + nlocal = atom->nlocal; + nall = nlocal + atom->nghost; } minlocal = 0; } else { nlocal = atom->nlocal; nall = _host_nall; - minlocal = _host_min_local; + if (force->newton) + minlocal = _host_min_local; + else + minlocal = host_start_pair(); } return; } @@ -259,7 +271,7 @@ void FixIntel::get_buffern(const int offload, int &nlocal, int &nall, void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom, const int vatom, - const int rflag) { + const int rflag) { #ifdef _LMP_INTEL_OFFLOAD if (offload) { _off_results_eatom = eatom; @@ -275,7 +287,7 @@ void 
FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in, _results_eatom = eatom; _results_vatom = vatom; #ifndef _LMP_INTEL_OFFLOAD - if (rflag != 2 && _nthreads > 1) _need_reduce = 1; + if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1; #endif if (_overflow_flag[LMP_OVERFLOW]) @@ -287,7 +299,7 @@ void FixIntel::add_result_array(IntelBuffers<double,double>::vec3_acc_t *f_in, void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in, double *ev_in, const int offload, const int eatom, const int vatom, - const int rflag) { + const int rflag) { #ifdef _LMP_INTEL_OFFLOAD if (offload) { _off_results_eatom = eatom; @@ -303,7 +315,7 @@ void FixIntel::add_result_array(IntelBuffers<float,double>::vec3_acc_t *f_in, _results_eatom = eatom; _results_vatom = vatom; #ifndef _LMP_INTEL_OFFLOAD - if (rflag != 2 && _nthreads > 1) _need_reduce = 1; + if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1; #endif if (_overflow_flag[LMP_OVERFLOW]) @@ -331,7 +343,7 @@ void FixIntel::add_result_array(IntelBuffers<float,float>::vec3_acc_t *f_in, _results_eatom = eatom; _results_vatom = vatom; #ifndef _LMP_INTEL_OFFLOAD - if (rflag != 2 && _nthreads > 1) _need_reduce = 1; + if (rflag != 2 && _nthreads > 1 && force->newton) _need_reduce = 1; #endif if (_overflow_flag[LMP_OVERFLOW]) @@ -349,12 +361,12 @@ int FixIntel::offload_end_neighbor() { if (atom->nlocal < 2) error->one(FLERR,"Too few atoms for load balancing offload"); double granularity = 1.0 / atom->nlocal; - if (_balance_neighbor < granularity) + if (_balance_neighbor < granularity) _balance_neighbor = granularity + 1e-10; - else if (_balance_neighbor > 1.0 - granularity) + else if (_balance_neighbor > 1.0 - granularity) _balance_neighbor = 1.0 - granularity + 1e-10; } - return _balance_neighbor * atom->nlocal; + return _balance_neighbor * atom->nlocal; } int FixIntel::offload_end_pair() { @@ -505,7 +517,7 @@ The newton setting must be the same for both pairwise and 
bonded forces. E: Intel styles for bond/angle/dihedral/improper require intel pair style." -You cannot use the USER-INTEL package for bond calculations without a +You cannot use the USER-INTEL package for bond calculations without a USER-INTEL supported pair style. E: Intel styles for kspace require intel pair style. diff --git a/src/USER-INTEL/fix_nh_intel.cpp b/src/USER-INTEL/fix_nh_intel.cpp index 3f76e53c1f8e18851cac35faa3aaeaa622822725..6e44b38ef17439d6c7080ad30c497e8efca6cd56 100644 --- a/src/USER-INTEL/fix_nh_intel.cpp +++ b/src/USER-INTEL/fix_nh_intel.cpp @@ -45,7 +45,7 @@ typedef struct { double x,y,z; } dbl3_t; NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion ---------------------------------------------------------------------- */ -FixNHIntel::FixNHIntel(LAMMPS *lmp, int narg, char **arg) : +FixNHIntel::FixNHIntel(LAMMPS *lmp, int narg, char **arg) : FixNH(lmp, narg, arg) { _dtfm = 0; @@ -118,12 +118,12 @@ void FixNHIntel::remap() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & dilate_group_bit) { - const double d0 = x[i].x - b0; - const double d1 = x[i].y - b1; - const double d2 = x[i].z - b2; - x[i].x = hi0*d0 + hi5*d1 + hi4*d2; - x[i].y = hi1*d1 + hi3*d2; - x[i].z = hi2*d2; + const double d0 = x[i].x - b0; + const double d1 = x[i].y - b1; + const double d2 = x[i].z - b2; + x[i].x = hi0*d0 + hi5*d1 + hi4*d2; + x[i].y = hi1*d1 + hi3*d2; + x[i].z = hi2*d2; } } } @@ -294,9 +294,9 @@ void FixNHIntel::remap() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & dilate_group_bit) { - x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0; - x[i].y = h1*x[i].y + h3*x[i].z + nb1; - x[i].z = h2*x[i].z + nb2; + x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0; + x[i].y = h1*x[i].y + h3*x[i].z + nb1; + x[i].z = h2*x[i].z + nb2; } } } @@ -318,7 +318,7 @@ void FixNHIntel::reset_dt() dto = dthalf; // If using respa, then remap is performed in innermost level - + if (strstr(update->integrate_style,"respa")) dto = 0.5*step_respa[0]; @@ -329,7 
+329,7 @@ void FixNHIntel::reset_dt() tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain); const int * const mask = atom->mask; - const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : + const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; if (nlocal > _nlocal_max) { @@ -345,9 +345,9 @@ void FixNHIntel::reset_dt() const double * const rmass = atom->rmass; int n = 0; for (int i = 0; i < nlocal; i++) { - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; } } else { const double * const mass = atom->mass; @@ -364,29 +364,29 @@ void FixNHIntel::reset_dt() const double * const rmass = atom->rmass; int n = 0; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; + if (mask[i] & groupbit) { + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; } else { - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - } + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + } } else { const double * const mass = atom->mass; const int * const type = atom->type; int n = 0; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - _dtfm[n++] = dtf / mass[type[i]]; - _dtfm[n++] = dtf / mass[type[i]]; - _dtfm[n++] = dtf / mass[type[i]]; + if (mask[i] & groupbit) { + _dtfm[n++] = dtf / mass[type[i]]; + _dtfm[n++] = dtf / mass[type[i]]; + _dtfm[n++] = dtf / mass[type[i]]; } else { - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - } + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + } } } } @@ -431,9 +431,9 @@ void FixNHIntel::nh_v_press() #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { - v[i].x *= f0; - v[i].y *= f1; - v[i].z *= f2; + v[i].x *= f0; + v[i].y *= f1; + v[i].z *= f2; } } } @@ -506,7 +506,7 @@ void 
FixNHIntel::nh_v_temp() #pragma simd #endif for (int i = 0; i < _nlocal3; i++) - v[i] *= factor_eta; + v[i] *= factor_eta; } else { #if defined(LMP_SIMD_COMPILER) #pragma vector aligned @@ -514,12 +514,12 @@ void FixNHIntel::nh_v_temp() #endif for (int i = 0; i < _nlocal3; i++) { if (_dtfm[i] != 0.0) - v[i] *= factor_eta; + v[i] *= factor_eta; } } } -double FixNHIntel::memory_usage() +double FixNHIntel::memory_usage() { return FixNH::memory_usage() + _nlocal_max * 3 * sizeof(double); } diff --git a/src/USER-INTEL/fix_nh_intel.h b/src/USER-INTEL/fix_nh_intel.h index 32ed6c8534be31769a7781e61e205940fc030d68..cc6ba8c481b1cb01fe3ba03bc5808618b26885b7 100644 --- a/src/USER-INTEL/fix_nh_intel.h +++ b/src/USER-INTEL/fix_nh_intel.h @@ -35,7 +35,7 @@ class FixNHIntel : public FixNH { int _nlocal3, _nlocal_max; virtual void remap(); - virtual void nve_x(); + virtual void nve_x(); virtual void nve_v(); virtual void nh_v_press(); virtual void nh_v_temp(); diff --git a/src/USER-INTEL/fix_nve_asphere_intel.cpp b/src/USER-INTEL/fix_nve_asphere_intel.cpp index 65631654542ed278aaea99ac74e394b437257fdb..8ad63f7326dfac6df7fa5f4e1e2cb36bb2368ee6 100644 --- a/src/USER-INTEL/fix_nve_asphere_intel.cpp +++ b/src/USER-INTEL/fix_nve_asphere_intel.cpp @@ -36,7 +36,7 @@ using namespace FixConst; /* ---------------------------------------------------------------------- */ FixNVEAsphereIntel::FixNVEAsphereIntel(LAMMPS *lmp, int narg, char **arg) : - FixNVE(lmp, narg, arg) + FixNVE(lmp, narg, arg) { _dtfm = 0; _nlocal3 = 0; @@ -129,9 +129,9 @@ void FixNVEAsphereIntel::initial_integrate(int vflag) #endif for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { - double *quat = bonus[ellipsoid[i]].quat; - ME_omega_richardson(dtf, dtq, angmom[i], quat, torque[i], _inertia0[i], - _inertia1[i], _inertia2[i]); + double *quat = bonus[ellipsoid[i]].quat; + ME_omega_richardson(dtf, dtq, angmom[i], quat, torque[i], _inertia0[i], + _inertia1[i], _inertia2[i]); } } } @@ -168,7 +168,7 @@ void 
FixNVEAsphereIntel::reset_dt() { dtf = 0.5 * update->dt * force->ftm2v; const int * const mask = atom->mask; - const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : + const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; if (nlocal > _nlocal_max) { @@ -211,27 +211,27 @@ void FixNVEAsphereIntel::reset_dt() { for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - double *shape = bonus[ellipsoid[i]].shape; - double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]); - if (idot != 0.0) idot = 1.0 / idot; - _inertia0[i] = idot; - idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]); - if (idot != 0.0) idot = 1.0 / idot; - _inertia1[i] = idot; - idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]); - if (idot != 0.0) idot = 1.0 / idot; - _inertia2[i] = idot; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + double *shape = bonus[ellipsoid[i]].shape; + double idot = INERTIA*rmass[i] * (shape[1]*shape[1]+shape[2]*shape[2]); + if (idot != 0.0) idot = 1.0 / idot; + _inertia0[i] = idot; + idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[2]*shape[2]); + if (idot != 0.0) idot = 1.0 / idot; + _inertia1[i] = idot; + idot = INERTIA*rmass[i] * (shape[0]*shape[0]+shape[1]*shape[1]); + if (idot != 0.0) idot = 1.0 / idot; + _inertia2[i] = idot; } else { _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; } } } } -double FixNVEAsphereIntel::memory_usage() +double FixNVEAsphereIntel::memory_usage() { return FixNVE::memory_usage() + _nlocal_max * 12 * sizeof(double); } diff --git a/src/USER-INTEL/fix_nve_intel.cpp b/src/USER-INTEL/fix_nve_intel.cpp index 3fb290b3abd8e4925edeafc460acca7b8b97ed83..c0f6da06ae9d6f53ed5a875fdb17137f73551665 100644 --- a/src/USER-INTEL/fix_nve_intel.cpp +++ b/src/USER-INTEL/fix_nve_intel.cpp @@ -29,7 +29,7 @@ using namespace 
FixConst; /* ---------------------------------------------------------------------- */ FixNVEIntel::FixNVEIntel(LAMMPS *lmp, int narg, char **arg) : - FixNVE(lmp, narg, arg) + FixNVE(lmp, narg, arg) { _dtfm = 0; _nlocal3 = 0; @@ -91,7 +91,7 @@ void FixNVEIntel::initial_integrate(int vflag) for (int i = 0; i < _nlocal3; i++) { if (_dtfm[i] != 0.0) { v[i] += _dtfm[i] * f[i]; - x[i] += dtv * v[i]; + x[i] += dtv * v[i]; } } } @@ -130,7 +130,7 @@ void FixNVEIntel::reset_dt() { dtf = 0.5 * update->dt * force->ftm2v; const int * const mask = atom->mask; - const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : + const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst : atom->nlocal; if (nlocal > _nlocal_max) { @@ -146,9 +146,9 @@ void FixNVEIntel::reset_dt() { const double * const rmass = atom->rmass; int n = 0; for (int i = 0; i < nlocal; i++) { - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; } } else { const double * const mass = atom->mass; @@ -165,34 +165,34 @@ void FixNVEIntel::reset_dt() { const double * const rmass = atom->rmass; int n = 0; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; - _dtfm[n++] = dtf / rmass[i]; + if (mask[i] & groupbit) { + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; + _dtfm[n++] = dtf / rmass[i]; } else { - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - } + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + } } else { const double * const mass = atom->mass; const int * const type = atom->type; int n = 0; for (int i = 0; i < nlocal; i++) - if (mask[i] & groupbit) { - _dtfm[n++] = dtf / mass[type[i]]; - _dtfm[n++] = dtf / mass[type[i]]; - _dtfm[n++] = dtf / mass[type[i]]; + if (mask[i] & groupbit) { + _dtfm[n++] = dtf / mass[type[i]]; + _dtfm[n++] = dtf / mass[type[i]]; + 
_dtfm[n++] = dtf / mass[type[i]]; } else { - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - _dtfm[n++] = 0.0; - } + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + _dtfm[n++] = 0.0; + } } } } -double FixNVEIntel::memory_usage() +double FixNVEIntel::memory_usage() { return FixNVE::memory_usage() + _nlocal_max * 3 * sizeof(double); } diff --git a/src/USER-INTEL/improper_cvff_intel.cpp b/src/USER-INTEL/improper_cvff_intel.cpp index 0fb02420b92684c2e3a6fd0277d574b85d5e7461..dc9765d9130670ec7dd2e991dc8d936b66240142 100644 --- a/src/USER-INTEL/improper_cvff_intel.cpp +++ b/src/USER-INTEL/improper_cvff_intel.cpp @@ -42,7 +42,7 @@ typedef struct { int a,b,c,d,t; } int5_t; /* ---------------------------------------------------------------------- */ -ImproperCvffIntel::ImproperCvffIntel(LAMMPS *lmp) : +ImproperCvffIntel::ImproperCvffIntel(LAMMPS *lmp) : ImproperCvff(lmp) { suffix_flag |= Suffix::INTEL; @@ -80,23 +80,23 @@ void ImproperCvffIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void ImproperCvffIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -108,10 +108,10 @@ void ImproperCvffIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void ImproperCvffIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> 
*buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void ImproperCvffIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nimproperlist; if (inum == 0) return; @@ -131,12 +131,9 @@ void ImproperCvffIntel::eval(const int vflag, const int nthreads = tc; acc_t oeimproper, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oeimproper = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oeimproper = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) @@ -145,17 +142,31 @@ void ImproperCvffIntel::eval(const int vflag, reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF_FIX IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int5_t * _noalias const improperlist = + const int5_t * _noalias const improperlist = (int5_t *) neighbor->improperlist[0]; + #ifdef LMP_INTEL_USE_SIMDOFF_FIX + acc_t seimproper, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) seimproper = (acc_t)0.0; + if (VFLAG && vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5) for (int n = nfrom; n < nto; n++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = improperlist[n].a; const int i2 = improperlist[n].b; const int i3 = improperlist[n].c; @@ -216,28 +227,29 @@ void ImproperCvffIntel::eval(const int vflag, flt_t c = (c0 + c1mag*c2mag) * s12; // error check - + #ifndef LMP_INTEL_USE_SIMDOFF_FIX if (c > PTOLERANCE || c < MTOLERANCE) { int me; - 
MPI_Comm_rank(world,&me); - if (screen) { + MPI_Comm_rank(world,&me); + if (screen) { char str[128]; - sprintf(str,"Improper problem: %d " BIGINT_FORMAT " " + sprintf(str,"Improper problem: %d " BIGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, me,update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); - error->warning(FLERR,str,0); - fprintf(screen," 1st atom: %d %g %g %g\n", + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", me,x[i1].x,x[i1].y,x[i1].z); - fprintf(screen," 2nd atom: %d %g %g %g\n", + fprintf(screen," 2nd atom: %d %g %g %g\n", me,x[i2].x,x[i2].y,x[i2].z); - fprintf(screen," 3rd atom: %d %g %g %g\n", + fprintf(screen," 3rd atom: %d %g %g %g\n", me,x[i3].x,x[i3].y,x[i3].z); - fprintf(screen," 4th atom: %d %g %g %g\n", + fprintf(screen," 4th atom: %d %g %g %g\n", me,x[i4].x,x[i4].y,x[i4].z); } } + #endif if (c > (flt_t)1.0) c = (flt_t)1.0; if (c < (flt_t)-1.0) c = (flt_t)-1.0; @@ -250,36 +262,41 @@ void ImproperCvffIntel::eval(const int vflag, const int m = fc.fc[type].multiplicity; flt_t p, pd; - if (m == 2) { - p = (flt_t)2.0*c*c; - pd = (flt_t)2.0*c; - } else if (m == 3) { - const flt_t rc2 = c*c; - p = ((flt_t)4.0*rc2-(flt_t)3.0)*c + (flt_t)1.0; - pd = (flt_t)6.0*rc2 - (flt_t)1.5; - } else if (m == 4) { - const flt_t rc2 = c*c; - p = (flt_t)8.0*(rc2-1)*rc2 + (flt_t)2.0; - pd = ((flt_t)16.0*rc2-(flt_t)8.0)*c; - } else if (m == 6) { - const flt_t rc2 = c*c; - p = (((flt_t)32.0*rc2-(flt_t)48.0)*rc2 + (flt_t)18.0)*rc2; - pd = ((flt_t)96.0*(rc2-(flt_t)1.0)*rc2 + (flt_t)18.0)*c; - } else if (m == 1) { - p = c + (flt_t)1.0; - pd = (flt_t)0.5; - } else if (m == 5) { - const flt_t rc2 = c*c; - p = (((flt_t)16.0*rc2-(flt_t)20.0)*rc2 + (flt_t)5.0)*c + (flt_t)1.0; - pd = ((flt_t)40.0*rc2-(flt_t)30.0)*rc2 + (flt_t)2.5; - } else if (m == 0) { - p = (flt_t)2.0; - pd = (flt_t)0.0; + #ifdef LMP_INTEL_USE_SIMDOFF_FIX + #pragma simdoff + #endif + { + if (m == 2) { + p = 
(flt_t)2.0*c*c; + pd = (flt_t)2.0*c; + } else if (m == 3) { + const flt_t rc2 = c*c; + p = ((flt_t)4.0*rc2-(flt_t)3.0)*c + (flt_t)1.0; + pd = (flt_t)6.0*rc2 - (flt_t)1.5; + } else if (m == 4) { + const flt_t rc2 = c*c; + p = (flt_t)8.0*(rc2-1)*rc2 + (flt_t)2.0; + pd = ((flt_t)16.0*rc2-(flt_t)8.0)*c; + } else if (m == 6) { + const flt_t rc2 = c*c; + p = (((flt_t)32.0*rc2-(flt_t)48.0)*rc2 + (flt_t)18.0)*rc2; + pd = ((flt_t)96.0*(rc2-(flt_t)1.0)*rc2 + (flt_t)18.0)*c; + } else if (m == 1) { + p = c + (flt_t)1.0; + pd = (flt_t)0.5; + } else if (m == 5) { + const flt_t rc2 = c*c; + p = (((flt_t)16.0*rc2-(flt_t)20.0)*rc2 + (flt_t)5.0)*c + (flt_t)1.0; + pd = ((flt_t)40.0*rc2-(flt_t)30.0)*rc2 + (flt_t)2.5; + } else if (m == 0) { + p = (flt_t)2.0; + pd = (flt_t)0.0; + } } if (fc.fc[type].sign == -1) { - p = (flt_t)2.0 - p; - pd = -pd; + p = (flt_t)2.0 - p; + pd = -pd; } flt_t eimproper; @@ -317,46 +334,63 @@ void ImproperCvffIntel::eval(const int vflag, // apply force to each of 4 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; - } + #ifdef LMP_INTEL_USE_SIMDOFF_FIX + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; + } - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f2x; - f[i2].y += f2y; - f[i2].z += f2z; - } + if (NEWTON_BOND || i2 < nlocal) { + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; + } - if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; - } + if (NEWTON_BOND || i3 < nlocal) { + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; + } - if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4x; - f[i4].y += f4y; - f[i4].z += f4z; + if (NEWTON_BOND || i4 < nlocal) { + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; + } } - if (EVFLAG) { - IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, eimproper, i1, i2, i3, i4, - f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, - vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, - 
vb3y, vb3z, oeimproper, f, NEWTON_BOND, nlocal, - ov0, ov1, ov2, ov3, ov4, ov5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF_FIX + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2, + i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, + f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, + vb3x, vb3y, vb3z, seimproper, f, NEWTON_BOND, + nlocal, sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2, + i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, + f4z, vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, + vb3x, vb3y, vb3z, oeimproper, f, NEWTON_BOND, + nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + #endif } } // for n - } // omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oeimproper; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; + #ifdef LMP_INTEL_USE_SIMDOFF_FIX + if (EFLAG) oeimproper += seimproper; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; } + #endif + } // omp parallel + if (EFLAG) energy += oeimproper; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -394,7 +428,7 @@ void ImproperCvffIntel::init_style() template <class flt_t, class acc_t> void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { const int bp1 = atom->nimpropertypes + 1; fc.set_ntypes(bp1,memory); @@ -410,11 +444,11 @@ void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void ImproperCvffIntel::ForceConst<flt_t>::set_ntypes(const int nimproper, - Memory *memory) { + Memory *memory) { if (nimproper != _nimpropertypes) { if (_nimpropertypes > 0) _memory->destroy(fc); - + if (nimproper > 0) _memory->create(fc,nimproper,"improperharmonicintel.fc"); } diff --git 
a/src/USER-INTEL/improper_cvff_intel.h b/src/USER-INTEL/improper_cvff_intel.h index 95ccd8f9d212c8ebd4c174ec3bd376086b752a52..cb5da25f992d26f95f365918f1a5691c0e3f0fe0 100644 --- a/src/USER-INTEL/improper_cvff_intel.h +++ b/src/USER-INTEL/improper_cvff_intel.h @@ -45,8 +45,8 @@ class ImproperCvffIntel : public ImproperCvff { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/improper_harmonic_intel.cpp b/src/USER-INTEL/improper_harmonic_intel.cpp index 071ff548ea8419fd90c8c1c3a235e0a53856e412..fe0efca5ec163d50f4a75643223f6c8df0980a95 100644 --- a/src/USER-INTEL/improper_harmonic_intel.cpp +++ b/src/USER-INTEL/improper_harmonic_intel.cpp @@ -43,7 +43,7 @@ typedef struct { int a,b,c,d,t; } int5_t; /* ---------------------------------------------------------------------- */ -ImproperHarmonicIntel::ImproperHarmonicIntel(LAMMPS *lmp) : +ImproperHarmonicIntel::ImproperHarmonicIntel(LAMMPS *lmp) : ImproperHarmonic(lmp) { suffix_flag |= Suffix::INTEL; @@ -81,23 +81,23 @@ void ImproperHarmonicIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void ImproperHarmonicIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) ev_setup(eflag,vflag); else evflag = 0; if (evflag) { - if (eflag) { + if (vflag && !eflag) { if (force->newton_bond) - eval<1,1,1>(vflag, buffers, fc); + eval<0,1,1>(vflag, buffers, fc); else - eval<1,1,0>(vflag, buffers, fc); + 
eval<0,1,0>(vflag, buffers, fc); } else { if (force->newton_bond) - eval<1,0,1>(vflag, buffers, fc); + eval<1,1,1>(vflag, buffers, fc); else - eval<1,0,0>(vflag, buffers, fc); + eval<1,1,0>(vflag, buffers, fc); } } else { if (force->newton_bond) @@ -109,10 +109,10 @@ void ImproperHarmonicIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> -void ImproperHarmonicIntel::eval(const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) +template <int EFLAG, int VFLAG, int NEWTON_BOND, class flt_t, class acc_t> +void ImproperHarmonicIntel::eval(const int vflag, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { const int inum = neighbor->nimproperlist; if (inum == 0) return; @@ -132,12 +132,9 @@ void ImproperHarmonicIntel::eval(const int vflag, const int nthreads = tc; acc_t oeimproper, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - if (EFLAG) - oeimproper = (acc_t)0.0; - if (vflag) { - ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oeimproper = (acc_t)0.0; + if (VFLAG && vflag) { + ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; } #if defined(_OPENMP) @@ -146,17 +143,31 @@ void ImproperHarmonicIntel::eval(const int vflag, reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int nfrom, nto, tid; + int nfrom, npl, nto, tid; + #ifdef LMP_INTEL_USE_SIMDOFF IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads); + #else + IP_PRE_omp_stride_id(nfrom, npl, nto, tid, inum, nthreads); + #endif FORCE_T * _noalias const f = f_start + (tid * f_stride); if (fix->need_zero(tid)) memset(f, 0, f_stride * sizeof(FORCE_T)); - const int5_t * _noalias const improperlist = + const int5_t * _noalias const improperlist = (int5_t *) neighbor->improperlist[0]; + #ifdef LMP_INTEL_USE_SIMDOFF + acc_t seimproper, sv0, sv1, sv2, sv3, sv4, sv5; + if (EFLAG) seimproper = (acc_t)0.0; + if (VFLAG 
&& vflag) { + sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; + } + #pragma simd reduction(+:seimproper, sv0, sv1, sv2, sv3, sv4, sv5) for (int n = nfrom; n < nto; n++) { + #else + for (int n = nfrom; n < nto; n += npl) { + #endif const int i1 = improperlist[n].a; const int i2 = improperlist[n].b; const int i3 = improperlist[n].c; @@ -207,28 +218,29 @@ void ImproperHarmonicIntel::eval(const int vflag, flt_t c = (c1*c2 + c0) * s12; // error check - + #ifndef LMP_INTEL_USE_SIMDOFF if (c > PTOLERANCE || c < MTOLERANCE) { int me; - MPI_Comm_rank(world,&me); - if (screen) { + MPI_Comm_rank(world,&me); + if (screen) { char str[128]; - sprintf(str,"Improper problem: %d " BIGINT_FORMAT " " + sprintf(str,"Improper problem: %d " BIGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT " " TAGINT_FORMAT, me,update->ntimestep, atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]); - error->warning(FLERR,str,0); - fprintf(screen," 1st atom: %d %g %g %g\n", + error->warning(FLERR,str,0); + fprintf(screen," 1st atom: %d %g %g %g\n", me,x[i1].x,x[i1].y,x[i1].z); - fprintf(screen," 2nd atom: %d %g %g %g\n", + fprintf(screen," 2nd atom: %d %g %g %g\n", me,x[i2].x,x[i2].y,x[i2].z); - fprintf(screen," 3rd atom: %d %g %g %g\n", + fprintf(screen," 3rd atom: %d %g %g %g\n", me,x[i3].x,x[i3].y,x[i3].z); - fprintf(screen," 4th atom: %d %g %g %g\n", + fprintf(screen," 4th atom: %d %g %g %g\n", me,x[i4].x,x[i4].y,x[i4].z); } } + #endif if (c > (flt_t)1.0) c = (flt_t)1.0; if (c < (flt_t)-1.0) c = (flt_t)-1.0; @@ -278,46 +290,63 @@ void ImproperHarmonicIntel::eval(const int vflag, // apply force to each of 4 atoms - if (NEWTON_BOND || i1 < nlocal) { - f[i1].x += f1x; - f[i1].y += f1y; - f[i1].z += f1z; - } + #ifdef LMP_INTEL_USE_SIMDOFF + #pragma simdoff + #endif + { + if (NEWTON_BOND || i1 < nlocal) { + f[i1].x += f1x; + f[i1].y += f1y; + f[i1].z += f1z; + } - if (NEWTON_BOND || i2 < nlocal) { - f[i2].x += f2x; - f[i2].y += f2y; - f[i2].z += f2z; - } + if (NEWTON_BOND || i2 < 
nlocal) { + f[i2].x += f2x; + f[i2].y += f2y; + f[i2].z += f2z; + } - if (NEWTON_BOND || i3 < nlocal) { - f[i3].x += f3x; - f[i3].y += f3y; - f[i3].z += f3z; - } + if (NEWTON_BOND || i3 < nlocal) { + f[i3].x += f3x; + f[i3].y += f3y; + f[i3].z += f3z; + } - if (NEWTON_BOND || i4 < nlocal) { - f[i4].x += f4x; - f[i4].y += f4y; - f[i4].z += f4z; + if (NEWTON_BOND || i4 < nlocal) { + f[i4].x += f4x; + f[i4].y += f4y; + f[i4].z += f4z; + } } - if (EVFLAG) { - IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, eimproper, i1, i2, i3, i4, - f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, - vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, vb3x, vb3y, - vb3z, oeimproper, f, NEWTON_BOND, nlocal, - ov0, ov1, ov2, ov3, ov4, ov5); + if (EFLAG || VFLAG) { + #ifdef LMP_INTEL_USE_SIMDOFF + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2, + i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, + f4y, f4z, vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, + vb3x, vb3y, vb3z, seimproper, f, NEWTON_BOND, + nlocal, sv0, sv1, sv2, sv3, sv4, sv5); + #else + IP_PRE_ev_tally_dihed(EFLAG, VFLAG, eatom, vflag, eimproper, i1, i2, + i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x, + f4y, f4z, vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, + vb3x, vb3y, vb3z, oeimproper, f, NEWTON_BOND, + nlocal, ov0, ov1, ov2, ov3, ov4, ov5); + #endif } } // for n - } // omp parallel - if (EVFLAG) { - if (EFLAG) - energy += oeimproper; - if (vflag) { - virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; - virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; + #ifdef LMP_INTEL_USE_SIMDOFF + if (EFLAG) oeimproper += seimproper; + if (VFLAG && vflag) { + ov0 += sv0; ov1 += sv1; ov2 += sv2; + ov3 += sv3; ov4 += sv4; ov5 += sv5; } + #endif + } // omp parallel + if (EFLAG) energy += oeimproper; + if (VFLAG && vflag) { + virial[0] += ov0; virial[1] += ov1; virial[2] += ov2; + virial[3] += ov3; virial[4] += ov4; virial[5] += ov5; } fix->set_reduce_flag(); @@ -355,7 +384,7 @@ void ImproperHarmonicIntel::init_style() template <class flt_t, class acc_t> void 
ImproperHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { const int bp1 = atom->nimpropertypes + 1; fc.set_ntypes(bp1,memory); @@ -370,11 +399,11 @@ void ImproperHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void ImproperHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nimproper, - Memory *memory) { + Memory *memory) { if (nimproper != _nimpropertypes) { if (_nimpropertypes > 0) _memory->destroy(fc); - + if (nimproper > 0) _memory->create(fc,nimproper,"improperharmonicintel.fc"); } diff --git a/src/USER-INTEL/improper_harmonic_intel.h b/src/USER-INTEL/improper_harmonic_intel.h index 4e3838386347809afd3b228da7d9fc57151105e8..0b759b4e43e894d30d5a9ec5390b4259302a69a4 100644 --- a/src/USER-INTEL/improper_harmonic_intel.h +++ b/src/USER-INTEL/improper_harmonic_intel.h @@ -45,8 +45,8 @@ class ImproperHarmonicIntel : public ImproperHarmonic { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t> - void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc); + void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t, acc_t> *buffers); diff --git a/src/USER-INTEL/intel_buffers.cpp b/src/USER-INTEL/intel_buffers.cpp index c81dffec8333bc0906de384446c039c222267e7e..3664bc248bdb77d9c14777f2644f1fb74de0d190 100644 --- a/src/USER-INTEL/intel_buffers.cpp +++ b/src/USER-INTEL/intel_buffers.cpp @@ -12,6 +12,7 @@ Contributing author: W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include <math.h> #include "intel_buffers.h" #include "force.h" #include "memory.h" @@ -28,6 +29,7 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) : _ntypes = 0; _off_map_listlocal = 0; _ccachex = 0; + _ncache_alloc = 0; #ifdef _LMP_INTEL_OFFLOAD _separate_buffers = 0; _off_f = 0; @@ -36,6 +38,7 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) : _off_list_alloc = false; _off_threads = 0; _off_ccache = 0; + _off_ncache = 0; _host_nmax = 0; #endif } @@ -68,8 +71,8 @@ void IntelBuffers<flt_t, acc_t>::free_buffers() if (ev_global != 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(x:alloc_if(0) free_if(1)) \ - nocopy(f_start:alloc_if(0) free_if(1)) \ - nocopy(ev_global:alloc_if(0) free_if(1)) + nocopy(f_start:alloc_if(0) free_if(1)) \ + nocopy(ev_global:alloc_if(0) free_if(1)) } if (q != 0) { @@ -102,8 +105,8 @@ void IntelBuffers<flt_t, acc_t>::free_buffers() template <class flt_t, class acc_t> void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal, - const int nthreads, - const int offload_end) + const int nthreads, + const int offload_end) { free_buffers(); _buf_size = static_cast<double>(nall) * 1.1 + 1; @@ -111,15 +114,20 @@ void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal, _buf_local_size = _buf_size; else _buf_local_size = static_cast<double>(nlocal) * 1.1 + 1; - if (lmp->atom->torque) - _buf_local_size *= 2; const int f_stride = get_stride(_buf_local_size); lmp->memory->create(_x, _buf_size,"intel_x"); if (lmp->atom->q != NULL) lmp->memory->create(_q, _buf_size, "intel_q"); if (lmp->atom->ellipsoid != NULL) lmp->memory->create(_quat, _buf_size, "intel_quat"); - lmp->memory->create(_f, f_stride * nthreads, "intel_f"); + #ifdef _LMP_INTEL_OFFLOAD + if (lmp->force->newton_pair) + #else + if (lmp->force->newton_pair || lmp->atom->molecular) + #endif + lmp->memory->create(_f, f_stride * 
nthreads, "intel_f"); + else + lmp->memory->create(_f, f_stride, "intel_f"); #ifdef _LMP_INTEL_OFFLOAD if (_separate_buffers) { @@ -131,7 +139,10 @@ void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal, } if (offload_end > 0) { - lmp->memory->create(_off_f, f_stride * _off_threads, "intel_off_f"); + int fm; + if (lmp->force->newton_pair) fm = _off_threads; + else fm = 1; + lmp->memory->create(_off_f, f_stride * fm, "intel_off_f"); const atom_t * const x = get_x(); const flt_t * const q = get_q(); const vec3_acc_t * f_start = get_off_f(); @@ -140,15 +151,15 @@ void IntelBuffers<flt_t, acc_t>::_grow(const int nall, const int nlocal, if (x != NULL && q != NULL && f_start != NULL && ev_global != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(x,q:length(_buf_size) alloc_if(1) free_if(0)) \ - nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\ - nocopy(ev_global:length(8) alloc_if(1) free_if(0)) + nocopy(f_start:length(f_stride*fm) alloc_if(1) free_if(0))\ + nocopy(ev_global:length(8) alloc_if(1) free_if(0)) } } else { if (x != NULL && f_start != NULL && ev_global != NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(x:length(_buf_size) alloc_if(1) free_if(0)) \ - nocopy(f_start:length(f_stride*_off_threads) alloc_if(1) free_if(0))\ - nocopy(ev_global:length(8) alloc_if(1) free_if(0)) + nocopy(f_start:length(f_stride*fm) alloc_if(1) free_if(0))\ + nocopy(ev_global:length(8) alloc_if(1) free_if(0)) } } if (lmp->atom->ellipsoid != NULL) { @@ -175,7 +186,7 @@ void IntelBuffers<flt_t, acc_t>::free_nmax() if (tag != 0 && special != 0 && nspecial !=0) { #pragma offload_transfer target(mic:_cop) \ nocopy(tag:alloc_if(0) free_if(1)) \ - nocopy(special,nspecial:alloc_if(0) free_if(1)) + nocopy(special,nspecial:alloc_if(0) free_if(1)) } _off_map_nmax = 0; _host_nmax = 0; @@ -250,7 +261,7 @@ void IntelBuffers<flt_t, acc_t>::free_list_local() template <class flt_t, class acc_t> void IntelBuffers<flt_t, 
acc_t>::_grow_list_local(NeighList *list, - const int offload_end) + const int offload_end) { free_list_local(); int size = list->get_maxlocal(); @@ -265,7 +276,7 @@ void IntelBuffers<flt_t, acc_t>::_grow_list_local(NeighList *list, if (cnumneigh != 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(ilist:length(size) alloc_if(1) free_if(0)) \ - nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \ + nocopy(numneigh:length(size) alloc_if(1) free_if(0)) \ nocopy(cnumneigh:length(size) alloc_if(1) free_if(0)) } _off_map_ilist = ilist; @@ -298,14 +309,14 @@ void IntelBuffers<flt_t, acc_t>::free_nbor_list() template <class flt_t, class acc_t> void IntelBuffers<flt_t, acc_t>::_grow_nbor_list(NeighList *list, const int nlocal, - const int nthreads, - const int offload_end, - const int pack_width) + const int nthreads, + const int offload_end, + const int pack_width) { free_nbor_list(); _list_alloc_atoms = 1.10 * nlocal; int nt = MAX(nthreads, _off_threads); - int list_alloc_size = (_list_alloc_atoms + nt * 2 + pack_width - 1) * + int list_alloc_size = (_list_alloc_atoms + nt * 2 + pack_width - 1) * get_max_nbors(); lmp->memory->create(_list_alloc, list_alloc_size, "_list_alloc"); #ifdef _LMP_INTEL_OFFLOAD @@ -369,8 +380,8 @@ void IntelBuffers<flt_t, acc_t>::free_ccache() template <class flt_t, class acc_t> void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag, - const int nthreads, - const int width) + const int nthreads, + const int width) { #ifdef _LMP_INTEL_OFFLOAD if (_ccachex && off_flag && _off_ccache == 0) @@ -407,7 +418,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag, int *ccachej = _ccachej; if (ccachex != NULL && ccachey !=NULL && ccachez != NULL && - ccachew != NULL && ccachei != NULL && ccachej !=NULL) { + ccachew != NULL && ccachei != NULL && ccachej !=NULL) { #pragma offload_transfer target(mic:_cop) \ nocopy(ccachex,ccachey:length(vsize) alloc_if(1) free_if(0)) \ nocopy(ccachez,ccachew:length(vsize) alloc_if(1) 
free_if(0)) \ @@ -427,6 +438,115 @@ void IntelBuffers<flt_t, acc_t>::grow_ccache(const int off_flag, /* ---------------------------------------------------------------------- */ +template <class flt_t, class acc_t> +void IntelBuffers<flt_t, acc_t>::free_ncache() +{ + if (_ncache_alloc) { + flt_t *ncachex = _ncachex; + flt_t *ncachey = _ncachey; + flt_t *ncachez = _ncachez; + int *ncachej = _ncachej; + int *ncachejtype = _ncachejtype; + + #ifdef _LMP_INTEL_OFFLOAD + if (_off_ncache) { + #pragma offload_transfer target(mic:_cop) \ + nocopy(ncachex,ncachey,ncachez,ncachej:alloc_if(0) free_if(1)) \ + nocopy(ncachejtype:alloc_if(0) free_if(1)) + } + _off_ncache = 0; + #endif + + lmp->memory->destroy(ncachex); + lmp->memory->destroy(ncachey); + lmp->memory->destroy(ncachez); + lmp->memory->destroy(ncachej); + lmp->memory->destroy(ncachejtype); + + _ncache_alloc = 0; + } +} + +/* ---------------------------------------------------------------------- */ + +template <class flt_t, class acc_t> +void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag, + const int nthreads) +{ + const int nsize = get_max_nbors() * 3; + int esize = MIN(sizeof(int), sizeof(flt_t)); + IP_PRE_get_stride(_ncache_stride, nsize, esize, 0); + int nt = MAX(nthreads, _off_threads); + const int vsize = _ncache_stride * nt; + + if (_ncache_alloc) { + if (vsize > _ncache_alloc) + free_ncache(); + #ifdef _LMP_INTEL_OFFLOAD + else if (off_flag && _off_ncache == 0) + free_ncache(); + #endif + else + return; + } + + lmp->memory->create(_ncachex, vsize, "_ncachex"); + lmp->memory->create(_ncachey, vsize, "_ncachey"); + lmp->memory->create(_ncachez, vsize, "_ncachez"); + lmp->memory->create(_ncachej, vsize, "_ncachej"); + lmp->memory->create(_ncachejtype, vsize, "_ncachejtype"); + + _ncache_alloc = vsize; + + #ifdef _LMP_INTEL_OFFLOAD + if (off_flag) { + flt_t *ncachex = _ncachex; + flt_t *ncachey = _ncachey; + flt_t *ncachez = _ncachez; + int *ncachej = _ncachej; + int *ncachejtype = _ncachejtype; + + 
if (ncachex != NULL && ncachey !=NULL && ncachez != NULL && + ncachej != NULL && ncachejtype != NULL) { + #pragma offload_transfer target(mic:_cop) \ + nocopy(ncachex,ncachey:length(vsize) alloc_if(1) free_if(0)) \ + nocopy(ncachez,ncachej:length(vsize) alloc_if(1) free_if(0)) \ + nocopy(ncachejtype:length(vsize) alloc_if(1) free_if(0)) + } + _off_ncache = 1; + } + #endif +} + +/* ---------------------------------------------------------------------- */ + +#ifndef _LMP_INTEL_OFFLOAD +template <class flt_t, class acc_t> +void IntelBuffers<flt_t, acc_t>::fdotr_reduce_l5(const int lf, const int lt, + const int nthreads, const int f_stride, acc_t &ov0, acc_t &ov1, + acc_t &ov2, acc_t &ov3, acc_t &ov4, acc_t &ov5) +{ + IP_PRE_fdotr_acc_force_l5(lf, lt, 0, nthreads, _f, f_stride, _x, ov0, + ov1, ov2, ov3, ov4, ov5); +} +#endif + +/* ---------------------------------------------------------------------- */ + +#ifndef _LMP_INTEL_OFFLOAD +template <class flt_t, class acc_t> +void IntelBuffers<flt_t, acc_t>::fdotr_reduce(const int nall, + const int nthreads, const int f_stride, acc_t &ov0, acc_t &ov1, + acc_t &ov2, acc_t &ov3, acc_t &ov4, acc_t &ov5) +{ + int iifrom, iito, tid; + IP_PRE_fdotr_acc_force(nall, 0, nthreads, _f, f_stride, _x, 0, 2, + ov0, ov1, ov2, ov3, ov4, ov5); +} +#endif + +/* ---------------------------------------------------------------------- */ + template <class flt_t, class acc_t> void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes) { diff --git a/src/USER-INTEL/intel_buffers.h b/src/USER-INTEL/intel_buffers.h index 3462d013a1d9d637e5a6e285bec1b2a0f24633b0..135309fe44557b8d63b420602405212421adae7a 100644 --- a/src/USER-INTEL/intel_buffers.h +++ b/src/USER-INTEL/intel_buffers.h @@ -62,7 +62,7 @@ class IntelBuffers { void free_buffers(); void free_nmax(); - inline void set_bininfo(int *atombin, int *binpacked) + inline void set_bininfo(int *atombin, int *binpacked) { _atombin = atombin; _binpacked = binpacked; } inline void grow(const int nall, 
const int nlocal, const int nthreads, const int offload_end) { @@ -78,6 +78,7 @@ class IntelBuffers { free_nbor_list(); free_nmax(); free_list_local(); + free_ncache(); } inline void grow_list(NeighList *list, const int nlocal, const int nthreads, @@ -106,6 +107,15 @@ class IntelBuffers { inline acc_t * get_ccachef() { return _ccachef; } #endif + void free_ncache(); + void grow_ncache(const int off_flag, const int nthreads); + inline int ncache_stride() { return _ncache_stride; } + inline flt_t * get_ncachex() { return _ncachex; } + inline flt_t * get_ncachey() { return _ncachey; } + inline flt_t * get_ncachez() { return _ncachez; } + inline int * get_ncachej() { return _ncachej; } + inline int * get_ncachejtype() { return _ncachejtype; } + inline int get_max_nbors() { int mn = lmp->neighbor->oneatom * sizeof(int) / (INTEL_ONEATOM_FACTOR * INTEL_DATA_ALIGN); @@ -116,7 +126,7 @@ class IntelBuffers { inline void grow_nbor_list(NeighList *list, const int nlocal, const int nthreads, const int offload_end, - const int pack_width) { + const int pack_width) { if (nlocal > _list_alloc_atoms) _grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width); } @@ -155,7 +165,7 @@ class IntelBuffers { inline int get_off_threads() { return _off_threads; } #ifdef _LMP_INTEL_OFFLOAD inline void set_off_params(const int n, const int cop, - const int separate_buffers) + const int separate_buffers) { _off_threads = n; _cop = cop; _separate_buffers = separate_buffers; } inline vec3_acc_t * get_off_f() { return _off_f; } #endif @@ -180,9 +190,18 @@ class IntelBuffers { } } + #ifndef _LMP_INTEL_OFFLOAD + void fdotr_reduce_l5(const int lf, const int lt, const int nthreads, + const int f_stride, acc_t &ov0, acc_t &ov1, + acc_t &ov2, acc_t &ov3, acc_t &ov4, acc_t &ov5); + void fdotr_reduce(const int nall, const int nthreads, const int f_stride, + acc_t &ov0, acc_t &ov1, acc_t &ov2, acc_t &ov3, + acc_t &ov4, acc_t &ov5); + #endif + #ifdef _LMP_INTEL_OFFLOAD inline void thr_pack_cop(const 
int ifrom, const int ito, - const int offset, const bool dotype = false) { + const int offset, const bool dotype = false) { double ** x = lmp->atom->x + offset; if (dotype == false) { #pragma vector nontemporal @@ -195,16 +214,16 @@ class IntelBuffers { int *type = lmp->atom->type + offset; #pragma vector nontemporal for (int i = ifrom; i < ito; i++) { - _x[i].x = x[i][0]; - _x[i].y = x[i][1]; - _x[i].z = x[i][2]; - _x[i].w = type[i]; + _x[i].x = x[i][0]; + _x[i].y = x[i][1]; + _x[i].z = x[i][2]; + _x[i].w = type[i]; } } } inline void thr_pack_host(const int ifrom, const int ito, - const int offset) { + const int offset) { double ** x = lmp->atom->x + offset; for (int i = ifrom; i < ito; i++) { _host_x[i].x = x[i][0]; @@ -214,13 +233,13 @@ class IntelBuffers { } inline void pack_sep_from_single(const int host_min_local, - const int used_local, - const int host_min_ghost, - const int used_ghost) { + const int used_local, + const int host_min_ghost, + const int used_ghost) { memcpy(_host_x + host_min_local, _x + host_min_local, - used_local * sizeof(atom_t)); + used_local * sizeof(atom_t)); memcpy(_host_x + host_min_local + used_local, _x + host_min_ghost, - used_ghost * sizeof(atom_t)); + used_ghost * sizeof(atom_t)); int nall = used_local + used_ghost + host_min_local; _host_x[nall].x = INTEL_BIGP; _host_x[nall].y = INTEL_BIGP; @@ -228,9 +247,9 @@ class IntelBuffers { _host_x[nall].w = 1; if (lmp->atom->q != NULL) { memcpy(_host_q + host_min_local, _q + host_min_local, - used_local * sizeof(flt_t)); + used_local * sizeof(flt_t)); memcpy(_host_q + host_min_local + used_local, _q + host_min_ghost, - used_ghost * sizeof(flt_t)); + used_ghost * sizeof(flt_t)); } } @@ -263,6 +282,10 @@ class IntelBuffers { int _ccache_stride; flt_t *_ccachex, *_ccachey, *_ccachez, *_ccachew; int *_ccachei, *_ccachej; + + int _ncache_stride, _ncache_alloc; + flt_t *_ncachex, *_ncachey, *_ncachez; + int *_ncachej, *_ncachejtype; #ifdef LMP_USE_AVXCD int _ccache_stride3; acc_t * _ccachef; 
@@ -274,7 +297,7 @@ class IntelBuffers { flt_t *_host_q; quat_t *_host_quat; vec3_acc_t *_off_f; - int _off_map_nmax, _cop, _off_ccache; + int _off_map_nmax, _cop, _off_ccache, _off_ncache; int *_off_map_ilist; int *_off_map_special, *_off_map_nspecial, *_off_map_tag; int *_off_map_numneigh; @@ -287,7 +310,7 @@ class IntelBuffers { _alignvar(acc_t _ev_global_host[8],64); void _grow(const int nall, const int nlocal, const int nthreads, - const int offload_end); + const int offload_end); void _grow_nmax(const int offload_end); void _grow_list_local(NeighList *list, const int offload_end); void _grow_nbor_list(NeighList *list, const int nlocal, const int nthreads, diff --git a/src/USER-INTEL/intel_intrinsics.h b/src/USER-INTEL/intel_intrinsics.h index 44a9605961f3d64d2d4eaa667f859f7c2aa39777..069eb5bed50b5e7be003e42e59f98bbe003e3a84 100644 --- a/src/USER-INTEL/intel_intrinsics.h +++ b/src/USER-INTEL/intel_intrinsics.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* *- c++ -*- ----------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -46,23 +46,23 @@ struct lmp_intel_an_fvec { lmp_intel_an_fvec(const lmp_intel_an_fvec &a) { data[:] = a.data[:]; } lmp_intel_an_fvec& operator =(const lmp_intel_an_fvec &a) { data[:] = a.data[:]; return *this; } const lmp_intel_an_fvec operator +(const lmp_intel_an_fvec &b) const { - lmp_intel_an_fvec ret = *this; - ret.data[:] += b.data[:]; + lmp_intel_an_fvec ret = *this; + ret.data[:] += b.data[:]; return ret; } const lmp_intel_an_fvec operator -(const lmp_intel_an_fvec &b) const { - lmp_intel_an_fvec ret = *this; - ret.data[:] -= b.data[:]; + lmp_intel_an_fvec ret = *this; + ret.data[:] -= b.data[:]; return ret; } const lmp_intel_an_fvec operator *(const lmp_intel_an_fvec &b) const { - lmp_intel_an_fvec ret = *this; - ret.data[:] 
*= b.data[:]; + lmp_intel_an_fvec ret = *this; + ret.data[:] *= b.data[:]; return ret; } const lmp_intel_an_fvec operator /(const lmp_intel_an_fvec &b) const { - lmp_intel_an_fvec ret = *this; - ret.data[:] /= b.data[:]; + lmp_intel_an_fvec ret = *this; + ret.data[:] /= b.data[:]; return ret; } lmp_intel_an_fvec& operator +=(const lmp_intel_an_fvec &b) { @@ -103,18 +103,18 @@ struct lmp_intel_an_ivec { explicit lmp_intel_an_ivec(int i) { data[:] = i; } explicit lmp_intel_an_ivec(const int * a) { data[:] = a[0:VL]; } const lmp_intel_an_ivec operator &(const lmp_intel_an_ivec &b) { - lmp_intel_an_ivec ret = *this; - ret.data[:] &= b.data[:]; + lmp_intel_an_ivec ret = *this; + ret.data[:] &= b.data[:]; return ret; } const lmp_intel_an_ivec operator |(const lmp_intel_an_ivec &b) { - lmp_intel_an_ivec ret = *this; - ret.data[:] |= b.data[:]; + lmp_intel_an_ivec ret = *this; + ret.data[:] |= b.data[:]; return ret; } const lmp_intel_an_ivec operator +(const lmp_intel_an_ivec &b) { - lmp_intel_an_ivec ret = *this; - ret.data[:] += b.data[:]; + lmp_intel_an_ivec ret = *this; + ret.data[:] += b.data[:]; return ret; } }; @@ -171,13 +171,13 @@ enum CalculationMode { KNC, AVX, AVX2, SSE, NONE, AN }; // This is used in the selection logic template<CalculationMode mode> -struct vector_traits { - static const bool support_integer_and_gather_ops = true; +struct vector_traits { + static const bool support_integer_and_gather_ops = true; }; template<> -struct vector_traits<AVX> { - static const bool support_integer_and_gather_ops = false; +struct vector_traits<AVX> { + static const bool support_integer_and_gather_ops = false; }; // This is the base template for all the different architectures @@ -198,10 +198,10 @@ struct ivec32x16 { } explicit ivec32x16(int i) { vec = _mm512_set1_epi32(i); } operator __m512i() const { return vec; } - friend ivec32x16 operator &(const ivec32x16 &a, const ivec32x16 &b) { + friend ivec32x16 operator &(const ivec32x16 &a, const ivec32x16 &b) { return 
_mm512_and_epi32(a, b); } - friend ivec32x16 operator |(const ivec32x16 &a, const ivec32x16 &b) { + friend ivec32x16 operator |(const ivec32x16 &a, const ivec32x16 &b) { return _mm512_or_epi32(a, b); } friend ivec32x16 operator +(const ivec32x16 &a, const ivec32x16 &b) { @@ -326,7 +326,7 @@ struct vector_ops<double, KNC> { *z = gather<1>(*z, mask, idxs, &base->z); *w = int_gather<1>(*w, mask, idxs, &base->w); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 8); @@ -337,7 +337,7 @@ struct vector_ops<double, KNC> { *r6 = gather<4>(*r6, mask, idxs, reinterpret_cast<const char *>(base) + 48); *r7 = gather<4>(*r7, mask, idxs, reinterpret_cast<const char *>(base) + 56); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 8); @@ -464,7 +464,7 @@ struct vector_ops<float, KNC> { *z = gather<1>(*z, mask, idxs, &base->z); *w = int_gather<1>(*w, mask, idxs, &base->w); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 4); @@ -475,7 +475,7 @@ struct vector_ops<float, KNC> { *r6 = gather<4>(*r6, mask, idxs, reinterpret_cast<const 
char *>(base) + 24); *r7 = gather<4>(*r7, mask, idxs, reinterpret_cast<const char *>(base) + 28); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 4); @@ -519,10 +519,10 @@ struct ivec32x8 { } explicit ivec32x8(int i) { vec = _mm256_set1_epi32(i); } operator __m256i() const { return vec; } - friend ivec32x8 operator &(const ivec32x8 &a, const ivec32x8 &b) { + friend ivec32x8 operator &(const ivec32x8 &a, const ivec32x8 &b) { return _mm256_castpd_si256(_mm256_and_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b))); } - friend ivec32x8 operator |(const ivec32x8 &a, const ivec32x8 &b) { + friend ivec32x8 operator |(const ivec32x8 &a, const ivec32x8 &b) { return _mm256_castpd_si256(_mm256_or_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b))); } friend ivec32x8 operator +(const ivec32x8 &a, const ivec32x8 &b) { @@ -545,10 +545,10 @@ struct avx_bvec { operator F64vec4() const { return _mm256_castsi256_pd(vec); } operator F32vec8() const { return _mm256_castsi256_ps(vec); } operator ivec32x8() const { return vec; } - friend avx_bvec operator &(const avx_bvec &a, const avx_bvec &b) { + friend avx_bvec operator &(const avx_bvec &a, const avx_bvec &b) { return _mm256_castpd_si256(_mm256_and_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b))); } - friend avx_bvec operator |(const avx_bvec &a, const avx_bvec &b) { + friend avx_bvec operator |(const avx_bvec &a, const avx_bvec &b) { return _mm256_castpd_si256(_mm256_or_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(b))); } friend avx_bvec operator ~(const avx_bvec &a) { return _mm256_castpd_si256(_mm256_andnot_pd(_mm256_castsi256_pd(a), _mm256_castsi256_pd(avx_bvec(0xFFFFFFFF)))); } @@ -582,8 +582,8 @@ struct vector_ops<double, AVX> { 
_mm256_store_si256(reinterpret_cast<__m256i*>(idxs), idx); _mm256_store_pd(reinterpret_cast<double*>(src), from); for (int i = 0; i < VL; i++) { - result[i] = mask_test_at(mask, i) - ? *reinterpret_cast<const double*>(reinterpret_cast<const char*>(base) + scale * idxs[2*i]) + result[i] = mask_test_at(mask, i) + ? *reinterpret_cast<const double*>(reinterpret_cast<const char*>(base) + scale * idxs[2*i]) : src[i]; } return _mm256_load_pd(reinterpret_cast<double*>(result)); @@ -605,18 +605,18 @@ struct vector_ops<double, AVX> { __m256d c1 = _mm256_permute2f128_pd(b1, b3, 0x20); __m256d c2 = _mm256_permute2f128_pd(b0, b2, 0x31); __m256d c3 = _mm256_permute2f128_pd(b1, b3, 0x31); - *x = blend(mask, *x, c0); - *y = blend(mask, *y, c1); - *z = blend(mask, *z, c2); + *x = blend(mask, *x, c0); + *y = blend(mask, *y, c1); + *z = blend(mask, *z, c2); *w = int_blend(mask, *w, _mm256_castps_si256(_mm256_permute_ps(_mm256_castpd_ps(c3), 0xA0))); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 32, r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { iarr i, m; _mm256_store_si256(reinterpret_cast<__m256i*>(i), idxs); @@ -642,10 +642,10 @@ struct vector_ops<double, AVX> { __m256d c1 = _mm256_permute2f128_pd(b1, b3, 0x20); __m256d c2 = _mm256_permute2f128_pd(b0, b2, 0x31); __m256d c3 = _mm256_permute2f128_pd(b1, b3, 0x31); - *r0 = blend(mask, *r0, c0); - *r1 = blend(mask, *r1, c1); - *r2 = blend(mask, *r2, c2); - *r3 = blend(mask, *r3, c3); + *r0 = blend(mask, *r0, c0); + *r1 = 
blend(mask, *r1, c1); + *r2 = blend(mask, *r2, c2); + *r3 = blend(mask, *r3, c3); } static fvec blend(const bvec &mask, const fvec &a, const fvec &b) { return (b & mask) | (a & ~ mask); @@ -809,8 +809,8 @@ struct vector_ops<float, AVX> { _mm256_store_si256(reinterpret_cast<__m256i*>(idxs), idx); _mm256_store_ps(reinterpret_cast<float*>(src), from); for (int i = 0; i < VL; i++) { - result[i] = mask_test_at(mask, i) - ? *reinterpret_cast<const float*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) + result[i] = mask_test_at(mask, i) + ? *reinterpret_cast<const float*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) : src[i]; } return _mm256_load_ps(reinterpret_cast<float*>(result)); @@ -842,18 +842,18 @@ struct vector_ops<float, AVX> { __m256 c1 = _mm256_shuffle_ps(b0, b2, 0xEE); __m256 c2 = _mm256_shuffle_ps(b1, b3, 0x44); __m256 c3 = _mm256_shuffle_ps(b1, b3, 0xEE); - *x = blend(mask, *x, c0); - *y = blend(mask, *y, c1); - *z = blend(mask, *z, c2); + *x = blend(mask, *x, c0); + *y = blend(mask, *y, c1); + *z = blend(mask, *z, c2); *w = int_blend(mask, *w, _mm256_castps_si256(c3)); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 16, r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { iarr i, m; int_store(i, idxs); @@ -880,10 +880,10 @@ struct vector_ops<float, AVX> { __m256 c1 = _mm256_shuffle_ps(b0, b2, 0xEE); __m256 c2 = _mm256_shuffle_ps(b1, b3, 0x44); __m256 c3 = _mm256_shuffle_ps(b1, b3, 0xEE); - *r0 = blend(mask, *r0, c0); - *r1 = blend(mask, 
*r1, c1); - *r2 = blend(mask, *r2, c2); - *r3 = blend(mask, *r3, c3); + *r0 = blend(mask, *r0, c0); + *r1 = blend(mask, *r1, c1); + *r2 = blend(mask, *r2, c2); + *r3 = blend(mask, *r3, c3); } static fvec blend(const bvec &mask, const fvec &a, const fvec &b) { return (b & mask) | (a & ~ mask); @@ -961,8 +961,8 @@ struct vector_ops<float, AVX> { _mm256_store_si256(reinterpret_cast<__m256i*>(idxs), idx); _mm256_store_si256(reinterpret_cast<__m256i*>(src), from); for (int i = 0; i < VL; i++) { - result[i] = mask_test_at(mask, i) - ? *reinterpret_cast<const int*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) + result[i] = mask_test_at(mask, i) + ? *reinterpret_cast<const int*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) : src[i]; } return _mm256_load_si256(reinterpret_cast<__m256i*>(result)); @@ -1038,10 +1038,10 @@ struct avx2_ivec32 { } explicit avx2_ivec32(int i) { vec = _mm256_set1_epi32(i); } operator __m256i() const { return vec; } - friend avx2_ivec32 operator &(const avx2_ivec32 &a, const avx2_ivec32 &b) { + friend avx2_ivec32 operator &(const avx2_ivec32 &a, const avx2_ivec32 &b) { return _mm256_and_si256(a, b); } - friend avx2_ivec32 operator |(const avx2_ivec32 &a, const avx2_ivec32 &b) { + friend avx2_ivec32 operator |(const avx2_ivec32 &a, const avx2_ivec32 &b) { return _mm256_or_si256(a, b); } friend avx2_ivec32 operator +(const avx2_ivec32 &a, const avx2_ivec32 &b) { @@ -1060,14 +1060,14 @@ struct avx2_bvec { operator F64vec4() const { return _mm256_castsi256_pd(vec); } operator F32vec8() const { return _mm256_castsi256_ps(vec); } operator avx2_ivec32() const { return vec; } - friend avx2_bvec operator &(const avx2_bvec &a, const avx2_bvec &b) { + friend avx2_bvec operator &(const avx2_bvec &a, const avx2_bvec &b) { return _mm256_and_si256(a, b); } - friend avx2_bvec operator |(const avx2_bvec &a, const avx2_bvec &b) { + friend avx2_bvec operator |(const avx2_bvec &a, const avx2_bvec &b) { return _mm256_or_si256(a, b); } friend 
avx2_bvec operator ~(const avx2_bvec &a) { - return _mm256_andnot_si256(a, avx2_bvec(0xFFFFFFFF)); + return _mm256_andnot_si256(a, avx2_bvec(0xFFFFFFFF)); } avx2_bvec& operator &=(const avx2_bvec &a) { return *this = _mm256_and_si256(vec,a); } }; @@ -1106,13 +1106,13 @@ struct vector_ops<double, AVX2> { *z = _mm256_mask_i32gather_pd(*z, &base->z, _mm256_castsi256_si128(idx1), mask, 1); *w = _mm256_mask_i32gather_epi32(*w, &base->w, idx, mask, 1); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 32, r4, r5, r6, r7); } - static void gather_4(const ivec &idx, const bvec &mask, const void *base, + static void gather_4(const ivec &idx, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { ivec idx0 = _mm256_shuffle_epi32(idx, 0xD8); // 11011000 ->3120 ivec idx1 = _mm256_permute4x64_epi64(idx0, 0xD8); @@ -1253,7 +1253,7 @@ struct vector_ops<float, AVX2> { *z = _mm256_mask_i32gather_ps(*z, reinterpret_cast<const float*>(base) + 2, idx, mask, 1); *w = _mm256_mask_i32gather_epi32(*w, reinterpret_cast<const int*>(base) + 3, idx, mask, 1); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 4); @@ -1264,7 +1264,7 @@ struct vector_ops<float, AVX2> { *r6 = gather<4>(*r6, mask, idxs, reinterpret_cast<const char *>(base) + 24); *r7 = gather<4>(*r7, mask, idxs, reinterpret_cast<const char 
*>(base) + 28); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char *>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char *>(base) + 4); @@ -1401,10 +1401,10 @@ struct ivec32x4 { } explicit ivec32x4(int i) { vec = _mm_set1_epi32(i); } operator __m128i() const { return vec; } - friend ivec32x4 operator &(const ivec32x4 &a, const ivec32x4 &b) { + friend ivec32x4 operator &(const ivec32x4 &a, const ivec32x4 &b) { return _mm_castpd_si128(_mm_and_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b))); } - friend ivec32x4 operator |(const ivec32x4 &a, const ivec32x4 &b) { + friend ivec32x4 operator |(const ivec32x4 &a, const ivec32x4 &b) { return _mm_castpd_si128(_mm_or_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b))); } friend ivec32x4 operator +(const ivec32x4 &a, const ivec32x4 &b) { @@ -1420,10 +1420,10 @@ struct sse_bvecx4 { operator __m128i() const { return vec; } operator F64vec2() const { return _mm_castsi128_pd(vec); } operator ivec32x4() const { return vec; } - friend sse_bvecx4 operator &(const sse_bvecx4 &a, const sse_bvecx4 &b) { + friend sse_bvecx4 operator &(const sse_bvecx4 &a, const sse_bvecx4 &b) { return _mm_castpd_si128(_mm_and_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b))); } - friend sse_bvecx4 operator |(const sse_bvecx4 &a, const sse_bvecx4 &b) { + friend sse_bvecx4 operator |(const sse_bvecx4 &a, const sse_bvecx4 &b) { return _mm_castpd_si128(_mm_or_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(b))); } friend sse_bvecx4 operator ~(const sse_bvecx4 &a) { return _mm_castpd_si128(_mm_andnot_pd(_mm_castsi128_pd(a), _mm_castsi128_pd(sse_bvecx4(0xFFFFFFFF)))); } @@ -1477,18 +1477,18 @@ struct vector_ops<double, SSE> { __m128d c1 = _mm_unpackhi_pd(a0lo, a1lo); __m128d c2 = _mm_unpacklo_pd(a0hi, a1hi); __m128d c3 = _mm_unpackhi_pd(a0hi, a1hi); - 
*x = blend(mask, *x, c0); - *y = blend(mask, *y, c1); - *z = blend(mask, *z, c2); + *x = blend(mask, *x, c0); + *y = blend(mask, *y, c1); + *z = blend(mask, *z, c2); *w = int_blend(mask, *w, _mm_shuffle_epi32(_mm_castpd_si128(c3), 0xA0)); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 32, r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char*>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char*>(base) + 8); @@ -1634,8 +1634,8 @@ struct vector_ops<float, SSE> { _mm_store_si128(reinterpret_cast<__m128i*>(idxs), idx); _mm_store_ps(reinterpret_cast<float*>(src), from); for (int i = 0; i < VL; i++) { - result[i] = m[i] - ? *reinterpret_cast<const float*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) + result[i] = m[i] + ? 
*reinterpret_cast<const float*>(reinterpret_cast<const char*>(base) + scale * idxs[i]) : src[i]; } return _mm_load_ps(reinterpret_cast<float*>(result)); @@ -1647,13 +1647,13 @@ struct vector_ops<float, SSE> { *z = gather<1>(*z, mask, idxs, &base->z); *w = int_gather<1>(*w, mask, idxs, &base->w); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 16, r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char*>(base) + 0); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char*>(base) + 4); @@ -1816,13 +1816,13 @@ struct vector_ops<flt_t, NONE> { *z = gather<1>(*z, mask, idxs, &base->z); *w = int_gather<1>(*w, mask, idxs, &base->w); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 4 * sizeof(fscal), r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char*>(base) + 0 * sizeof(fscal)); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char*>(base) + 1 * 
sizeof(fscal)); @@ -1946,13 +1946,13 @@ struct vector_ops<flt_t, AN> { *z = gather<1>(*z, mask, idxs, &base->z); *w = int_gather<1>(*w, mask, idxs, &base->w); } - static void gather_8(const ivec &idxs, const bvec &mask, const void *base, + static void gather_8(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) { fvec a = zero(), b = zero(), c = zero(), d = zero(); gather_4(idxs, mask, base, r0, r1, r2, r3); gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 4 * sizeof(fscal), r4, r5, r6, r7); } - static void gather_4(const ivec &idxs, const bvec &mask, const void *base, + static void gather_4(const ivec &idxs, const bvec &mask, const void *base, fvec *r0, fvec *r1, fvec *r2, fvec *r3) { *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char*>(base) + 0 * sizeof(fscal)); *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char*>(base) + 1 * sizeof(fscal)); @@ -2113,7 +2113,7 @@ struct AccumulatorTwiceMixin { typedef avec_t avec; typedef typename HIGH::fscal aarr[BASE::VL] __attribute__((aligned(BASE::ALIGN))); - + static avec acc_mask_add(const avec &src, const typename BASE::bvec &m, const avec &a, const typename BASE::fvec &b) { typename HIGH::fvec blo = BASE::cvtup_lo(b); typename HIGH::fvec bhi = BASE::cvtup_hi(b); @@ -2121,7 +2121,7 @@ struct AccumulatorTwiceMixin { BASE::mask_cvtup(m, &mlo, &mhi); return avec(HIGH::mask_add(src.lo, mlo, a.lo, blo), HIGH::mask_add(src.hi, mhi, a.hi, bhi)); } - + static typename HIGH::fscal acc_reduce_add(const avec &a) { return HIGH::reduce_add(a.lo + a.hi); } @@ -2143,13 +2143,13 @@ template<class BASE_flt_t, class HIGH_flt_t, CalculationMode mic> struct AccumulatorTwiceMixinNone { typedef vector_ops<BASE_flt_t, mic> BASE; typedef vector_ops<HIGH_flt_t, mic> HIGH; - + typedef typename HIGH::fvec avec; typedef typename HIGH::fscal aarr[BASE::VL]; - + static avec acc_mask_add(const avec &src, const typename BASE::bvec &m, const 
avec &a, const typename BASE::fvec &b) { return HIGH::mask_add(src, m, a, static_cast<typename HIGH::fvec>(b)); - } + } static typename HIGH::fscal acc_reduce_add(const avec &a) { return HIGH::reduce_add(a); } diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h index ad07dfd7c2cd86251bcfa016b7a1e3574f85d2a8..d5cf6f5be2cee5cb5b15c876d08cdd5da81441b0 100644 --- a/src/USER-INTEL/intel_preprocess.h +++ b/src/USER-INTEL/intel_preprocess.h @@ -17,6 +17,9 @@ #ifdef __INTEL_COMPILER #define LMP_SIMD_COMPILER +#if (__INTEL_COMPILER_BUILD_DATE > 20160720) +#define LMP_INTEL_USE_SIMDOFF +#endif #endif #ifdef __INTEL_OFFLOAD @@ -65,7 +68,10 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, #define INTEL_MAX_STENCIL 256 // INTEL_MAX_STENCIL * sqrt(INTEL_MAX_STENCIL) #define INTEL_MAX_STENCIL_CHECK 4096 -#define INTEL_P3M_MAXORDER 5 +#define INTEL_P3M_MAXORDER 7 +#define INTEL_P3M_ALIGNED_MAXORDER 8 +// PRECOMPUTE VALUES IN TABLE (DOESN'T AFFECT ACCURACY) +#define INTEL_P3M_TABLE 1 #ifdef __INTEL_COMPILER #ifdef __AVX__ @@ -87,7 +93,12 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, #ifdef __MIC__ #define INTEL_V512 1 #define INTEL_VMASK 1 +#define INTEL_HTHREADS 4 +#endif #endif + +#ifdef __AVX512ER__ +#define INTEL_HTHREADS 4 #endif #ifdef __AVX512CD__ @@ -96,15 +107,22 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR, #endif #endif +#ifdef __MIC__ +#define INTEL_COMPILE_WIDTH INTEL_MIC_VECTOR_WIDTH +#else +#define INTEL_COMPILE_WIDTH INTEL_VECTOR_WIDTH +#endif + #else #undef INTEL_VECTOR_WIDTH #define INTEL_VECTOR_WIDTH 1 +#define INTEL_COMPILE_WIDTH 1 #endif #define INTEL_DATA_ALIGN 64 -#define INTEL_ONEATOM_FACTOR 2 +#define INTEL_ONEATOM_FACTOR 1 #define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH #define INTEL_NBOR_PAD INTEL_VECTOR_WIDTH #define INTEL_LB_MEAN_WEIGHT 0.1 @@ -112,104 +130,380 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, 
TIME_OFFLOAD_NEIGHBOR, #define INTEL_MAX_HOST_CORE_COUNT 512 #define INTEL_MAX_COI_CORES 36 -#define IP_PRE_get_stride(stride, n, datasize, torque) \ - { \ - int blength = n; \ - if (torque) blength *= 2; \ - const int bytes = blength * datasize; \ +#ifndef INTEL_HTHREADS +#define INTEL_HTHREADS 2 +#endif + +#define IP_PRE_get_stride(stride, n, datasize, torque) \ + { \ + int blength = n; \ + if (torque) blength *= 2; \ + const int bytes = blength * datasize; \ stride = INTEL_DATA_ALIGN - (bytes % INTEL_DATA_ALIGN); \ - stride = blength + stride / datasize; \ + stride = blength + stride / datasize; \ } #if defined(_OPENMP) -#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ - { \ - const int idelta = 1 + inum/nthreads; \ - ifrom = tid * idelta; \ - ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; \ +#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ + { \ + int idelta = inum/nthreads; \ + const int imod = inum % nthreads; \ + ifrom = tid * idelta; \ + ito = ifrom + idelta; \ + if (tid < imod) { \ + ito+=tid+1; \ + ifrom+=tid; \ + } else { \ + ito+=imod; \ + ifrom+=imod; \ + } \ + } + +#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ + { \ + tid = omp_get_thread_num(); \ + IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads); \ + } + +#define IP_PRE_omp_stride(ifrom, ip, ito, tid, inum, nthr) \ + { \ + if (nthr <= INTEL_HTHREADS) { \ + ifrom = tid; \ + ito = inum; \ + ip = nthr; \ + } else if (nthr % INTEL_HTHREADS == 0) { \ + int nd = nthr / INTEL_HTHREADS; \ + int td = tid / INTEL_HTHREADS; \ + int tm = tid % INTEL_HTHREADS; \ + IP_PRE_omp_range(ifrom, ito, td, inum, nd); \ + ifrom += tm; \ + ip = INTEL_HTHREADS; \ + } else { \ + IP_PRE_omp_range(ifrom, ito, tid, inum, nthr); \ + ip = 1; \ + } \ } -#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ - { \ - tid = omp_get_thread_num(); \ - IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads); \ +#define IP_PRE_omp_stride_id(ifrom, ip, ito, tid, inum, nthr) \ + { \ + tid 
= omp_get_thread_num(); \ + IP_PRE_omp_stride(ifrom, ip, ito, tid, inum, nthr); \ } #define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ datasize) \ { \ int chunk_size = INTEL_DATA_ALIGN / datasize; \ - int idelta = static_cast<int>(static_cast<float>(inum) \ - /chunk_size/nthreads) + 1; \ - idelta *= chunk_size; \ + int idelta = static_cast<int>(ceil(static_cast<float>(inum) \ + /chunk_size/nthreads)); \ + idelta *= chunk_size; \ ifrom = tid*idelta; \ ito = ifrom + idelta; \ if (ito > inum) ito = inum; \ } #define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ - nthreads, datasize) \ - { \ - tid = omp_get_thread_num(); \ + nthreads, datasize) \ + { \ + tid = omp_get_thread_num(); \ IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ - datasize); \ + datasize); \ } #define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, \ - nthreads, vecsize) \ - { \ - tid = omp_get_thread_num(); \ - int idelta = static_cast<int>(ceil(static_cast<float>(inum) \ - /vecsize/nthreads)); \ - idelta *= vecsize; \ - ifrom = tid*idelta; \ - ito = ifrom + idelta; \ - if (ito > inum) ito = inum; \ + nthreads, vecsize) \ + { \ + tid = omp_get_thread_num(); \ + int idelta = static_cast<int>(ceil(static_cast<float>(inum) \ + /vecsize/nthreads)); \ + idelta *= vecsize; \ + ifrom = tid*idelta; \ + ito = ifrom + idelta; \ + if (ito > inum) ito = inum; \ + } + +#define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum, \ + nthr, vecsize) \ + { \ + tid = omp_get_thread_num(); \ + if (nthr <= INTEL_HTHREADS) { \ + ifrom = tid*vecsize; \ + ito = inum; \ + ip = nthr*vecsize; \ + } else if (nthr % INTEL_HTHREADS == 0) { \ + int nd = nthr / INTEL_HTHREADS; \ + int td = tid / INTEL_HTHREADS; \ + int tm = tid % INTEL_HTHREADS; \ + IP_PRE_omp_range_id_vec(ifrom, ito, td, inum, nd, \ + vecsize); \ + ifrom += tm * vecsize; \ + ip = INTEL_HTHREADS * vecsize; \ + } else { \ + IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, nthr, \ + vecsize); \ + ip = vecsize; \ + } \ } #else -#define 
IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ - { \ - ifrom = 0; \ - ito = inum; \ +#define IP_PRE_omp_range(ifrom, ito, tid, inum, nthreads) \ + { \ + ifrom = 0; \ + ito = inum; \ } -#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ - { \ - tid = 0; \ - ifrom = 0; \ - ito = inum; \ +#define IP_PRE_omp_range_id(ifrom, ito, tid, inum, nthreads) \ + { \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ + } + +#define IP_PRE_omp_stride(ifrom, ip, ito, tid, inum, nthreads) \ + { \ + ifrom = 0; \ + ito = inum; \ + ip = 1; \ + } + +#define IP_PRE_omp_stride_id(ifrom, ip, ito, tid, inum, nthr) \ + { \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ + ip = 1; \ } #define IP_PRE_omp_range_align(ifrom, ito, tid, inum, nthreads, \ datasize) \ { \ - ifrom = 0; \ - ito = inum; \ + ifrom = 0; \ + ito = inum; \ } #define IP_PRE_omp_range_id_align(ifrom, ito, tid, inum, \ - nthreads, datasize) \ -{ \ - tid = 0; \ - ifrom = 0; \ - ito = inum; \ + nthreads, datasize) \ +{ \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ } #define IP_PRE_omp_range_id_vec(ifrom, ito, tid, inum, \ - nthreads, vecsize) \ - { \ - tid = 0; \ - int idelta = static_cast<int>(ceil(static_cast<float>(inum) \ - /vecsize)); \ - ifrom = 0; \ - ito = inum; \ + nthreads, vecsize) \ + { \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ + } + +#define IP_PRE_omp_stride_id_vec(ifrom, ip, ito, tid, inum, \ + nthreads, vecsize) \ + { \ + tid = 0; \ + ifrom = 0; \ + ito = inum; \ + ip = vecsize; \ } #endif +#define IP_PRE_fdotr_acc_force_l5(lf, lt, minlocal, nthreads, f_start, \ + f_stride, pos, ov0, ov1, ov2, \ + ov3, ov4, ov5) \ +{ \ + acc_t *f_scalar = &f_start[0].x; \ + flt_t *x_scalar = &pos[minlocal].x; \ + int f_stride4 = f_stride * 4; \ + _alignvar(acc_t ovv[INTEL_COMPILE_WIDTH],64); \ + int vwidth; \ + if (sizeof(acc_t) == sizeof(double)) \ + vwidth = INTEL_COMPILE_WIDTH/2; \ + else \ + vwidth = INTEL_COMPILE_WIDTH; \ + if (vwidth < 4) vwidth = 4; \ + _use_simd_pragma("vector aligned") \ +
_use_simd_pragma("simd") \ + for (int v = 0; v < vwidth; v++) ovv[v] = (acc_t)0.0; \ + int remainder = lt % vwidth; \ + if (lf > lt) remainder = 0; \ + const int v_range = lt - remainder; \ + if (nthreads == 2) { \ + acc_t *f_scalar2 = f_scalar + f_stride4; \ + for (int n = lf; n < v_range; n += vwidth) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("simd") \ + for (int v = 0; v < vwidth; v++) { \ + f_scalar[n+v] += f_scalar2[n+v]; \ + ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \ + } \ + ov3 += f_scalar[n+1] * x_scalar[n+0]; \ + ov4 += f_scalar[n+2] * x_scalar[n+0]; \ + ov5 += f_scalar[n+2] * x_scalar[n+1]; \ + if (vwidth > 4) { \ + ov3 += f_scalar[n+5] * x_scalar[n+4]; \ + ov4 += f_scalar[n+6] * x_scalar[n+4]; \ + ov5 += f_scalar[n+6] * x_scalar[n+5]; \ + } \ + if (vwidth > 8) { \ + ov3 += f_scalar[n+9] * x_scalar[n+8]; \ + ov3 += f_scalar[n+13] * x_scalar[n+12]; \ + ov4 += f_scalar[n+10] * x_scalar[n+8]; \ + ov4 += f_scalar[n+14] * x_scalar[n+12]; \ + ov5 += f_scalar[n+10] * x_scalar[n+9]; \ + ov5 += f_scalar[n+14] * x_scalar[n+13]; \ + } \ + } \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("ivdep") \ + _use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \ + for (int n = v_range; n < lt; n++) \ + f_scalar[n] += f_scalar2[n]; \ + } else if (nthreads==4) { \ + acc_t *f_scalar2 = f_scalar + f_stride4; \ + acc_t *f_scalar3 = f_scalar2 + f_stride4; \ + acc_t *f_scalar4 = f_scalar3 + f_stride4; \ + for (int n = lf; n < v_range; n += vwidth) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("simd") \ + for (int v = 0; v < vwidth; v++) { \ + f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v] + \ + f_scalar4[n+v]; \ + ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \ + } \ + ov3 += f_scalar[n+1] * x_scalar[n+0]; \ + ov4 += f_scalar[n+2] * x_scalar[n+0]; \ + ov5 += f_scalar[n+2] * x_scalar[n+1]; \ + if (vwidth > 4) { \ + ov3 += f_scalar[n+5] * x_scalar[n+4]; \ + ov4 += f_scalar[n+6] * x_scalar[n+4]; \ + ov5 += f_scalar[n+6] * 
x_scalar[n+5]; \ + } \ + if (vwidth > 8) { \ + ov3 += f_scalar[n+9] * x_scalar[n+8]; \ + ov3 += f_scalar[n+13] * x_scalar[n+12]; \ + ov4 += f_scalar[n+10] * x_scalar[n+8]; \ + ov4 += f_scalar[n+14] * x_scalar[n+12]; \ + ov5 += f_scalar[n+10] * x_scalar[n+9]; \ + ov5 += f_scalar[n+14] * x_scalar[n+13]; \ + } \ + } \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("ivdep") \ + _use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \ + for (int n = v_range; n < lt; n++) \ + f_scalar[n] += f_scalar2[n] + f_scalar3[n] + f_scalar4[n]; \ + } else if (nthreads==1) { \ + for (int n = lf; n < v_range; n += vwidth) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("simd") \ + for (int v = 0; v < vwidth; v++) \ + ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \ + ov3 += f_scalar[n+1] * x_scalar[n+0]; \ + ov4 += f_scalar[n+2] * x_scalar[n+0]; \ + ov5 += f_scalar[n+2] * x_scalar[n+1]; \ + if (vwidth > 4) { \ + ov3 += f_scalar[n+5] * x_scalar[n+4]; \ + ov4 += f_scalar[n+6] * x_scalar[n+4]; \ + ov5 += f_scalar[n+6] * x_scalar[n+5]; \ + } \ + if (vwidth > 8) { \ + ov3 += f_scalar[n+9] * x_scalar[n+8]; \ + ov3 += f_scalar[n+13] * x_scalar[n+12]; \ + ov4 += f_scalar[n+10] * x_scalar[n+8]; \ + ov4 += f_scalar[n+14] * x_scalar[n+12]; \ + ov5 += f_scalar[n+10] * x_scalar[n+9]; \ + ov5 += f_scalar[n+14] * x_scalar[n+13]; \ + } \ + } \ + } else if (nthreads==3) { \ + acc_t *f_scalar2 = f_scalar + f_stride4; \ + acc_t *f_scalar3 = f_scalar2 + f_stride4; \ + for (int n = lf; n < v_range; n += vwidth) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("simd") \ + for (int v = 0; v < vwidth; v++) { \ + f_scalar[n+v] += f_scalar2[n+v] + f_scalar3[n+v]; \ + ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \ + } \ + ov3 += f_scalar[n+1] * x_scalar[n+0]; \ + ov4 += f_scalar[n+2] * x_scalar[n+0]; \ + ov5 += f_scalar[n+2] * x_scalar[n+1]; \ + if (vwidth > 4) { \ + ov3 += f_scalar[n+5] * x_scalar[n+4]; \ + ov4 += f_scalar[n+6] * x_scalar[n+4]; \ + ov5 += f_scalar[n+6] * 
x_scalar[n+5]; \ + } \ + if (vwidth > 8) { \ + ov3 += f_scalar[n+9] * x_scalar[n+8]; \ + ov3 += f_scalar[n+13] * x_scalar[n+12]; \ + ov4 += f_scalar[n+10] * x_scalar[n+8]; \ + ov4 += f_scalar[n+14] * x_scalar[n+12]; \ + ov5 += f_scalar[n+10] * x_scalar[n+9]; \ + ov5 += f_scalar[n+14] * x_scalar[n+13]; \ + } \ + } \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("ivdep") \ + _use_simd_pragma("loop_count min(4) max(INTEL_COMPILE_WIDTH)") \ + for (int n = v_range; n < lt; n++) \ + f_scalar[n] += f_scalar2[n] + f_scalar3[n]; \ + } \ + for (int n = v_range; n < lt; n += 4) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("ivdep") \ + for (int v = 0; v < 4; v++) \ + ovv[v] += f_scalar[n+v] * x_scalar[n+v]; \ + ov3 += f_scalar[n+1] * x_scalar[n+0]; \ + ov4 += f_scalar[n+2] * x_scalar[n+0]; \ + ov5 += f_scalar[n+2] * x_scalar[n+1]; \ + } \ + ov0 += ovv[0]; \ + ov1 += ovv[1]; \ + ov2 += ovv[2]; \ + if (vwidth > 4) { \ + ov0 += ovv[4]; \ + ov1 += ovv[5]; \ + ov2 += ovv[6]; \ + } \ + if (vwidth > 8) { \ + ov0 += ovv[8] + ovv[12]; \ + ov1 += ovv[9] + ovv[13]; \ + ov2 += ovv[10] + ovv[14]; \ + } \ +} + +#define IP_PRE_fdotr_acc_force(nall, minlocal, nthreads, f_start, \ + f_stride, pos, offload, vflag, ov0, ov1, \ + ov2, ov3, ov4, ov5) \ +{ \ + int o_range = (nall - minlocal) * 4; \ + IP_PRE_omp_range_id_align(iifrom, iito, tid, o_range, nthreads, \ + sizeof(acc_t)); \ + \ + acc_t *f_scalar = &f_start[0].x; \ + int f_stride4 = f_stride * 4; \ + int t; \ + if (vflag == 2) t = 4; else t = 1; \ + acc_t *f_scalar2 = f_scalar + f_stride4 * t; \ + for ( ; t < nthreads; t++) { \ + _use_simd_pragma("vector aligned") \ + _use_simd_pragma("simd") \ + for (int n = iifrom; n < iito; n++) \ + f_scalar[n] += f_scalar2[n]; \ + f_scalar2 += f_stride4; \ + } \ + \ + if (vflag == 2) { \ + int nt_min = MIN(4,nthreads); \ + IP_PRE_fdotr_acc_force_l5(iifrom, iito, minlocal, nt_min, f_start, \ + f_stride, pos, ov0, ov1, ov2, ov3, ov4, \ + ov5); \ + } \ +} + #ifdef 
_LMP_INTEL_OFFLOAD #include <sys/time.h> @@ -223,120 +517,131 @@ inline double MIC_Wtime() { return time; } -#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ - nlocal, nall) \ -{ \ - if (fix->separate_buffers() && ago != 0) { \ - fix->start_watch(TIME_PACK); \ - if (offload) { \ - _use_omp_pragma("omp parallel default(none) shared(buffers,nlocal,nall)") \ - { \ - int ifrom, ito, tid; \ - int nthreads = comm->nthreads; \ - IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, \ - nthreads, sizeof(flt_t)); \ - buffers->thr_pack_cop(ifrom, ito, 0); \ - int nghost = nall - nlocal; \ - if (nghost) { \ - IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, \ - nthreads, sizeof(flt_t)); \ - buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal, \ - fix->offload_min_ghost() - nlocal, \ - ago == 1); \ - } \ - } \ - } else { \ - buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); \ - buffers->thr_pack_host(nlocal, nall, \ - fix->host_min_ghost()-nlocal); \ - } \ - fix->stop_watch(TIME_PACK); \ - } \ +#define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ + nlocal, nall) \ +{ \ + if (fix->separate_buffers() && ago != 0) { \ + fix->start_watch(TIME_PACK); \ + if (offload) { \ + int packthreads; \ + if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;\ + else packthreads = 1; \ + _use_omp_pragma("omp parallel if(packthreads > 1)") \ + { \ + int ifrom, ito, tid; \ + IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, \ + packthreads, sizeof(flt_t)); \ + buffers->thr_pack_cop(ifrom, ito, 0); \ + int nghost = nall - nlocal; \ + if (nghost) { \ + IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, \ + packthreads, sizeof(flt_t)); \ + buffers->thr_pack_cop(ifrom + nlocal, ito + nlocal, \ + fix->offload_min_ghost() - nlocal, \ + ago == 1); \ + } \ + } \ + } else { \ + buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); \ + buffers->thr_pack_host(nlocal, nall, \ + fix->host_min_ghost()-nlocal); \ + } \ + fix->stop_watch(TIME_PACK); 
\ + } \ } -#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ - buffers, offload, fix, separate_flag, \ - x_size, q_size, ev_size, f_stride) \ -{ \ - separate_flag = 0; \ - if (ago == 0) { \ - x_size = 0; \ - q_size = nall; \ - if (offload) { \ - if (fix->separate_buffers()) { \ - if (lmp->atom->torque) \ - separate_flag = 2; \ - else \ - separate_flag = 1; \ - } else \ - separate_flag = 3; \ - } \ - } else { \ - x_size = nall; \ - q_size = 0; \ - } \ - ev_size = 0; \ - if (evflag) { \ - if (eflag) ev_size = 2; \ - if (vflag) ev_size = 8; \ - } \ - int f_length; \ - if (newton) \ - f_length = nall; \ - else \ - f_length = nlocal; \ - f_length -= minlocal; \ - f_stride = buffers->get_stride(f_length); \ +#define IP_PRE_get_transfern(ago, newton, eflag, vflag, \ + buffers, offload, fix, separate_flag, \ + x_size, q_size, ev_size, f_stride) \ +{ \ + separate_flag = 0; \ + if (ago == 0) { \ + x_size = 0; \ + q_size = nall; \ + if (offload) { \ + if (fix->separate_buffers()) { \ + if (lmp->atom->torque) \ + separate_flag = 2; \ + else \ + separate_flag = 1; \ + } else \ + separate_flag = 3; \ + } \ + } else { \ + x_size = nall; \ + q_size = 0; \ + } \ + ev_size = 0; \ + if (eflag) ev_size = 2; \ + if (vflag) ev_size = 8; \ + if (newton) \ + f_stride = buffers->get_stride(nall); \ + else \ + f_stride = buffers->get_stride(inum); \ } -#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ - ev_global) \ -{ \ - if (offload) { \ - tc = buffers->get_off_threads(); \ - f_start = buffers->get_off_f(); \ - ev_global = buffers->get_ev_global(); \ - } else { \ - tc = comm->nthreads; \ - f_start = buffers->get_f(); \ - fix->start_watch(TIME_HOST_PAIR); \ - ev_global = buffers->get_ev_global_host(); \ - } \ +#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ + ev_global) \ +{ \ + if (offload) { \ + tc = buffers->get_off_threads(); \ + f_start = buffers->get_off_f(); \ + ev_global = buffers->get_ev_global(); \ + } else { \ + tc = 
comm->nthreads; \ + f_start = buffers->get_f(); \ + fix->start_watch(TIME_HOST_PAIR); \ + ev_global = buffers->get_ev_global_host(); \ + } \ } -#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ - f_stride, x, q) \ -{ \ - if (separate_flag) { \ - if (separate_flag < 3) { \ - int all_local = nlocal; \ - int ghost_min = overflow[LMP_GHOST_MIN]; \ - nlocal = overflow[LMP_LOCAL_MAX] + 1; \ - int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; \ - if (nghost < 0) nghost = 0; \ - nall = nlocal + nghost; \ - separate_flag--; \ - int flength; \ - if (newton) flength = nall; \ - else flength = nlocal; \ - IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), \ - separate_flag); \ - if (nghost) { \ - if (nlocal < all_local || ghost_min > all_local) { \ - memmove(x + nlocal, x + ghost_min, \ - (nall - nlocal) * sizeof(ATOM_T)); \ - if (q != 0) \ - memmove((void *)(q + nlocal), (void *)(q + ghost_min), \ - (nall - nlocal) * sizeof(flt_t)); \ - } \ - } \ - } \ - x[nall].x = INTEL_BIGP; \ - x[nall].y = INTEL_BIGP; \ - x[nall].z = INTEL_BIGP; \ - } \ +#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ + f_stride, x, q) \ +{ \ + if (separate_flag) { \ + if (separate_flag < 3) { \ + int all_local = nlocal; \ + int ghost_min = overflow[LMP_GHOST_MIN]; \ + nlocal = overflow[LMP_LOCAL_MAX] + 1; \ + int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; \ + if (nghost < 0) nghost = 0; \ + nall = nlocal + nghost; \ + separate_flag--; \ + int flength; \ + if (newton) flength = nall; \ + else flength = nlocal; \ + IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), \ + separate_flag); \ + if (nghost) { \ + if (nlocal < all_local || ghost_min > all_local) { \ + memmove(x + nlocal, x + ghost_min, \ + (nall - nlocal) * sizeof(ATOM_T)); \ + if (q != 0) \ + memmove((void *)(q + nlocal), (void *)(q + ghost_min), \ + (nall - nlocal) * sizeof(flt_t)); \ + } \ + } \ + } \ + x[nall].x = INTEL_BIGP; \ + x[nall].y = INTEL_BIGP; \ + x[nall].z = 
INTEL_BIGP; \ + } \ } +#define IP_PRE_fdotr_reduce_omp(newton, nall, minlocal, nthreads, \ + f_start, f_stride, x, offload, vflag, \ + ov0, ov1, ov2, ov3, ov4, ov5) \ +{ \ + if (newton) { \ + _use_omp_pragma("omp barrier"); \ + IP_PRE_fdotr_acc_force(nall, minlocal, nthreads, f_start, \ + f_stride, x, offload, vflag, ov0, ov1, ov2, \ + ov3, ov4, ov5); \ + } \ +} + +#define IP_PRE_fdotr_reduce(newton, nall, nthreads, f_stride, vflag, \ + ov0, ov1, ov2, ov3, ov4, ov5) #else @@ -344,265 +649,242 @@ inline double MIC_Wtime() { #define IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, \ nlocal, nall) -#define IP_PRE_get_transfern(ago, newton, evflag, eflag, vflag, \ - buffers, offload, fix, separate_flag, \ - x_size, q_size, ev_size, f_stride) \ +#define IP_PRE_get_transfern(ago, newton, eflag, vflag, \ + buffers, offload, fix, separate_flag, \ + x_size, q_size, ev_size, f_stride) \ { \ - separate_flag = 0; \ + separate_flag = 0; \ int f_length; \ if (newton) \ f_length = nall; \ else \ f_length = nlocal; \ - f_stride = buffers->get_stride(f_length); \ + f_stride = buffers->get_stride(f_length); \ } -#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ - ev_global) \ -{ \ - tc = comm->nthreads; \ - f_start = buffers->get_f(); \ - fix->start_watch(TIME_HOST_PAIR); \ - ev_global = buffers->get_ev_global_host(); \ +#define IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, \ + ev_global) \ +{ \ + tc = comm->nthreads; \ + f_start = buffers->get_f(); \ + fix->start_watch(TIME_HOST_PAIR); \ + ev_global = buffers->get_ev_global_host(); \ } -#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ - f_stride, x, q) +#define IP_PRE_repack_for_offload(newton, separate_flag, nlocal, nall, \ + f_stride, x, q) +#define IP_PRE_fdotr_reduce_omp(newton, nall, minlocal, nthreads, \ + f_start, f_stride, x, offload, vflag, \ + ov0, ov1, ov2, ov3, ov4, ov5) \ +{ \ + if (newton) { \ + if (vflag == 2 && nthreads > INTEL_HTHREADS) { \ + 
_use_omp_pragma("omp barrier"); \ + buffers->fdotr_reduce(nall, nthreads, f_stride, ov0, ov1, ov2, \ + ov3, ov4, ov5); \ + } \ + } \ +} + +#define IP_PRE_fdotr_reduce(newton, nall, nthreads, f_stride, vflag, \ + ov0, ov1, ov2, ov3, ov4, ov5) \ +{ \ + if (newton) { \ + if (vflag == 2 && nthreads <= INTEL_HTHREADS) { \ + int lt = nall * 4; \ + buffers->fdotr_reduce_l5(0, lt, nthreads, f_stride, ov0, ov1, \ + ov2, ov3, ov4, ov5); \ + } \ + } \ +} #endif -#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \ +#define IP_PRE_ev_tally_nbor(vflag, fpair, delx, dely, delz) \ { \ if (vflag == 1) { \ - sv0 += ev_pre * delx * delx * fpair; \ - sv1 += ev_pre * dely * dely * fpair; \ - sv2 += ev_pre * delz * delz * fpair; \ - sv3 += ev_pre * delx * dely * fpair; \ - sv4 += ev_pre * delx * delz * fpair; \ - sv5 += ev_pre * dely * delz * fpair; \ + sv0 += delx * delx * fpair; \ + sv1 += dely * dely * fpair; \ + sv2 += delz * delz * fpair; \ + sv3 += delx * dely * fpair; \ + sv4 += delx * delz * fpair; \ + sv5 += dely * delz * fpair; \ } \ } -#define IP_PRE_ev_tally_nbor3(vflag, fj, fk, delx, dely, delz, delr2) \ +#define IP_PRE_ev_tally_nborv(vflag, dx, dy, dz, fpx, fpy, fpz) \ { \ if (vflag == 1) { \ - sv0 += delx * fj[0] + delr2[0] * fk[0]; \ - sv1 += dely * fj[1] + delr2[1] * fk[1]; \ - sv2 += delz * fj[2] + delr2[2] * fk[2]; \ - sv3 += delx * fj[1] + delr2[0] * fk[1]; \ - sv4 += delx * fj[2] + delr2[0] * fk[2]; \ - sv5 += dely * fj[2] + delr2[1] * fk[2]; \ + sv0 += dx * fpx; \ + sv1 += dy * fpy; \ + sv2 += dz * fpz; \ + sv3 += dx * fpy; \ + sv4 += dx * fpz; \ + sv5 += dy * fpz; \ } \ } -#define IP_PRE_ev_tally_nbor3v(vflag, fj0, fj1, fj2, delx, dely, delz) \ +#define IP_PRE_ev_tally_nbor3(vflag, fj, fk, delx, dely, delz, delr2) \ { \ if (vflag == 1) { \ - sv0 += delx * fj0; \ - sv1 += dely * fj1; \ - sv2 += delz * fj2; \ - sv3 += delx * fj1; \ - sv4 += delx * fj2; \ - sv5 += dely * fj2; \ + sv0 += delx * fj[0] + delr2[0] * fk[0]; \ + sv1 += dely * fj[1] + 
delr2[1] * fk[1]; \ + sv2 += delz * fj[2] + delr2[2] * fk[2]; \ + sv3 += delx * fj[1] + delr2[0] * fk[1]; \ + sv4 += delx * fj[2] + delr2[0] * fk[2]; \ + sv5 += dely * fj[2] + delr2[1] * fk[2]; \ } \ } -#define IP_PRE_ev_tally_bond(eflag, eatom, vflag, ebond, i1, i2, fbond, \ - delx, dely, delz, obond, force, newton, \ - nlocal, ov0, ov1, ov2, ov3, ov4, ov5) \ +#define IP_PRE_ev_tally_nbor3v(vflag, fj0, fj1, fj2, delx, dely, delz) \ { \ - flt_t ev_pre; \ - if (newton) ev_pre = (flt_t)1.0; \ - else { \ - ev_pre = (flt_t)0.0; \ - if (i1 < nlocal) ev_pre += (flt_t)0.5; \ - if (i2 < nlocal) ev_pre += (flt_t)0.5; \ - } \ - \ - if (eflag) { \ - oebond += ev_pre * ebond; \ - if (eatom) { \ - flt_t halfeng = ebond * (flt_t)0.5; \ - if (newton || i1 < nlocal) f[i1].w += halfeng; \ - if (newton || i2 < nlocal) f[i2].w += halfeng; \ - } \ - } \ - \ - if (vflag) { \ - ov0 += ev_pre * (delx * delx * fbond); \ - ov1 += ev_pre * (dely * dely * fbond); \ - ov2 += ev_pre * (delz * delz * fbond); \ - ov3 += ev_pre * (delx * dely * fbond); \ - ov4 += ev_pre * (delx * delz * fbond); \ - ov5 += ev_pre * (dely * delz * fbond); \ + if (vflag == 1) { \ + sv0 += delx * fj0; \ + sv1 += dely * fj1; \ + sv2 += delz * fj2; \ + sv3 += delx * fj1; \ + sv4 += delx * fj2; \ + sv5 += dely * fj2; \ } \ } -#define IP_PRE_ev_tally_angle(eflag, eatom, vflag, eangle, i1, i2, i3, \ - f1x, f1y, f1z, f3x, f3y, f3z, delx1, \ - dely1, delz1, delx2, dely2, delz2, \ - oeangle, force, newton, nlocal, ov0, ov1, \ - ov2, ov3, ov4, ov5) \ +#define IP_PRE_ev_tally_bond(eflag, VFLAG, eatom, vflag, ebond, i1, i2, \ + fbond, delx, dely, delz, obond, force, \ + newton, nlocal, ov0, ov1, ov2, ov3, ov4, \ + ov5) \ { \ - flt_t ev_pre; \ - if (newton) ev_pre = (flt_t)1.0; \ - else { \ - ev_pre = (flt_t)0.0; \ - if (i1 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ - if (i2 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ - if (i3 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ - } \ - \ - if (eflag) { \ - oeangle += 
ev_pre * eangle; \ - if (eatom) { \ - flt_t thirdeng = eangle * (flt_t)0.3333333333333333; \ - if (newton || i1 < nlocal) f[i1].w += thirdeng; \ - if (newton || i2 < nlocal) f[i2].w += thirdeng; \ - if (newton || i3 < nlocal) f[i3].w += thirdeng; \ - } \ - } \ - \ - if (vflag) { \ - ov0 += ev_pre * (delx1 * f1x + delx2 * f3x); \ - ov1 += ev_pre * (dely1 * f1y + dely2 * f3y); \ - ov2 += ev_pre * (delz1 * f1z + delz2 * f3z); \ - ov3 += ev_pre * (delx1 * f1y + delx2 * f3y); \ - ov4 += ev_pre * (delx1 * f1z + delx2 * f3z); \ - ov5 += ev_pre * (dely1 * f1z + dely2 * f3z); \ + flt_t ev_pre; \ + if (newton) ev_pre = (flt_t)1.0; \ + else { \ + ev_pre = (flt_t)0.0; \ + if (i1 < nlocal) ev_pre += (flt_t)0.5; \ + if (i2 < nlocal) ev_pre += (flt_t)0.5; \ + } \ + \ + if (eflag) { \ + obond += ev_pre * ebond; \ + if (eatom) { \ + flt_t halfeng = ebond * (flt_t)0.5; \ + if (newton || i1 < nlocal) f[i1].w += halfeng; \ + if (newton || i2 < nlocal) f[i2].w += halfeng; \ + } \ + } \ + \ + if (VFLAG && vflag) { \ + ov0 += ev_pre * (delx * delx * fbond); \ + ov1 += ev_pre * (dely * dely * fbond); \ + ov2 += ev_pre * (delz * delz * fbond); \ + ov3 += ev_pre * (delx * dely * fbond); \ + ov4 += ev_pre * (delx * delz * fbond); \ + ov5 += ev_pre * (dely * delz * fbond); \ } \ } -#define IP_PRE_ev_tally_dihed(eflag, eatom, vflag, deng, i1, i2, i3, i4,\ - f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, \ - f4z, vb1x, vb1y, vb1z, vb2x, vb2y, vb2z, \ - vb3x, vb3y, vb3z,oedihedral, force, \ - newton, nlocal, ov0, ov1, ov2, ov3, ov4, \ - ov5) \ +#define IP_PRE_ev_tally_angle(eflag, VFLAG, eatom, vflag, eangle, i1, \ + i2, i3, f1x, f1y, f1z, f3x, f3y, f3z, \ + delx1, dely1, delz1, delx2, dely2, delz2, \ + oeangle, force, newton, nlocal, ov0, ov1, \ + ov2, ov3, ov4, ov5) \ { \ - flt_t ev_pre; \ - if (newton) ev_pre = (flt_t)1.0; \ - else { \ - ev_pre = (flt_t)0.0; \ - if (i1 < nlocal) ev_pre += (flt_t)0.25; \ - if (i2 < nlocal) ev_pre += (flt_t)0.25; \ - if (i3 < nlocal) ev_pre += (flt_t)0.25; \ - if (i4 
< nlocal) ev_pre += (flt_t)0.25; \ - } \ - \ - if (eflag) { \ - oedihedral += ev_pre * deng; \ - if (eatom) { \ - flt_t qdeng = deng * (flt_t)0.25; \ - if (newton || i1 < nlocal) f[i1].w += qdeng; \ - if (newton || i2 < nlocal) f[i2].w += qdeng; \ - if (newton || i3 < nlocal) f[i3].w += qdeng; \ - if (newton || i4 < nlocal) f[i4].w += qdeng; \ - } \ - } \ - \ - if (vflag) { \ - ov0 += ev_pre * (vb1x*f1x + vb2x*f3x + (vb3x+vb2x)*f4x); \ - ov1 += ev_pre * (vb1y*f1y + vb2y*f3y + (vb3y+vb2y)*f4y); \ - ov2 += ev_pre * (vb1z*f1z + vb2z*f3z + (vb3z+vb2z)*f4z); \ - ov3 += ev_pre * (vb1x*f1y + vb2x*f3y + (vb3x+vb2x)*f4y); \ - ov4 += ev_pre * (vb1x*f1z + vb2x*f3z + (vb3x+vb2x)*f4z); \ - ov5 += ev_pre * (vb1y*f1z + vb2y*f3z + (vb3y+vb2y)*f4z); \ + flt_t ev_pre; \ + if (newton) ev_pre = (flt_t)1.0; \ + else { \ + ev_pre = (flt_t)0.0; \ + if (i1 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ + if (i2 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ + if (i3 < nlocal) ev_pre += (flt_t)0.3333333333333333; \ + } \ + \ + if (eflag) { \ + oeangle += ev_pre * eangle; \ + if (eatom) { \ + flt_t thirdeng = eangle * (flt_t)0.3333333333333333; \ + if (newton || i1 < nlocal) f[i1].w += thirdeng; \ + if (newton || i2 < nlocal) f[i2].w += thirdeng; \ + if (newton || i3 < nlocal) f[i3].w += thirdeng; \ + } \ + } \ + \ + if (VFLAG && vflag) { \ + ov0 += ev_pre * (delx1 * f1x + delx2 * f3x); \ + ov1 += ev_pre * (dely1 * f1y + dely2 * f3y); \ + ov2 += ev_pre * (delz1 * f1z + delz2 * f3z); \ + ov3 += ev_pre * (delx1 * f1y + delx2 * f3y); \ + ov4 += ev_pre * (delx1 * f1z + delx2 * f3z); \ + ov5 += ev_pre * (dely1 * f1z + dely2 * f3z); \ } \ } -#define IP_PRE_ev_tally_atom(evflag, eflag, vflag, f, fwtmp) \ -{ \ - if (evflag) { \ - if (eflag) { \ - f[i].w += fwtmp; \ - oevdwl += sevdwl; \ - } \ - if (vflag == 1) { \ - ov0 += sv0; \ - ov1 += sv1; \ - ov2 += sv2; \ - ov3 += sv3; \ - ov4 += sv4; \ - ov5 += sv5; \ - } \ - } \ +#define IP_PRE_ev_tally_dihed(eflag, VFLAG, eatom, vflag, deng, i1, i2, \ 
+ i3, i4, f1x, f1y, f1z, f3x, f3y, f3z, f4x,\ + f4y, f4z, vb1x, vb1y, vb1z, vb2x, vb2y, \ + vb2z, vb3x, vb3y, vb3z, oedihedral, force,\ + newton, nlocal, ov0, ov1, ov2, ov3, ov4, \ + ov5) \ +{ \ + flt_t ev_pre; \ + if (newton) ev_pre = (flt_t)1.0; \ + else { \ + ev_pre = (flt_t)0.0; \ + if (i1 < nlocal) ev_pre += (flt_t)0.25; \ + if (i2 < nlocal) ev_pre += (flt_t)0.25; \ + if (i3 < nlocal) ev_pre += (flt_t)0.25; \ + if (i4 < nlocal) ev_pre += (flt_t)0.25; \ + } \ + \ + if (eflag) { \ + oedihedral += ev_pre * deng; \ + if (eatom) { \ + flt_t qdeng = deng * (flt_t)0.25; \ + if (newton || i1 < nlocal) f[i1].w += qdeng; \ + if (newton || i2 < nlocal) f[i2].w += qdeng; \ + if (newton || i3 < nlocal) f[i3].w += qdeng; \ + if (newton || i4 < nlocal) f[i4].w += qdeng; \ + } \ + } \ + \ + if (VFLAG && vflag) { \ + ov0 += ev_pre * (vb1x*f1x + vb2x*f3x + (vb3x+vb2x)*f4x); \ + ov1 += ev_pre * (vb1y*f1y + vb2y*f3y + (vb3y+vb2y)*f4y); \ + ov2 += ev_pre * (vb1z*f1z + vb2z*f3z + (vb3z+vb2z)*f4z); \ + ov3 += ev_pre * (vb1x*f1y + vb2x*f3y + (vb3x+vb2x)*f4y); \ + ov4 += ev_pre * (vb1x*f1z + vb2x*f3z + (vb3x+vb2x)*f4z); \ + ov5 += ev_pre * (vb1y*f1z + vb2y*f3z + (vb3y+vb2y)*f4z); \ + } \ } -#define IP_PRE_ev_tally_atomq(evflag, eflag, vflag, f, fwtmp) \ -{ \ - if (evflag) { \ - if (eflag) { \ - f[i].w += fwtmp; \ - oevdwl += sevdwl; \ - oecoul += secoul; \ - } \ - if (vflag == 1) { \ - ov0 += sv0; \ - ov1 += sv1; \ - ov2 += sv2; \ - ov3 += sv3; \ - ov4 += sv4; \ - ov5 += sv5; \ - } \ - } \ +#define IP_PRE_ev_tally_atom(newton, eflag, vflag, f, fwtmp) \ +{ \ + if (eflag) { \ + f[i].w += fwtmp; \ + oevdwl += sevdwl; \ + } \ + if (newton == 0 && vflag == 1) { \ + ov0 += sv0; \ + ov1 += sv1; \ + ov2 += sv2; \ + ov3 += sv3; \ + ov4 += sv4; \ + ov5 += sv5; \ + } \ } -#define IP_PRE_fdotr_acc_force(newton, evflag, eflag, vflag, eatom, \ - nall, nlocal, minlocal, nthreads, \ - f_start, f_stride, x, offload) \ -{ \ - int o_range; \ - if (newton) \ - o_range = nall; \ - else \ - o_range = 
nlocal; \ - if (offload == 0) o_range -= minlocal; \ - IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, \ - sizeof(acc_t)); \ - \ - int t_off = f_stride; \ - if (eflag && eatom) { \ - for (int t = 1; t < nthreads; t++) { \ - _use_simd_pragma("vector nontemporal") \ - _use_simd_pragma("novector") \ - for (int n = iifrom; n < iito; n++) { \ - f_start[n].x += f_start[n + t_off].x; \ - f_start[n].y += f_start[n + t_off].y; \ - f_start[n].z += f_start[n + t_off].z; \ - f_start[n].w += f_start[n + t_off].w; \ - } \ - t_off += f_stride; \ - } \ - } else { \ - for (int t = 1; t < nthreads; t++) { \ - _use_simd_pragma("vector nontemporal") \ - _use_simd_pragma("novector") \ - for (int n = iifrom; n < iito; n++) { \ - f_start[n].x += f_start[n + t_off].x; \ - f_start[n].y += f_start[n + t_off].y; \ - f_start[n].z += f_start[n + t_off].z; \ - } \ - t_off += f_stride; \ - } \ - } \ - \ - if (evflag) { \ - if (vflag == 2) { \ - const ATOM_T * _noalias const xo = x + minlocal; \ - _use_simd_pragma("vector nontemporal") \ - _use_simd_pragma("novector") \ - for (int n = iifrom; n < iito; n++) { \ - ov0 += f_start[n].x * xo[n].x; \ - ov1 += f_start[n].y * xo[n].y; \ - ov2 += f_start[n].z * xo[n].z; \ - ov3 += f_start[n].y * xo[n].x; \ - ov4 += f_start[n].z * xo[n].x; \ - ov5 += f_start[n].z * xo[n].y; \ - } \ - } \ - } \ +#define IP_PRE_ev_tally_atomq(newton, eflag, vflag, f, fwtmp) \ +{ \ + if (eflag) { \ + f[i].w += fwtmp; \ + oevdwl += sevdwl; \ + oecoul += secoul; \ + } \ + if (newton == 0 && vflag == 1) { \ + ov0 += sv0; \ + ov1 += sv1; \ + ov2 += sv2; \ + ov3 += sv3; \ + ov4 += sv4; \ + ov5 += sv5; \ + } \ } } diff --git a/src/USER-INTEL/intel_simd.h b/src/USER-INTEL/intel_simd.h index ac13f1edfdebaa56a23cdf1c56039b726586992d..4616f628e73a969b3aee3ba2e036d91ffa5f97f6 100644 --- a/src/USER-INTEL/intel_simd.h +++ b/src/USER-INTEL/intel_simd.h @@ -42,25 +42,25 @@ namespace ip_simd { struct SIMD_int { __m512i v; SIMD_int() {} - SIMD_int(const __m512i in) : v(in) {} + 
SIMD_int(const __m512i in) : v(in) {} operator __m512i() const { return v;} }; struct SIMD_float { __m512 v; SIMD_float() {} - SIMD_float(const __m512 in) : v(in) {} + SIMD_float(const __m512 in) : v(in) {} operator __m512() const { return v;} }; struct SIMD_double { __m512d v; SIMD_double() {} - SIMD_double(const __m512d in) : v(in) {} + SIMD_double(const __m512d in) : v(in) {} operator __m512d() const { return v;} }; - template<class flt_t> + template<class flt_t> class SIMD_type { }; @@ -92,20 +92,20 @@ namespace ip_simd { // ------- Set Operations - inline SIMD_int SIMD_set(const int l0, const int l1, const int l2, - const int l3, const int l4, const int l5, - const int l6, const int l7, const int l8, - const int l9, const int l10, const int l11, - const int l12, const int l13, const int l14, - const int l15) { + inline SIMD_int SIMD_set(const int l0, const int l1, const int l2, + const int l3, const int l4, const int l5, + const int l6, const int l7, const int l8, + const int l9, const int l10, const int l11, + const int l12, const int l13, const int l14, + const int l15) { return _mm512_setr_epi32(l0,l1,l2,l3,l4,l5,l6,l7, - l8,l9,l10,l11,l12,l13,l14,l15); + l8,l9,l10,l11,l12,l13,l14,l15); } inline SIMD_int SIMD_set(const int l) { return _mm512_set1_epi32(l); } - + inline SIMD_float SIMD_set(const float l) { return _mm512_set1_ps(l); } @@ -113,28 +113,28 @@ namespace ip_simd { inline SIMD_double SIMD_set(const double l) { return _mm512_set1_pd(l); } - + inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) { return _mm512_maskz_mov_epi32(m, one); } - inline SIMD_float SIMD_zero_masked(const SIMD_mask &m, - const SIMD_float &one) { + inline SIMD_float SIMD_zero_masked(const SIMD_mask &m, + const SIMD_float &one) { return _mm512_maskz_mov_ps(m, one); } - inline SIMD_double SIMD_zero_masked(const SIMD_mask &m, - const SIMD_double &one) { + inline SIMD_double SIMD_zero_masked(const SIMD_mask &m, + const SIMD_double &one) { return 
_mm512_maskz_mov_pd(m, one); } - inline SIMD_float SIMD_set(const SIMD_float &src, const SIMD_mask &m, - const SIMD_float &one) { + inline SIMD_float SIMD_set(const SIMD_float &src, const SIMD_mask &m, + const SIMD_float &one) { return _mm512_mask_mov_ps(src,m,one); } - inline SIMD_double SIMD_set(const SIMD_double &src, const SIMD_mask &m, - const SIMD_double &one) { + inline SIMD_double SIMD_set(const SIMD_double &src, const SIMD_mask &m, + const SIMD_double &one) { return _mm512_mask_mov_pd(src,m,one); } @@ -147,11 +147,11 @@ namespace ip_simd { inline SIMD_float SIMD_load(const float *p) { return _mm512_load_ps(p); } - + inline SIMD_double SIMD_load(const double *p) { return _mm512_load_pd(p); } - + inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) { return _mm512_maskz_load_epi32(m, p); } @@ -159,7 +159,7 @@ namespace ip_simd { inline SIMD_float SIMD_loadz(const SIMD_mask &m, const float *p) { return _mm512_maskz_load_ps(m, p); } - + inline SIMD_double SIMD_loadz(const SIMD_mask &m, const double *p) { return _mm512_maskz_load_pd(m, p); } @@ -168,7 +168,7 @@ namespace ip_simd { return _mm512_i32gather_epi32(i, p, _MM_SCALE_4); } - inline SIMD_float SIMD_gather(const float *p, const SIMD_int &i) { + inline SIMD_float SIMD_gather(const float *p, const SIMD_int &i) { return _mm512_i32gather_ps(i, p, _MM_SCALE_4); } @@ -177,56 +177,56 @@ namespace ip_simd { } inline SIMD_int SIMD_gather(const SIMD_mask &m, const int *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, p, - _MM_SCALE_4); + _MM_SCALE_4); } inline SIMD_float SIMD_gather(const SIMD_mask &m, const float *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, p, - _MM_SCALE_4); + _MM_SCALE_4); } inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, p, - 
_MM_SCALE_8); + _MM_SCALE_8); } template <typename T> inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p, - const SIMD_int &i) { + const SIMD_int &i) { } template <> inline SIMD_int SIMD_gatherz_offset<float>(const SIMD_mask &m, const int *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32gather_epi32( _mm512_set1_epi32(0), m, i, p, - _MM_SCALE_4); + _MM_SCALE_4); } template <> inline SIMD_int SIMD_gatherz_offset<double>(const SIMD_mask &m, const int *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32gather_epi32( _mm512_set1_epi32(0), m, i, p, - _MM_SCALE_8); + _MM_SCALE_8); } inline SIMD_float SIMD_gatherz(const SIMD_mask &m, const float *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32gather_ps( _mm512_set1_ps((float)0), m, i, p, - _MM_SCALE_4); + _MM_SCALE_4); } inline SIMD_double SIMD_gatherz(const SIMD_mask &m, const double *p, - const SIMD_int &i) { + const SIMD_int &i) { return _mm512_mask_i32logather_pd( _mm512_set1_pd(0.0), m, i, p, - _MM_SCALE_8); + _MM_SCALE_8); } // ------- Store Operations - + inline void SIMD_store(int *p, const SIMD_int &one) { return _mm512_store_epi32(p,one); } @@ -240,17 +240,17 @@ namespace ip_simd { } inline void SIMD_scatter(const SIMD_mask &m, int *p, - const SIMD_int &i, const SIMD_int &vec) { + const SIMD_int &i, const SIMD_int &vec) { _mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4); } inline void SIMD_scatter(const SIMD_mask &m, float *p, - const SIMD_int &i, const SIMD_float &vec) { + const SIMD_int &i, const SIMD_float &vec) { _mm512_mask_i32scatter_ps(p, m, i, vec, _MM_SCALE_4); } inline void SIMD_scatter(const SIMD_mask &m, double *p, - const SIMD_int &i, const SIMD_double &vec) { + const SIMD_int &i, const SIMD_double &vec) { _mm512_mask_i32loscatter_pd(p, m, i, vec, _MM_SCALE_8); } @@ -263,76 +263,76 @@ namespace ip_simd { inline SIMD_float operator+(const SIMD_float &one, const SIMD_float &two) { return _mm512_add_ps(one,two); 
} - + inline SIMD_double operator+(const SIMD_double &one, const SIMD_double &two) { return _mm512_add_pd(one,two); } - + inline SIMD_int operator+(const SIMD_int &one, const int two) { return _mm512_add_epi32(one,SIMD_set(two)); } - + inline SIMD_float operator+(const SIMD_float &one, const float two) { return _mm512_add_ps(one,SIMD_set(two)); } - + inline SIMD_double operator+(const SIMD_double &one, const double two) { return _mm512_add_pd(one,SIMD_set(two)); } inline SIMD_int SIMD_add(const SIMD_mask &m, - const SIMD_int &one, const int two) { + const SIMD_int &one, const int two) { return _mm512_mask_add_epi32(one,m,one,SIMD_set(two)); } inline SIMD_float SIMD_add(const SIMD_mask &m, - const SIMD_float &one, const float two) { + const SIMD_float &one, const float two) { return _mm512_mask_add_ps(one,m,one,SIMD_set(two)); } inline SIMD_double SIMD_add(const SIMD_mask &m, - const SIMD_double &one, const double two) { + const SIMD_double &one, const double two) { return _mm512_mask_add_pd(one,m,one,SIMD_set(two)); } inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m, - const SIMD_int &one, const SIMD_int &two) { + const SIMD_int &one, const SIMD_int &two) { return _mm512_mask_add_epi32(s,m,one,two); } inline SIMD_float SIMD_add(const SIMD_float &s, const SIMD_mask &m, - const SIMD_float &one, const SIMD_float &two) { + const SIMD_float &one, const SIMD_float &two) { return _mm512_mask_add_ps(s,m,one,two); } inline SIMD_double SIMD_add(const SIMD_double &s, const SIMD_mask &m, - const SIMD_double &one, const SIMD_double &two) { + const SIMD_double &one, const SIMD_double &two) { return _mm512_mask_add_pd(s,m,one,two); } inline SIMD_int SIMD_sub(const SIMD_int &s, const SIMD_mask &m, - const SIMD_int &one, const SIMD_int &two) { + const SIMD_int &one, const SIMD_int &two) { return _mm512_mask_sub_epi32(s,m,one,two); } inline SIMD_float SIMD_sub(const SIMD_float &s, const SIMD_mask &m, - const SIMD_float &one, const SIMD_float &two) { + const SIMD_float 
&one, const SIMD_float &two) { return _mm512_mask_sub_ps(s,m,one,two); } inline SIMD_double SIMD_sub(const SIMD_double &s, const SIMD_mask &m, - const SIMD_double &one, const SIMD_double &two) { + const SIMD_double &one, const SIMD_double &two) { return _mm512_mask_sub_pd(s,m,one,two); } inline SIMD_int operator-(const SIMD_int &one) { return _mm512_sub_epi32(SIMD_set((int)0),one); } - + inline SIMD_float operator-(const SIMD_float &one) { return _mm512_sub_ps(SIMD_set((float)0),one); } - + inline SIMD_double operator-(const SIMD_double &one) { return _mm512_sub_pd(SIMD_set((double)0),one); } @@ -340,80 +340,80 @@ namespace ip_simd { inline SIMD_int operator-(const SIMD_int &one, const SIMD_int &two) { return _mm512_sub_epi32(one,two); } - + inline SIMD_float operator-(const SIMD_float &one, const SIMD_float &two) { return _mm512_sub_ps(one,two); } - + inline SIMD_double operator-(const SIMD_double &one, const SIMD_double &two) { return _mm512_sub_pd(one,two); } - + inline SIMD_int operator-(const SIMD_int &one, const int two) { return _mm512_sub_epi32(one,SIMD_set(two)); } - + inline SIMD_float operator-(const SIMD_float &one, const float two) { return _mm512_sub_ps(one,SIMD_set(two)); } - + inline SIMD_double operator-(const SIMD_double &one, const double two) { return _mm512_sub_pd(one,SIMD_set(two)); } - + inline SIMD_int operator*(const SIMD_int &one, const SIMD_int &two) { return _mm512_mullo_epi32(one,two); } - + inline SIMD_float operator*(const SIMD_float &one, const SIMD_float &two) { return _mm512_mul_ps(one,two); } - + inline SIMD_double operator*(const SIMD_double &one, const SIMD_double &two) { return _mm512_mul_pd(one,two); } - + inline SIMD_int operator*(const SIMD_int &one, const int two) { return _mm512_mullo_epi32(one,SIMD_set(two)); } - + inline SIMD_float operator*(const SIMD_float &one, const float two) { return _mm512_mul_ps(one,SIMD_set(two)); } - + inline SIMD_double operator*(const SIMD_double &one, const double two) { return 
_mm512_mul_pd(one,SIMD_set(two)); } - + inline SIMD_float operator/(const SIMD_float &one, const SIMD_float &two) { return _mm512_div_ps(one,two); } - + inline SIMD_double operator/(const SIMD_double &one, const SIMD_double &two) { return _mm512_div_pd(one,two); } - + inline SIMD_float SIMD_fma(const SIMD_float &one, const SIMD_float &two, - const SIMD_float &three) { + const SIMD_float &three) { return _mm512_fmadd_ps(one,two,three); } inline SIMD_double SIMD_fma(const SIMD_double &one, const SIMD_double &two, - const SIMD_double &three) { + const SIMD_double &three) { return _mm512_fmadd_pd(one,two,three); } inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two, - const SIMD_float &three) { + const SIMD_float &three) { return _mm512_fmsub_ps(one,two,three); } inline SIMD_double SIMD_fms(const SIMD_double &one, const SIMD_double &two, - const SIMD_double &three) { + const SIMD_double &three) { return _mm512_fmsub_pd(one,two,three); } - - // ------- SVML operations + + // ------- SVML operations inline SIMD_float SIMD_rcp(const SIMD_float &one) { #ifdef __AVX512ER__ @@ -489,33 +489,33 @@ namespace ip_simd { // ------- Comparison operations - inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one, - const SIMD_int &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one, + const SIMD_int &two) { return _mm512_mask_cmplt_epi32_mask(m, one, two); } - inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_float &one, - const SIMD_float &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_float &one, + const SIMD_float &two) { return _mm512_mask_cmplt_ps_mask(m, one, two); } - inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_double &one, - const SIMD_double &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_double &one, + const SIMD_double &two) { return _mm512_mask_cmplt_pd_mask(m, one, two); } - inline SIMD_mask SIMD_lt(SIMD_mask m, const int one, - const SIMD_int &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const int one, + 
const SIMD_int &two) { return _mm512_mask_cmplt_epi32_mask(m, SIMD_set(one), two); } - inline SIMD_mask SIMD_lt(SIMD_mask m, const float one, - const SIMD_float &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const float one, + const SIMD_float &two) { return _mm512_mask_cmplt_ps_mask(m, SIMD_set(one), two); } - inline SIMD_mask SIMD_lt(SIMD_mask m, const double one, - const SIMD_double &two) { + inline SIMD_mask SIMD_lt(SIMD_mask m, const double one, + const SIMD_double &two) { return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two); } @@ -629,112 +629,112 @@ namespace ip_simd { // i indices should be positive inline void SIMD_conflict_pi_reduce1(const SIMD_mask &m, const SIMD_int &i, - SIMD_float &v1) { + SIMD_float &v1) { SIMD_int jc = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), m, i); SIMD_int cd = _mm512_maskz_conflict_epi32(m, jc); SIMD_mask todo_mask = _mm512_test_epi32_mask(cd, _mm512_set1_epi32(-1)); if (todo_mask) { SIMD_int lz = _mm512_lzcnt_epi32(cd); SIMD_int lid = _mm512_sub_epi32(_mm512_set1_epi32(31), - _mm512_lzcnt_epi32(cd)); - + _mm512_lzcnt_epi32(cd)); + while(todo_mask) { - SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); - SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, - todo_bcast); - SIMD_float am_perm; - am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), - now_mask, lid, v1); - v1 = _mm512_mask_add_ps(v1, now_mask, v1, am_perm); - todo_mask = _mm512_kxor(todo_mask, now_mask); + SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); + SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, + todo_bcast); + SIMD_float am_perm; + am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), + now_mask, lid, v1); + v1 = _mm512_mask_add_ps(v1, now_mask, v1, am_perm); + todo_mask = _mm512_kxor(todo_mask, now_mask); } } } // i indices should be positive inline void SIMD_conflict_pi_reduce1(const SIMD_mask &m, const SIMD_int &i, - SIMD_double &v1) { + SIMD_double &v1) { SIMD_int jc = 
_mm512_mask_mov_epi32(_mm512_set1_epi32(-1), m, i); SIMD_int cd = _mm512_maskz_conflict_epi32(m, jc); SIMD_mask todo_mask = _mm512_test_epi32_mask(cd, _mm512_set1_epi32(-1)); if (todo_mask) { SIMD_int lz = _mm512_lzcnt_epi32(cd); SIMD_int lid = _mm512_sub_epi32(_mm512_set1_epi32(31), - _mm512_lzcnt_epi32(cd)); + _mm512_lzcnt_epi32(cd)); lid = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(lid)); - + while(todo_mask) { - SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); - SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, - todo_bcast); - SIMD_double am_perm; - am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), - now_mask, lid, v1); - v1 = _mm512_mask_add_pd(v1, now_mask, v1, am_perm); - todo_mask = _mm512_kxor(todo_mask, now_mask); + SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); + SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, + todo_bcast); + SIMD_double am_perm; + am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), + now_mask, lid, v1); + v1 = _mm512_mask_add_pd(v1, now_mask, v1, am_perm); + todo_mask = _mm512_kxor(todo_mask, now_mask); } } } // i indices should be positive inline void SIMD_conflict_pi_reduce3(const SIMD_mask &m, const SIMD_int &i, - SIMD_float &v1, SIMD_float &v2, - SIMD_float &v3) { + SIMD_float &v1, SIMD_float &v2, + SIMD_float &v3) { SIMD_int jc = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), m, i); SIMD_int cd = _mm512_maskz_conflict_epi32(m, jc); SIMD_mask todo_mask = _mm512_test_epi32_mask(cd, _mm512_set1_epi32(-1)); if (todo_mask) { SIMD_int lz = _mm512_lzcnt_epi32(cd); SIMD_int lid = _mm512_sub_epi32(_mm512_set1_epi32(31), - _mm512_lzcnt_epi32(cd)); - + _mm512_lzcnt_epi32(cd)); + while(todo_mask) { - SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); - SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, - todo_bcast); - SIMD_float am_perm; - am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), - now_mask, lid, v1); - v1 = 
_mm512_mask_add_ps(v1, now_mask, v1, am_perm); - am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), - now_mask, lid, v2); - v2 = _mm512_mask_add_ps(v2, now_mask, v2, am_perm); - am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), - now_mask, lid, v3); - v3 = _mm512_mask_add_ps(v3, now_mask, v3, am_perm); - todo_mask = _mm512_kxor(todo_mask, now_mask); + SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); + SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, + todo_bcast); + SIMD_float am_perm; + am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), + now_mask, lid, v1); + v1 = _mm512_mask_add_ps(v1, now_mask, v1, am_perm); + am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), + now_mask, lid, v2); + v2 = _mm512_mask_add_ps(v2, now_mask, v2, am_perm); + am_perm = _mm512_mask_permutexvar_ps(_mm512_undefined_ps(), + now_mask, lid, v3); + v3 = _mm512_mask_add_ps(v3, now_mask, v3, am_perm); + todo_mask = _mm512_kxor(todo_mask, now_mask); } } } // i indices should be positive inline void SIMD_conflict_pi_reduce3(const SIMD_mask &m, const SIMD_int &i, - SIMD_double &v1, SIMD_double &v2, - SIMD_double &v3) { + SIMD_double &v1, SIMD_double &v2, + SIMD_double &v3) { SIMD_int jc = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), m, i); SIMD_int cd = _mm512_maskz_conflict_epi32(m, jc); SIMD_mask todo_mask = _mm512_test_epi32_mask(cd, _mm512_set1_epi32(-1)); if (todo_mask) { SIMD_int lz = _mm512_lzcnt_epi32(cd); SIMD_int lid = _mm512_sub_epi32(_mm512_set1_epi32(31), - _mm512_lzcnt_epi32(cd)); + _mm512_lzcnt_epi32(cd)); lid = _mm512_cvtepi32_epi64(_mm512_castsi512_si256(lid)); - + while(todo_mask) { - SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); - SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, - todo_bcast); - SIMD_double am_perm; - am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), - now_mask, lid, v1); - v1 = _mm512_mask_add_pd(v1, now_mask, v1, am_perm); - am_perm = 
_mm512_mask_permutexvar_pd(_mm512_undefined_pd(), - now_mask, lid, v2); - v2 = _mm512_mask_add_pd(v2, now_mask, v2, am_perm); - am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), - now_mask, lid, v3); - v3 = _mm512_mask_add_pd(v3, now_mask, v3, am_perm); - todo_mask = _mm512_kxor(todo_mask, now_mask); + SIMD_int todo_bcast = _mm512_broadcastmw_epi32(todo_mask); + SIMD_mask now_mask = _mm512_mask_testn_epi32_mask(todo_mask, cd, + todo_bcast); + SIMD_double am_perm; + am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), + now_mask, lid, v1); + v1 = _mm512_mask_add_pd(v1, now_mask, v1, am_perm); + am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), + now_mask, lid, v2); + v2 = _mm512_mask_add_pd(v2, now_mask, v2, am_perm); + am_perm = _mm512_mask_permutexvar_pd(_mm512_undefined_pd(), + now_mask, lid, v3); + v3 = _mm512_mask_add_pd(v3, now_mask, v3, am_perm); + todo_mask = _mm512_kxor(todo_mask, now_mask); } } } @@ -744,7 +744,7 @@ namespace ip_simd { inline SIMD_int operator&(const SIMD_int &one, const SIMD_int &two) { return _mm512_and_epi32(one,two); } - + inline SIMD_int operator>>(const SIMD_int &one, const SIMD_int &two) { return _mm512_srlv_epi32(one,two); } @@ -752,21 +752,21 @@ namespace ip_simd { inline SIMD_int operator<<(const SIMD_int &one, const unsigned two) { return _mm512_slli_epi32(one,two); } - + // -------- I/O operations inline void SIMD_print(const __m512i &vec) { - for (int i = 0; i < 16; i++) + for (int i = 0; i < 16; i++) printf("%d ",(*((int*)&(vec) + (i)))); } inline void SIMD_print(const __m512 &vec) { - for (int i = 0; i < 16; i++) + for (int i = 0; i < 16; i++) printf("%f ",(*((float*)&(vec) + (i)))); } inline void SIMD_print(const __m512d &vec) { - for (int i = 0; i < 8; i++) + for (int i = 0; i < 8; i++) printf("%f ",(*((double*)&(vec) + (i)))); } @@ -801,280 +801,280 @@ namespace ip_simd { // ---------- LAMMPS operations #ifndef SW_GATHER_TEST inline void SIMD_atom_gather(const SIMD_mask &m, const float *atom, - 
const SIMD_int &i, SIMD_float &x, SIMD_float &y, - SIMD_float &z) { - x = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom, - _MM_SCALE_1); + const SIMD_int &i, SIMD_float &x, SIMD_float &y, + SIMD_float &z) { + x = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom, + _MM_SCALE_1); y = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom+1, - _MM_SCALE_1); + _MM_SCALE_1); z = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom+2, - _MM_SCALE_1); + _MM_SCALE_1); } inline void SIMD_atom_gather(const SIMD_mask &m, const float *atom, - const SIMD_int &i, SIMD_float &x, SIMD_float &y, - SIMD_float &z, SIMD_int &type) { - x = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom, - _MM_SCALE_1); + const SIMD_int &i, SIMD_float &x, SIMD_float &y, + SIMD_float &z, SIMD_int &type) { + x = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom, + _MM_SCALE_1); y = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom+1, - _MM_SCALE_1); + _MM_SCALE_1); z = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, atom+2, - _MM_SCALE_1); + _MM_SCALE_1); type = _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, atom+3, - _MM_SCALE_1); + _MM_SCALE_1); } #endif inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom, - const SIMD_int &i, SIMD_double &x, - SIMD_double &y, SIMD_double &z) { - x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom, - _MM_SCALE_2); + const SIMD_int &i, SIMD_double &x, + SIMD_double &y, SIMD_double &z) { + x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom, + _MM_SCALE_2); y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1, - _MM_SCALE_2); + _MM_SCALE_2); z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2, - _MM_SCALE_2); + _MM_SCALE_2); } inline void SIMD_atom_gather(const SIMD_mask &m, const double *atom, - const SIMD_int &i, SIMD_double &x, - SIMD_double &y, SIMD_double &z, SIMD_int &type) { - x = 
_mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom, - _MM_SCALE_2); + const SIMD_int &i, SIMD_double &x, + SIMD_double &y, SIMD_double &z, SIMD_int &type) { + x = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom, + _MM_SCALE_2); y = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+1, - _MM_SCALE_2); + _MM_SCALE_2); z = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, atom+2, - _MM_SCALE_2); + _MM_SCALE_2); type = _mm512_mask_i32gather_epi32(_mm512_undefined_epi32(), m, i, atom+3, - _MM_SCALE_2); + _MM_SCALE_2); } - inline SIMD_float SIMD_ev_add(const SIMD_float &one, - const SIMD_float &two) { + inline SIMD_float SIMD_ev_add(const SIMD_float &one, + const SIMD_float &two) { return _mm512_add_ps(one,two); } - inline SIMD_double SIMD_ev_add(const SIMD_double &one, - const SIMD_double &two) { + inline SIMD_double SIMD_ev_add(const SIMD_double &one, + const SIMD_double &two) { return _mm512_add_pd(one,two); } - inline SIMD_double SIMD_ev_add(const SIMD_double &one, - const SIMD_float &two) { + inline SIMD_double SIMD_ev_add(const SIMD_double &one, + const SIMD_float &two) { SIMD_double twod = _mm512_cvtps_pd(_mm512_castps512_ps256(two)); SIMD_double ans = _mm512_add_pd(one,twod); twod = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(two,two,238))); + _mm512_shuffle_f32x4(two,two,238))); return _mm512_add_pd(ans,twod); } - inline void SIMD_jeng_update(const SIMD_mask &rmask, float *force, - const SIMD_int &joffset, SIMD_float &eng) { + inline void SIMD_jeng_update(const SIMD_mask &rmask, float *force, + const SIMD_int &joffset, SIMD_float &eng) { SIMD_float jeng; SIMD_conflict_pi_reduce1(rmask, joffset, eng); - jeng = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), rmask, joffset, - force, _MM_SCALE_1); + jeng = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), rmask, joffset, + force, _MM_SCALE_1); jeng = jeng + eng; _mm512_mask_i32scatter_ps(force, rmask, joffset, jeng, _MM_SCALE_1); } - inline void 
SIMD_jeng_update(const SIMD_mask &rmask, double *force, - const SIMD_int &joffset, SIMD_double &eng) { + inline void SIMD_jeng_update(const SIMD_mask &rmask, double *force, + const SIMD_int &joffset, SIMD_double &eng) { SIMD_double jeng; SIMD_conflict_pi_reduce1(rmask, joffset, eng); - jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force, _MM_SCALE_2); + jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force, _MM_SCALE_2); jeng = jeng + eng; _mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2); } - inline void SIMD_jeng_update(const SIMD_mask &rmask, double *force, - const SIMD_int &joffset, SIMD_float &eng) { + inline void SIMD_jeng_update(const SIMD_mask &rmask, double *force, + const SIMD_int &joffset, SIMD_float &eng) { SIMD_double engd, jeng; engd = _mm512_cvtps_pd(_mm512_castps512_ps256(eng)); SIMD_conflict_pi_reduce1(rmask, joffset, engd); - jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force, _MM_SCALE_2); + jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force, _MM_SCALE_2); jeng = jeng + engd; _mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2); SIMD_mask rmask2 = rmask >> 8; engd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(eng,eng,238))); + _mm512_shuffle_f32x4(eng,eng,238))); SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238); SIMD_conflict_pi_reduce1(rmask2, joffset2, engd); - jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, - force, _MM_SCALE_2); + jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, + force, _MM_SCALE_2); jeng = jeng + engd; _mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jeng, _MM_SCALE_2); } - inline void SIMD_jeng_update_hi(const SIMD_mask &mask, float *force, - const SIMD_int &joffset1, SIMD_float &eng) { + inline void SIMD_jeng_update_hi(const SIMD_mask &mask, float *force, + const SIMD_int &joffset1, 
SIMD_float &eng) { } - inline void SIMD_jeng_update_hi(const SIMD_mask &mask, double *force, - const SIMD_int &joffset1, SIMD_double &eng) { + inline void SIMD_jeng_update_hi(const SIMD_mask &mask, double *force, + const SIMD_int &joffset1, SIMD_double &eng) { SIMD_mask rmask = mask >> 8; SIMD_int joffset = _mm512_shuffle_i32x4(joffset1, joffset1, 238); SIMD_double jeng; SIMD_conflict_pi_reduce1(rmask, joffset, eng); - jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force, _MM_SCALE_2); + jeng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force, _MM_SCALE_2); jeng = jeng + eng; _mm512_mask_i32loscatter_pd(force, rmask, joffset, jeng, _MM_SCALE_2); } inline void SIMD_safe_jforce(const SIMD_mask &m, float *force, - const SIMD_int &i, SIMD_float &fx, - SIMD_float &fy, SIMD_float &fz) { + const SIMD_int &i, SIMD_float &fx, + SIMD_float &fy, SIMD_float &fz) { SIMD_conflict_pi_reduce3(m, i, fx, fy, fz); SIMD_float jfrc; - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, + _MM_SCALE_1); jfrc = jfrc + fx; _mm512_mask_i32scatter_ps(force, m, i, jfrc, _MM_SCALE_1); - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, + _MM_SCALE_1); jfrc = jfrc + fy; _mm512_mask_i32scatter_ps(force+1, m, i, jfrc, _MM_SCALE_1); jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 2, - _MM_SCALE_1); + _MM_SCALE_1); jfrc = jfrc + fz; _mm512_mask_i32scatter_ps(force+2, m, i, jfrc, _MM_SCALE_1); } inline void SIMD_safe_jforce(const SIMD_mask &m, double *force, - const SIMD_int &i, SIMD_double &fx, - SIMD_double &fy, SIMD_double &fz) { + const SIMD_int &i, SIMD_double &fx, + SIMD_double &fy, SIMD_double &fz) { SIMD_conflict_pi_reduce3(m, i, fx, fy, fz); SIMD_double jfrc; - jfrc = 
_mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, - _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, + _MM_SCALE_2); jfrc = jfrc + fx; _mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, - _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, + _MM_SCALE_2); jfrc = jfrc + fy; _mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2); jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2, - _MM_SCALE_2); + _MM_SCALE_2); jfrc = jfrc + fz; _mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2); } - inline void SIMD_safe_jforce(const SIMD_mask &rmask, double *force, - const SIMD_int &joffset, SIMD_float &amx, - SIMD_float &amy, SIMD_float &amz) { + inline void SIMD_safe_jforce(const SIMD_mask &rmask, double *force, + const SIMD_int &joffset, SIMD_float &amx, + SIMD_float &amy, SIMD_float &amz) { SIMD_double amxd, amyd, amzd; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256(amx)); amyd = _mm512_cvtps_pd(_mm512_castps512_ps256(amy)); amzd = _mm512_cvtps_pd(_mm512_castps512_ps256(amz)); SIMD_conflict_pi_reduce3(rmask, joffset, amxd, amyd, amzd); SIMD_double jfrc; - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force, _MM_SCALE_2); jfrc = jfrc + amxd; _mm512_mask_i32loscatter_pd(force, rmask, joffset, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force + 1, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force + 1, _MM_SCALE_2); jfrc = jfrc + amyd; _mm512_mask_i32loscatter_pd(force+1, rmask, joffset, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, - force + 2, _MM_SCALE_2); + jfrc = 
_mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask, joffset, + force + 2, _MM_SCALE_2); jfrc = jfrc + amzd; _mm512_mask_i32loscatter_pd(force+2, rmask, joffset, jfrc, _MM_SCALE_2); SIMD_mask rmask2 = rmask >> 8; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(amx,amx,238))); + _mm512_shuffle_f32x4(amx,amx,238))); amyd = _mm512_cvtps_pd(_mm512_castps512_ps256( _mm512_shuffle_f32x4(amy,amy,238))); amzd = _mm512_cvtps_pd(_mm512_castps512_ps256( _mm512_shuffle_f32x4(amz,amz,238))); SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238); SIMD_conflict_pi_reduce3(rmask2, joffset2, amxd, amyd, amzd); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, - force, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, + force, _MM_SCALE_2); jfrc = jfrc + amxd; _mm512_mask_i32loscatter_pd(force, rmask2, joffset2, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, - force + 1, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, + force + 1, _MM_SCALE_2); jfrc = jfrc + amyd; _mm512_mask_i32loscatter_pd(force+1, rmask2, joffset2, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, - force + 2, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), rmask2, joffset2, + force + 2, _MM_SCALE_2); jfrc = jfrc + amzd; _mm512_mask_i32loscatter_pd(force+2, rmask2, joffset2, jfrc, _MM_SCALE_2); } inline void SIMD_jforce_update(const SIMD_mask &m, float *force, - const SIMD_int &i, const SIMD_float &fx, - const SIMD_float &fy, const SIMD_float &fz) { + const SIMD_int &i, const SIMD_float &fx, + const SIMD_float &fy, const SIMD_float &fz) { SIMD_float jfrc; - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, + _MM_SCALE_1); jfrc = jfrc - fx; 
_mm512_mask_i32scatter_ps(force, m, i, jfrc, _MM_SCALE_1); - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, + _MM_SCALE_1); jfrc = jfrc - fy; _mm512_mask_i32scatter_ps(force+1, m, i, jfrc, _MM_SCALE_1); jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 2, - _MM_SCALE_1); + _MM_SCALE_1); jfrc = jfrc - fz; _mm512_mask_i32scatter_ps(force+2, m, i, jfrc, _MM_SCALE_1); } template <class ft> inline void SIMD_scalar_update(const int jj, const int* ejnum, ft *force, - const int* i, const double *fx, - const double *fy, const double *fz, - const double *fx2, const double *fy2, - const double *fz2) { + const int* i, const double *fx, + const double *fy, const double *fz, + const double *fx2, const double *fy2, + const double *fz2) { #pragma novector for (int k=0; k<8; k++) { if (jj < ejnum[k]) { - const int j = i[k]; - force[j].x -= fx[k]; - force[j].y -= fy[k]; - force[j].z -= fz[k]; + const int j = i[k]; + force[j].x -= fx[k]; + force[j].y -= fy[k]; + force[j].z -= fz[k]; } } - + #pragma novector for (int k=8; k<16; k++) { if (jj < ejnum[k]) { - const int j = i[k]; - force[j].x -= fx2[k-8]; - force[j].y -= fy2[k-8]; - force[j].z -= fz2[k-8]; + const int j = i[k]; + force[j].x -= fx2[k-8]; + force[j].y -= fy2[k-8]; + force[j].z -= fz2[k-8]; } } } inline void SIMD_jforce_update(const SIMD_mask &m, double *force, - const SIMD_int &i, const SIMD_double &fx, - const SIMD_double &fy, const SIMD_double &fz) { + const SIMD_int &i, const SIMD_double &fx, + const SIMD_double &fy, const SIMD_double &fz) { SIMD_double jfrc; - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, - _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, + _MM_SCALE_2); jfrc = jfrc - fx; _mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, - 
_MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, + _MM_SCALE_2); jfrc = jfrc - fy; _mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2); jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2, - _MM_SCALE_2); + _MM_SCALE_2); jfrc = jfrc - fz; _mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2); } - inline void SIMD_jforce_update(const SIMD_mask &rmask, + inline void SIMD_jforce_update(const SIMD_mask &rmask, double *force, const SIMD_int &joffset, SIMD_float &amx, - SIMD_float &amy, SIMD_float &amz) { + SIMD_float &amy, SIMD_float &amz) { SIMD_double amxd, amyd, amzd; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256(amx)); amyd = _mm512_cvtps_pd(_mm512_castps512_ps256(amy)); @@ -1084,7 +1084,7 @@ namespace ip_simd { SIMD_mask rmask2 = rmask >> 8; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(amx,amx,238))); + _mm512_shuffle_f32x4(amx,amx,238))); amyd = _mm512_cvtps_pd(_mm512_castps512_ps256( _mm512_shuffle_f32x4(amy,amy,238))); amzd = _mm512_cvtps_pd(_mm512_castps512_ps256( @@ -1095,8 +1095,8 @@ namespace ip_simd { } inline void SIMD_cache3(float *pr, const int offset, - const SIMD_float &fx, - const SIMD_float &fy, const SIMD_float &fz) { + const SIMD_float &fx, + const SIMD_float &fy, const SIMD_float &fz) { float *p = pr; SIMD_float t; t = SIMD_load(p); @@ -1113,8 +1113,8 @@ namespace ip_simd { } inline void SIMD_cache3(double *pr, const int offset, - const SIMD_double &fx, - const SIMD_double &fy, const SIMD_double &fz) { + const SIMD_double &fx, + const SIMD_double &fy, const SIMD_double &fz) { double *p = pr; SIMD_double t; t = SIMD_load(p); @@ -1131,8 +1131,8 @@ namespace ip_simd { } inline void SIMD_cache3(double *pr, const int foffset, - const SIMD_float &fx, - const SIMD_float &fy, const SIMD_float &fz) { + const SIMD_float &fx, + const SIMD_float &fy, const SIMD_float &fz) { const int offset = foffset >> 1; double *p = pr; SIMD_double t, fd; @@ -1142,7 
+1142,7 @@ namespace ip_simd { t = t + fd; SIMD_store(p,t); fd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fx,fx,238))); + _mm512_shuffle_f32x4(fx,fx,238))); p = p + offset; t = SIMD_load(p); t = t + fd; @@ -1154,7 +1154,7 @@ namespace ip_simd { t = t + fd; SIMD_store(p,t); fd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fy,fy,238))); + _mm512_shuffle_f32x4(fy,fy,238))); p = p + offset; t = SIMD_load(p); t = t + fd; @@ -1166,7 +1166,7 @@ namespace ip_simd { t = t + fd; SIMD_store(p,t); fd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fz,fz,238))); + _mm512_shuffle_f32x4(fz,fz,238))); p = p + offset; t = SIMD_load(p); t = t + fd; @@ -1174,15 +1174,15 @@ namespace ip_simd { } inline void SIMD_cache3(float *pr, const int offset, - const SIMD_float &fx, const SIMD_float &fy, - const SIMD_float &fz, const SIMD_float &fx2, - const SIMD_float &fy2, const SIMD_float &fz2) { + const SIMD_float &fx, const SIMD_float &fy, + const SIMD_float &fz, const SIMD_float &fx2, + const SIMD_float &fy2, const SIMD_float &fz2) { } inline void SIMD_cache3(double *pr, const int foffset, - const SIMD_double &fx, const SIMD_double &fy, - const SIMD_double &fz, const SIMD_double &fx2, - const SIMD_double &fy2, const SIMD_double &fz2) { + const SIMD_double &fx, const SIMD_double &fy, + const SIMD_double &fz, const SIMD_double &fx2, + const SIMD_double &fy2, const SIMD_double &fz2) { const int offset = foffset >> 1; double *p = pr; SIMD_double t; @@ -1214,14 +1214,14 @@ namespace ip_simd { SIMD_store(p,t); } - inline void SIMD_accumulate3(const SIMD_mask &kmask, const SIMD_float &fjx, - const SIMD_float &fjy, const SIMD_float &fjz, - SIMD_float &fxtmp, SIMD_float &fytmp, - SIMD_float &fztmp, SIMD_float &fjxtmp, - SIMD_float &fjytmp, SIMD_float &fjztmp, - SIMD_float &fxtmp2, SIMD_float &fytmp2, - SIMD_float &fztmp2, SIMD_float &fjxtmp2, - SIMD_float &fjytmp2, SIMD_float &fjztmp2) { + inline void SIMD_accumulate3(const SIMD_mask &kmask, 
const SIMD_float &fjx, + const SIMD_float &fjy, const SIMD_float &fjz, + SIMD_float &fxtmp, SIMD_float &fytmp, + SIMD_float &fztmp, SIMD_float &fjxtmp, + SIMD_float &fjytmp, SIMD_float &fjztmp, + SIMD_float &fxtmp2, SIMD_float &fytmp2, + SIMD_float &fztmp2, SIMD_float &fjxtmp2, + SIMD_float &fjytmp2, SIMD_float &fjztmp2) { fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, fjx); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, fjx); fytmp = SIMD_sub(fytmp, kmask, fytmp, fjy); @@ -1230,14 +1230,14 @@ namespace ip_simd { fjztmp = SIMD_sub(fjztmp, kmask, fjztmp, fjz); } - inline void SIMD_accumulate3(const SIMD_mask &kmask, const SIMD_double &fjx, - const SIMD_double &fjy, const SIMD_double &fjz, - SIMD_double &fxtmp, SIMD_double &fytmp, - SIMD_double &fztmp, SIMD_double &fjxtmp, - SIMD_double &fjytmp, SIMD_double &fjztmp, - SIMD_double &fxtmp2, SIMD_double &fytmp2, - SIMD_double &fztmp2, SIMD_double &fjxtmp2, - SIMD_double &fjytmp2, SIMD_double &fjztmp2) { + inline void SIMD_accumulate3(const SIMD_mask &kmask, const SIMD_double &fjx, + const SIMD_double &fjy, const SIMD_double &fjz, + SIMD_double &fxtmp, SIMD_double &fytmp, + SIMD_double &fztmp, SIMD_double &fjxtmp, + SIMD_double &fjytmp, SIMD_double &fjztmp, + SIMD_double &fxtmp2, SIMD_double &fytmp2, + SIMD_double &fztmp2, SIMD_double &fjxtmp2, + SIMD_double &fjytmp2, SIMD_double &fjztmp2) { fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, fjx); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, fjx); fytmp = SIMD_sub(fytmp, kmask, fytmp, fjy); @@ -1246,20 +1246,20 @@ namespace ip_simd { fjztmp = SIMD_sub(fjztmp, kmask, fjztmp, fjz); } - inline void SIMD_accumulate3(const SIMD_mask &kmask, const SIMD_float &fjx, - const SIMD_float &fjy, const SIMD_float &fjz, - SIMD_double &fxtmp, SIMD_double &fytmp, - SIMD_double &fztmp, SIMD_double &fjxtmp, - SIMD_double &fjytmp, SIMD_double &fjztmp, - SIMD_double &fxtmp2, SIMD_double &fytmp2, - SIMD_double &fztmp2, SIMD_double &fjxtmp2, - SIMD_double &fjytmp2, SIMD_double &fjztmp2) { + inline void SIMD_accumulate3(const 
SIMD_mask &kmask, const SIMD_float &fjx, + const SIMD_float &fjy, const SIMD_float &fjz, + SIMD_double &fxtmp, SIMD_double &fytmp, + SIMD_double &fztmp, SIMD_double &fjxtmp, + SIMD_double &fjytmp, SIMD_double &fjztmp, + SIMD_double &fxtmp2, SIMD_double &fytmp2, + SIMD_double &fztmp2, SIMD_double &fjxtmp2, + SIMD_double &fjytmp2, SIMD_double &fjztmp2) { SIMD_mask kmask2 = kmask >> 8; SIMD_double delfd = _mm512_cvtps_pd(_mm512_castps512_ps256(fjx)); fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, delfd); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjx,fjx,238))); + _mm512_shuffle_f32x4(fjx,fjx,238))); fxtmp2 = SIMD_sub(fxtmp2, kmask2, fxtmp2, delfd); fjxtmp2 = SIMD_sub(fjxtmp2, kmask2, fjxtmp2, delfd); @@ -1267,7 +1267,7 @@ namespace ip_simd { fytmp = SIMD_sub(fytmp, kmask, fytmp, delfd); fjytmp = SIMD_sub(fjytmp, kmask, fjytmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjy,fjy,238))); + _mm512_shuffle_f32x4(fjy,fjy,238))); fytmp2 = SIMD_sub(fytmp2, kmask2, fytmp2, delfd); fjytmp2 = SIMD_sub(fjytmp2, kmask2, fjytmp2, delfd); @@ -1275,22 +1275,22 @@ namespace ip_simd { fztmp = SIMD_sub(fztmp, kmask, fztmp, delfd); fjztmp = SIMD_sub(fjztmp, kmask, fjztmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjz,fjz,238))); + _mm512_shuffle_f32x4(fjz,fjz,238))); fztmp2 = SIMD_sub(fztmp2, kmask2, fztmp2, delfd); fjztmp2 = SIMD_sub(fjztmp2, kmask2, fjztmp2, delfd); } - inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_float &fjx, - const SIMD_float &fjy, const SIMD_float &fjz, - const SIMD_float &fkx, const SIMD_float &fky, - const SIMD_float &fkz, - SIMD_float &fxtmp, SIMD_float &fytmp, - SIMD_float &fztmp, SIMD_float &fjxtmp, - SIMD_float &fjytmp, SIMD_float &fjztmp, - SIMD_float &fxtmp2, SIMD_float &fytmp2, - SIMD_float &fztmp2, SIMD_float &fjxtmp2, - SIMD_float &fjytmp2, SIMD_float &fjztmp2, - float *pr, const int offset) { + 
inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_float &fjx, + const SIMD_float &fjy, const SIMD_float &fjz, + const SIMD_float &fkx, const SIMD_float &fky, + const SIMD_float &fkz, + SIMD_float &fxtmp, SIMD_float &fytmp, + SIMD_float &fztmp, SIMD_float &fjxtmp, + SIMD_float &fjytmp, SIMD_float &fjztmp, + SIMD_float &fxtmp2, SIMD_float &fytmp2, + SIMD_float &fztmp2, SIMD_float &fjxtmp2, + SIMD_float &fjytmp2, SIMD_float &fjztmp2, + float *pr, const int offset) { fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, fjx - fkx); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, fjx); fytmp = SIMD_sub(fytmp, kmask, fytmp, fjy - fky); @@ -1312,17 +1312,17 @@ namespace ip_simd { SIMD_store(p, t); } - inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_double &fjx, - const SIMD_double &fjy, const SIMD_double &fjz, - const SIMD_double &fkx, const SIMD_double &fky, - const SIMD_double &fkz, - SIMD_double &fxtmp, SIMD_double &fytmp, - SIMD_double &fztmp, SIMD_double &fjxtmp, - SIMD_double &fjytmp, SIMD_double &fjztmp, - SIMD_double &fxtmp2, SIMD_double &fytmp2, - SIMD_double &fztmp2, SIMD_double &fjxtmp2, - SIMD_double &fjytmp2, SIMD_double &fjztmp2, - double *pr, const int offset) { + inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_double &fjx, + const SIMD_double &fjy, const SIMD_double &fjz, + const SIMD_double &fkx, const SIMD_double &fky, + const SIMD_double &fkz, + SIMD_double &fxtmp, SIMD_double &fytmp, + SIMD_double &fztmp, SIMD_double &fjxtmp, + SIMD_double &fjytmp, SIMD_double &fjztmp, + SIMD_double &fxtmp2, SIMD_double &fytmp2, + SIMD_double &fztmp2, SIMD_double &fjxtmp2, + SIMD_double &fjytmp2, SIMD_double &fjztmp2, + double *pr, const int offset) { fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, fjx - fkx); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, fjx); fytmp = SIMD_sub(fytmp, kmask, fytmp, fjy - fky); @@ -1344,17 +1344,17 @@ namespace ip_simd { SIMD_store(p, t); } - inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_float &fjx, - const 
SIMD_float &fjy, const SIMD_float &fjz, - const SIMD_float &fkx, const SIMD_float &fky, - const SIMD_float &fkz, - SIMD_double &fxtmp, SIMD_double &fytmp, - SIMD_double &fztmp, SIMD_double &fjxtmp, - SIMD_double &fjytmp, SIMD_double &fjztmp, - SIMD_double &fxtmp2, SIMD_double &fytmp2, - SIMD_double &fztmp2, SIMD_double &fjxtmp2, - SIMD_double &fjytmp2, SIMD_double &fjztmp2, - double *pr, const int foffset) { + inline void SIMD_acc_cache3(const SIMD_mask &kmask, const SIMD_float &fjx, + const SIMD_float &fjy, const SIMD_float &fjz, + const SIMD_float &fkx, const SIMD_float &fky, + const SIMD_float &fkz, + SIMD_double &fxtmp, SIMD_double &fytmp, + SIMD_double &fztmp, SIMD_double &fjxtmp, + SIMD_double &fjytmp, SIMD_double &fjztmp, + SIMD_double &fxtmp2, SIMD_double &fytmp2, + SIMD_double &fztmp2, SIMD_double &fjxtmp2, + SIMD_double &fjytmp2, SIMD_double &fjztmp2, + double *pr, const int foffset) { SIMD_mask kmask2 = kmask >> 8; const int offset = foffset >> 1; double *p = pr; @@ -1368,9 +1368,9 @@ namespace ip_simd { fxtmp = SIMD_sub(fxtmp, kmask, fxtmp, delfd - delfdk); fjxtmp = SIMD_sub(fjxtmp, kmask, fjxtmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjx,fjx,238))); + _mm512_shuffle_f32x4(fjx,fjx,238))); delfdk = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fkx,fkx,238))); + _mm512_shuffle_f32x4(fkx,fkx,238))); p = p + offset; t = SIMD_load(p); t = t + delfdk; @@ -1387,9 +1387,9 @@ namespace ip_simd { fytmp = SIMD_sub(fytmp, kmask, fytmp, delfd - delfdk); fjytmp = SIMD_sub(fjytmp, kmask, fjytmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjy,fjy,238))); + _mm512_shuffle_f32x4(fjy,fjy,238))); delfdk = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fky,fky,238))); + _mm512_shuffle_f32x4(fky,fky,238))); p = p + offset; t = SIMD_load(p); t = t + delfdk; @@ -1406,9 +1406,9 @@ namespace ip_simd { fztmp = SIMD_sub(fztmp, kmask, fztmp, delfd - delfdk); fjztmp = 
SIMD_sub(fjztmp, kmask, fjztmp, delfd); delfd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fjz,fjz,238))); + _mm512_shuffle_f32x4(fjz,fjz,238))); delfdk = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(fkz,fkz,238))); + _mm512_shuffle_f32x4(fkz,fkz,238))); p = p + offset; t = SIMD_load(p); t = t + delfdk; @@ -1417,11 +1417,11 @@ namespace ip_simd { fjztmp2 = SIMD_sub(fjztmp2, kmask2, fjztmp2, delfd); } - inline void SIMD_acc_energy3(const SIMD_mask &hmask, - const SIMD_float &evdwl, const int eatom, - SIMD_float &sevdwl, SIMD_float &fwtmp, - SIMD_float &fjtmp, SIMD_float &fwtmp2, - SIMD_float &fjtmp2) { + inline void SIMD_acc_energy3(const SIMD_mask &hmask, + const SIMD_float &evdwl, const int eatom, + SIMD_float &sevdwl, SIMD_float &fwtmp, + SIMD_float &fjtmp, SIMD_float &fwtmp2, + SIMD_float &fjtmp2) { sevdwl = SIMD_add(sevdwl, hmask, sevdwl, evdwl); if (eatom) { const SIMD_float hevdwl = evdwl * (float)0.5; @@ -1430,11 +1430,11 @@ namespace ip_simd { } } - inline void SIMD_acc_energy3(const SIMD_mask &hmask, - const SIMD_double &evdwl, const int eatom, - SIMD_double &sevdwl, SIMD_double &fwtmp, - SIMD_double &fjtmp, SIMD_double &fwtmp2, - SIMD_double &fjtmp2) { + inline void SIMD_acc_energy3(const SIMD_mask &hmask, + const SIMD_double &evdwl, const int eatom, + SIMD_double &sevdwl, SIMD_double &fwtmp, + SIMD_double &fjtmp, SIMD_double &fwtmp2, + SIMD_double &fjtmp2) { sevdwl = SIMD_add(sevdwl, hmask, sevdwl, evdwl); if (eatom) { const SIMD_double hevdwl = evdwl * (double)0.5; @@ -1443,11 +1443,11 @@ namespace ip_simd { } } - inline void SIMD_acc_energy3(const SIMD_mask &hmask, - const SIMD_float &evdwl, const int eatom, - SIMD_double &sevdwl, SIMD_double &fwtmp, - SIMD_double &fjtmp, SIMD_double &fwtmp2, - SIMD_double &fjtmp2) { + inline void SIMD_acc_energy3(const SIMD_mask &hmask, + const SIMD_float &evdwl, const int eatom, + SIMD_double &sevdwl, SIMD_double &fwtmp, + SIMD_double &fjtmp, SIMD_double &fwtmp2, + SIMD_double 
&fjtmp2) { SIMD_double evdwld; evdwld = _mm512_cvtps_pd(_mm512_castps512_ps256(evdwl)); sevdwl = SIMD_add(sevdwl, hmask, sevdwl, evdwld); @@ -1458,7 +1458,7 @@ namespace ip_simd { } SIMD_mask hmask2 = hmask >> 8; evdwld = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(evdwl,evdwl,238))); + _mm512_shuffle_f32x4(evdwl,evdwl,238))); sevdwl = SIMD_add(sevdwl, hmask2, sevdwl, evdwld); if (eatom) { const SIMD_double hevdwl = evdwld * (double)0.5; @@ -1467,48 +1467,48 @@ namespace ip_simd { } } - inline void SIMD_acc_three(const SIMD_mask &hmask, const SIMD_float &facrad, - const int eatom, SIMD_float &sevdwl, - SIMD_float &fwtmp, SIMD_float &fjtmp, - SIMD_float &fwtmp2, SIMD_float &fjtmp2, - const SIMD_int &k, float *force) { + inline void SIMD_acc_three(const SIMD_mask &hmask, const SIMD_float &facrad, + const int eatom, SIMD_float &sevdwl, + SIMD_float &fwtmp, SIMD_float &fjtmp, + SIMD_float &fwtmp2, SIMD_float &fjtmp2, + const SIMD_int &k, float *force) { sevdwl = SIMD_add(sevdwl, hmask, sevdwl, facrad); if (eatom) { SIMD_float hevdwl = facrad * SIMD_set((float)0.33333333); fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl); fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl); SIMD_conflict_pi_reduce1(hmask, k, hevdwl); - SIMD_float keng = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), hmask, - k, force + 3, _MM_SCALE_1); + SIMD_float keng = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), hmask, + k, force + 3, _MM_SCALE_1); keng = keng + hevdwl; _mm512_mask_i32scatter_ps(force + 3, hmask, k, keng, _MM_SCALE_1); } } inline void SIMD_acc_three(const SIMD_mask &hmask, const SIMD_double &facrad, - const int eatom, SIMD_double &sevdwl, - SIMD_double &fwtmp, SIMD_double &fjtmp, - SIMD_double &fwtmp2, SIMD_double &fjtmp2, - const SIMD_int &k, double *force) { + const int eatom, SIMD_double &sevdwl, + SIMD_double &fwtmp, SIMD_double &fjtmp, + SIMD_double &fwtmp2, SIMD_double &fjtmp2, + const SIMD_int &k, double *force) { sevdwl = SIMD_add(sevdwl, hmask, sevdwl, 
facrad); if (eatom) { SIMD_double hevdwl = facrad * SIMD_set((double)0.33333333); fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl); fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl); SIMD_conflict_pi_reduce1(hmask, k, hevdwl); - SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), - hmask, k, force + 3, - _MM_SCALE_2); + SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), + hmask, k, force + 3, + _MM_SCALE_2); keng = keng + hevdwl; _mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2); } } - inline void SIMD_acc_three(const SIMD_mask &hmask, const SIMD_float &facrad, - const int eatom, SIMD_double &sevdwl, - SIMD_double &fwtmp, SIMD_double &fjtmp, - SIMD_double &fwtmp2, SIMD_double &fjtmp2, - const SIMD_int &k, double *force) { + inline void SIMD_acc_three(const SIMD_mask &hmask, const SIMD_float &facrad, + const int eatom, SIMD_double &sevdwl, + SIMD_double &fwtmp, SIMD_double &fjtmp, + SIMD_double &fwtmp2, SIMD_double &fjtmp2, + const SIMD_int &k, double *force) { SIMD_double facradd; facradd = _mm512_cvtps_pd(_mm512_castps512_ps256(facrad)); sevdwl = SIMD_add(sevdwl, hmask, sevdwl, facradd); @@ -1517,15 +1517,15 @@ namespace ip_simd { fwtmp = SIMD_add(fwtmp, hmask, fwtmp, hevdwl); fjtmp = SIMD_add(fjtmp, hmask, fjtmp, hevdwl); SIMD_conflict_pi_reduce1(hmask, k, hevdwl); - SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), - hmask, k, force + 3, - _MM_SCALE_2); + SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), + hmask, k, force + 3, + _MM_SCALE_2); keng = keng + hevdwl; _mm512_mask_i32loscatter_pd(force + 3, hmask, k, keng, _MM_SCALE_2); } SIMD_mask hmask2 = hmask >> 8; facradd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(facrad,facrad,238))); + _mm512_shuffle_f32x4(facrad,facrad,238))); sevdwl = SIMD_add(sevdwl, hmask2, sevdwl, facradd); if (eatom) { SIMD_double hevdwl = facradd * SIMD_set((double)0.33333333); @@ -1533,20 +1533,20 @@ namespace ip_simd { fjtmp2 = 
SIMD_add(fjtmp2, hmask2, fjtmp2, hevdwl); SIMD_int k2 = _mm512_shuffle_i32x4(k, k, 238); SIMD_conflict_pi_reduce1(hmask2, k2, hevdwl); - SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), - hmask2, k2, force + 3, - _MM_SCALE_2); + SIMD_double keng = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), + hmask2, k2, force + 3, + _MM_SCALE_2); keng = keng + hevdwl; _mm512_mask_i32loscatter_pd(force + 3, hmask2, k2, keng, _MM_SCALE_2); } } - inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, - const float ev_pre, - const SIMD_float &fpair, const SIMD_float &delx, - const SIMD_float &dely, const SIMD_float &delz, - SIMD_float &sv0, SIMD_float &sv1, SIMD_float &sv2, - SIMD_float &sv3, SIMD_float &sv4, SIMD_float &sv5) { + inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, + const float ev_pre, + const SIMD_float &fpair, const SIMD_float &delx, + const SIMD_float &dely, const SIMD_float &delz, + SIMD_float &sv0, SIMD_float &sv1, SIMD_float &sv2, + SIMD_float &sv3, SIMD_float &sv4, SIMD_float &sv5) { if (vflag == 1) { const SIMD_float prefpair = SIMD_set(ev_pre) * fpair; sv0 = SIMD_add(sv0, m, sv0, delx * delx * prefpair); @@ -1558,12 +1558,12 @@ namespace ip_simd { } } - inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, - const double ev_pre, - const SIMD_double &fpair, const SIMD_double &delx, - const SIMD_double &dely, const SIMD_double &delz, - SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, - SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { + inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, + const double ev_pre, + const SIMD_double &fpair, const SIMD_double &delx, + const SIMD_double &dely, const SIMD_double &delz, + SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, + SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { if (vflag == 1) { const SIMD_double prefpair = SIMD_set(ev_pre) * fpair; sv0 = SIMD_add(sv0, m, sv0, delx * delx * prefpair); @@ -1575,12 
+1575,12 @@ namespace ip_simd { } } - inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, - const float ev_pre, - const SIMD_float &fpair, const SIMD_float &delx, - const SIMD_float &dely, const SIMD_float &delz, - SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, - SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { + inline void SIMD_ev_tally_nbor(const SIMD_mask &m, const int vflag, + const float ev_pre, + const SIMD_float &fpair, const SIMD_float &delx, + const SIMD_float &dely, const SIMD_float &delz, + SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, + SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { if (vflag == 1) { const SIMD_mask m2 = m >> 8; const SIMD_float prefpair = SIMD_set(ev_pre) * fpair; @@ -1588,55 +1588,55 @@ namespace ip_simd { SIMD_double dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv0 = SIMD_add(sv0, m, sv0, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv0 = SIMD_add(sv0, m2, sv0, dpaird); dpair = dely * dely * prefpair; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv1 = SIMD_add(sv1, m, sv1, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv1 = SIMD_add(sv1, m2, sv1, dpaird); dpair = delz * delz * prefpair; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv2 = SIMD_add(sv2, m, sv2, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv2 = SIMD_add(sv2, m2, sv2, dpaird); dpair = delx * dely * prefpair; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv3 = SIMD_add(sv3, m, sv3, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv3 = SIMD_add(sv3, m2, sv3, dpaird); dpair = delx * delz * 
prefpair; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv4 = SIMD_add(sv4, m, sv4, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv4 = SIMD_add(sv4, m2, sv4, dpaird); dpair = dely * delz * prefpair; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv5 = SIMD_add(sv5, m, sv5, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv5 = SIMD_add(sv5, m2, sv5, dpaird); } } - inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, - const SIMD_float &fj0, const SIMD_float &fj1, - const SIMD_float &fj2, const SIMD_float &fk0, - const SIMD_float &fk1, const SIMD_float &fk2, + inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, + const SIMD_float &fj0, const SIMD_float &fj1, + const SIMD_float &fj2, const SIMD_float &fk0, + const SIMD_float &fk1, const SIMD_float &fk2, const SIMD_float &delx, const SIMD_float &dely, const SIMD_float &delz, const SIMD_float &delr2x, const SIMD_float &delr2y, const SIMD_float &delr2z, - SIMD_float &sv0, SIMD_float &sv1, SIMD_float &sv2, - SIMD_float &sv3, SIMD_float &sv4, SIMD_float &sv5) { + SIMD_float &sv0, SIMD_float &sv1, SIMD_float &sv2, + SIMD_float &sv3, SIMD_float &sv4, SIMD_float &sv5) { if (vflag == 1) { sv0 = SIMD_add(sv0, m, sv0, delx * fj0 + delr2x * fk0); sv1 = SIMD_add(sv1, m, sv1, dely * fj1 + delr2y * fk1); @@ -1647,15 +1647,15 @@ namespace ip_simd { } } - inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, - const SIMD_double &fj0, const SIMD_double &fj1, - const SIMD_double &fj2, const SIMD_double &fk0, - const SIMD_double &fk1, const SIMD_double &fk2, - const SIMD_double &delx, const SIMD_double &dely, - const SIMD_double &delz, const SIMD_double &delr2x, - const SIMD_double &delr2y, const SIMD_double &delr2z, - SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, - 
SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { + inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, + const SIMD_double &fj0, const SIMD_double &fj1, + const SIMD_double &fj2, const SIMD_double &fk0, + const SIMD_double &fk1, const SIMD_double &fk2, + const SIMD_double &delx, const SIMD_double &dely, + const SIMD_double &delz, const SIMD_double &delr2x, + const SIMD_double &delr2y, const SIMD_double &delr2z, + SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, + SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { if (vflag == 1) { sv0 = SIMD_add(sv0, m, sv0, delx * fj0 + delr2x * fk0); sv1 = SIMD_add(sv1, m, sv1, dely * fj1 + delr2y * fk1); @@ -1666,62 +1666,62 @@ namespace ip_simd { } } - inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, - const SIMD_float &fj0, const SIMD_float &fj1, - const SIMD_float &fj2, const SIMD_float &fk0, - const SIMD_float &fk1, const SIMD_float &fk2, + inline void SIMD_ev_tally_nbor3v(const SIMD_mask &m, const int vflag, + const SIMD_float &fj0, const SIMD_float &fj1, + const SIMD_float &fj2, const SIMD_float &fk0, + const SIMD_float &fk1, const SIMD_float &fk2, const SIMD_float &delx, const SIMD_float &dely, const SIMD_float &delz, const SIMD_float &delr2x, const SIMD_float &delr2y, const SIMD_float &delr2z, - SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, - SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { + SIMD_double &sv0, SIMD_double &sv1, SIMD_double &sv2, + SIMD_double &sv3, SIMD_double &sv4, SIMD_double &sv5) { if (vflag == 1) { const SIMD_mask m2 = m >> 8; SIMD_float dpair = delx * fj0 + delr2x * fk0; SIMD_double dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv0 = SIMD_add(sv0, m, sv0, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv0 = SIMD_add(sv0, m2, sv0, dpaird); dpair = dely * fj1 + delr2y * fk1; dpaird = 
_mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv1 = SIMD_add(sv1, m, sv1, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv1 = SIMD_add(sv1, m2, sv1, dpaird); dpair = delz * fj2 + delr2z * fk2; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv2 = SIMD_add(sv2, m, sv2, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv2 = SIMD_add(sv2, m2, sv2, dpaird); dpair = delx * fj1 + delr2x * fk1; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv3 = SIMD_add(sv3, m, sv3, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv3 = SIMD_add(sv3, m2, sv3, dpaird); dpair = delx * fj2 + delr2x * fk2; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv4 = SIMD_add(sv4, m, sv4, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv4 = SIMD_add(sv4, m2, sv4, dpaird); dpair = dely * fj2 + delr2y * fk2; dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256(dpair)); sv5 = SIMD_add(sv5, m, sv5, dpaird); dpaird = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(dpair,dpair,238))); + _mm512_shuffle_f32x4(dpair,dpair,238))); sv5 = SIMD_add(sv5, m2, sv5, dpaird); } } - inline void SIMD_safe_force_accumulate(const SIMD_mask &rmask, + inline void SIMD_safe_force_accumulate(const SIMD_mask &rmask, float *force, const SIMD_int &joffset, SIMD_float &amx, SIMD_float &amy, SIMD_float &amz, SIMD_float &fxtmp, SIMD_float &fytmp, SIMD_float &fztmp, SIMD_float &fxtmp2, @@ -1733,10 +1733,10 @@ namespace ip_simd { SIMD_jforce_update(rmask, force, joffset, amx, amy, amz); } - inline void SIMD_safe_force_accumulate(const SIMD_mask &rmask, + inline void SIMD_safe_force_accumulate(const SIMD_mask 
&rmask, double *force, const SIMD_int &joffset, SIMD_double &amx, SIMD_double &amy, SIMD_double &amz, SIMD_double &fxtmp, - SIMD_double &fytmp, SIMD_double &fztmp, SIMD_double &fxtmp2, + SIMD_double &fytmp, SIMD_double &fztmp, SIMD_double &fxtmp2, SIMD_double &fytmp2, SIMD_double &fztmp2) { fxtmp = SIMD_add(fxtmp, rmask, fxtmp, amx); fytmp = SIMD_add(fytmp, rmask, fytmp, amy); @@ -1745,10 +1745,10 @@ namespace ip_simd { SIMD_jforce_update(rmask, force, joffset, amx, amy, amz); } - inline void SIMD_safe_force_accumulate(const SIMD_mask &rmask, + inline void SIMD_safe_force_accumulate(const SIMD_mask &rmask, double *force, const SIMD_int &joffset, SIMD_float &amx, SIMD_float &amy, SIMD_float &amz, SIMD_double &fxtmp, - SIMD_double &fytmp, SIMD_double &fztmp, SIMD_double &fxtmp2, + SIMD_double &fytmp, SIMD_double &fztmp, SIMD_double &fxtmp2, SIMD_double &fytmp2, SIMD_double &fztmp2) { SIMD_double amxd, amyd, amzd; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256(amx)); @@ -1762,7 +1762,7 @@ namespace ip_simd { SIMD_mask rmask2 = rmask >> 8; amxd = _mm512_cvtps_pd(_mm512_castps512_ps256( - _mm512_shuffle_f32x4(amx,amx,238))); + _mm512_shuffle_f32x4(amx,amx,238))); fxtmp2 = SIMD_add(fxtmp2, rmask2, fxtmp2, amxd); amyd = _mm512_cvtps_pd(_mm512_castps512_ps256( _mm512_shuffle_f32x4(amy,amy,238))); @@ -1776,57 +1776,57 @@ namespace ip_simd { } inline void SIMD_iforce_update(const SIMD_mask &m, float *force, - const SIMD_int &i, const SIMD_float &fx, - const SIMD_float &fy, const SIMD_float &fz, - const int EVFLAG, const int eatom, - const SIMD_float &fwtmp) { + const SIMD_int &i, const SIMD_float &fx, + const SIMD_float &fy, const SIMD_float &fz, + const int EFLAG, const int eatom, + const SIMD_float &fwtmp) { SIMD_float jfrc; - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force, + _MM_SCALE_1); jfrc = jfrc + fx; _mm512_mask_i32scatter_ps(force, m, i, jfrc, _MM_SCALE_1); - 
jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, - _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 1, + _MM_SCALE_1); jfrc = jfrc + fy; _mm512_mask_i32scatter_ps(force+1, m, i, jfrc, _MM_SCALE_1); jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 2, - _MM_SCALE_1); + _MM_SCALE_1); jfrc = jfrc + fz; _mm512_mask_i32scatter_ps(force+2, m, i, jfrc, _MM_SCALE_1); - if (EVFLAG) { + if (EFLAG) { if (eatom) { - jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 3, - _MM_SCALE_1); - jfrc = jfrc + fwtmp; - _mm512_mask_i32scatter_ps(force+3, m, i, jfrc, _MM_SCALE_1); + jfrc = _mm512_mask_i32gather_ps(_mm512_undefined_ps(), m, i, force + 3, + _MM_SCALE_1); + jfrc = jfrc + fwtmp; + _mm512_mask_i32scatter_ps(force+3, m, i, jfrc, _MM_SCALE_1); } } } inline void SIMD_iforce_update(const SIMD_mask &m, double *force, - const SIMD_int &i, const SIMD_double &fx, - const SIMD_double &fy, const SIMD_double &fz, - const int EVFLAG, const int eatom, - const SIMD_double &fwtmp) { + const SIMD_int &i, const SIMD_double &fx, + const SIMD_double &fy, const SIMD_double &fz, + const int EFLAG, const int eatom, + const SIMD_double &fwtmp) { SIMD_double jfrc; - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, - _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force, + _MM_SCALE_2); jfrc = jfrc + fx; _mm512_mask_i32loscatter_pd(force, m, i, jfrc, _MM_SCALE_2); - jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, - _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 1, + _MM_SCALE_2); jfrc = jfrc + fy; _mm512_mask_i32loscatter_pd(force+1, m, i, jfrc, _MM_SCALE_2); jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, force + 2, - _MM_SCALE_2); + _MM_SCALE_2); jfrc = jfrc + fz; _mm512_mask_i32loscatter_pd(force+2, m, i, jfrc, _MM_SCALE_2); - if (EVFLAG) { + if (EFLAG) { if (eatom) { - 
jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, - force + 3, _MM_SCALE_2); - jfrc = jfrc + fwtmp; - _mm512_mask_i32loscatter_pd(force+3, m, i, jfrc, _MM_SCALE_2); + jfrc = _mm512_mask_i32logather_pd(_mm512_undefined_pd(), m, i, + force + 3, _MM_SCALE_2); + jfrc = jfrc + fwtmp; + _mm512_mask_i32loscatter_pd(force+3, m, i, jfrc, _MM_SCALE_2); } } } @@ -1834,8 +1834,8 @@ namespace ip_simd { #ifdef SW_GATHER_TEST template <class atom_t> inline void SIMD_atom_gather(const SIMD_mask &m, const atom_t *atom, - const SIMD_int &i, SIMD_float &x, SIMD_float &y, - SIMD_float &z, SIMD_int &type) { + const SIMD_int &i, SIMD_float &x, SIMD_float &y, + SIMD_float &z, SIMD_int &type) { int jv_scalar[16] __attribute__((aligned(64))); int jm_scalar[16] __attribute__((aligned(64))); _mm512_store_epi32(jv_scalar, i); @@ -1846,65 +1846,65 @@ namespace ip_simd { pl1 = _mm512_loadu_ps((float *)((char *)atom + js)); js = jv_scalar[1]; pl1 = _mm512_insertf32x4(pl1, _mm_load_ps((float *)((char *)atom + - js)), 1); + js)), 1); js = jv_scalar[2]; pl1 = _mm512_insertf32x4(pl1, _mm_load_ps((float *)((char *)atom + - js)), 2); + js)), 2); js = jv_scalar[3]; pl1 = _mm512_insertf32x4(pl1, _mm_load_ps((float *)((char *)atom + - js)), 3); - + js)), 3); + js = jv_scalar[4]; pl2 = _mm512_loadu_ps((float *)((char *)atom + js)); js = jv_scalar[5]; pl2 = _mm512_insertf32x4(pl2, _mm_load_ps((float *)((char *)atom + - js)), 1); + js)), 1); js = jv_scalar[6]; pl2 = _mm512_insertf32x4(pl2, _mm_load_ps((float *)((char *)atom + - js)), 2); + js)), 2); js = jv_scalar[7]; pl2 = _mm512_insertf32x4(pl2, _mm_load_ps((float *)((char *)atom + - js)), 3); - + js)), 3); + js = jv_scalar[8]; pl3 = _mm512_loadu_ps((float *)((char *)atom + js)); js = jv_scalar[9]; pl3 = _mm512_insertf32x4(pl3, _mm_load_ps((float *)((char *)atom + - js)), 1); + js)), 1); js = jv_scalar[10]; pl3 = _mm512_insertf32x4(pl3, _mm_load_ps((float *)((char *)atom + - js)), 2); + js)), 2); js = jv_scalar[11]; pl3 = 
_mm512_insertf32x4(pl3, _mm_load_ps((float *)((char *)atom + - js)), 3); - + js)), 3); + js = jv_scalar[12]; pl4 = _mm512_loadu_ps((float *)((char *)atom + js)); js = jv_scalar[13]; pl4 = _mm512_insertf32x4(pl4, _mm_load_ps((float *)((char *)atom + - js)), 1); + js)), 1); js = jv_scalar[14]; pl4 = _mm512_insertf32x4(pl4, _mm_load_ps((float *)((char *)atom + - js)), 2); + js)), 2); js = jv_scalar[15]; pl4 = _mm512_insertf32x4(pl4, _mm_load_ps((float *)((char *)atom + - js)), 3); - + js)), 3); + SIMD_int c0 = _mm512_setr_epi32(0x0,0x4,0x8,0xc,0x10,0x14,0x18,0x1c, - 0x1,0x5,0x9,0xd,0x11,0x15,0x19,0x1d); + 0x1,0x5,0x9,0xd,0x11,0x15,0x19,0x1d); SIMD_int c1 = _mm512_setr_epi32(0x1,0x5,0x9,0xd,0x11,0x15,0x19,0x1d, - 0x0,0x4,0x8,0xc,0x10,0x14,0x18,0x1c); + 0x0,0x4,0x8,0xc,0x10,0x14,0x18,0x1c); SIMD_int c2 = _mm512_setr_epi32(0x2,0x6,0xa,0xe,0x12,0x16,0x1a,0x1e, - 0x3,0x7,0xb,0xf,0x13,0x17,0x1b,0x1f); + 0x3,0x7,0xb,0xf,0x13,0x17,0x1b,0x1f); SIMD_int c3 = _mm512_setr_epi32(0x3,0x7,0xb,0xf,0x13,0x17,0x1b,0x1f, - 0x2,0x6,0xa,0xe,0x12,0x16,0x1a,0x1e); + 0x2,0x6,0xa,0xe,0x12,0x16,0x1a,0x1e); SIMD_mask k_1 = _mm512_int2mask(65280); SIMD_float sl1 = _mm512_permutex2var_ps(pl3, c0, pl4); SIMD_float sl2 = _mm512_permutex2var_ps(pl1, c1, pl2); SIMD_float sl3 = _mm512_permutex2var_ps(pl3, c2, pl4); SIMD_float sl4 = _mm512_permutex2var_ps(pl1, c3, pl2); - + x = _mm512_shuffle_f32x4(sl2, sl1, 78); z = _mm512_shuffle_f32x4(sl4, sl3, 78); y = _mm512_mask_blend_ps(k_1, sl2, sl1); diff --git a/src/USER-INTEL/math_extra_intel.h b/src/USER-INTEL/math_extra_intel.h index 403b74d8fe58c86e9c3b20c0526dd33d8f39c9c8..547fadb6e9a16feb28a1df269215ec45eefe52f4 100644 --- a/src/USER-INTEL/math_extra_intel.h +++ b/src/USER-INTEL/math_extra_intel.h @@ -18,110 +18,110 @@ #ifndef LMP_MATH_EXTRA_INTEL_H #define LMP_MATH_EXTRA_INTEL_H -#define ME_quat_to_mat_trans(quat, mat) \ -{ \ - flt_t quat_w = quat.w; \ - flt_t quat_i = quat.i; \ - flt_t quat_j = quat.j; \ - flt_t quat_k = quat.k; \ - flt_t w2 = quat_w 
* quat_w; \ - flt_t i2 = quat_i * quat_i; \ - flt_t j2 = quat_j * quat_j; \ - flt_t k2 = quat_k * quat_k; \ - flt_t twoij = (flt_t)2.0 * quat_i * quat_j; \ - flt_t twoik = (flt_t)2.0 * quat_i * quat_k; \ - flt_t twojk = (flt_t)2.0 * quat_j * quat_k; \ - flt_t twoiw = (flt_t)2.0 * quat_i * quat_w; \ - flt_t twojw = (flt_t)2.0 * quat_j * quat_w; \ - flt_t twokw = (flt_t)2.0 * quat_k * quat_w; \ - \ - mat##_0 = w2 + i2 - j2 - k2; \ - mat##_3 = twoij - twokw; \ - mat##_6 = twojw + twoik; \ - \ - mat##_1 = twoij + twokw; \ - mat##_4 = w2 - i2 + j2 - k2; \ - mat##_7 = twojk - twoiw; \ - \ - mat##_2 = twoik - twojw; \ - mat##_5 = twojk + twoiw; \ - mat##_8 = w2 - i2 - j2 + k2; \ +#define ME_quat_to_mat_trans(quat, mat) \ +{ \ + flt_t quat_w = quat.w; \ + flt_t quat_i = quat.i; \ + flt_t quat_j = quat.j; \ + flt_t quat_k = quat.k; \ + flt_t w2 = quat_w * quat_w; \ + flt_t i2 = quat_i * quat_i; \ + flt_t j2 = quat_j * quat_j; \ + flt_t k2 = quat_k * quat_k; \ + flt_t twoij = (flt_t)2.0 * quat_i * quat_j; \ + flt_t twoik = (flt_t)2.0 * quat_i * quat_k; \ + flt_t twojk = (flt_t)2.0 * quat_j * quat_k; \ + flt_t twoiw = (flt_t)2.0 * quat_i * quat_w; \ + flt_t twojw = (flt_t)2.0 * quat_j * quat_w; \ + flt_t twokw = (flt_t)2.0 * quat_k * quat_w; \ + \ + mat##_0 = w2 + i2 - j2 - k2; \ + mat##_3 = twoij - twokw; \ + mat##_6 = twojw + twoik; \ + \ + mat##_1 = twoij + twokw; \ + mat##_4 = w2 - i2 + j2 - k2; \ + mat##_7 = twojk - twoiw; \ + \ + mat##_2 = twoik - twojw; \ + mat##_5 = twojk + twoiw; \ + mat##_8 = w2 - i2 - j2 + k2; \ } /* ---------------------------------------------------------------------- diagonal matrix times a full matrix ------------------------------------------------------------------------- */ -#define ME_diag_times3(d, m, ans) \ - { \ - ans##_0 = d[0] * m##_0; \ - ans##_1 = d[0] * m##_1; \ - ans##_2 = d[0] * m##_2; \ - ans##_3 = d[1] * m##_3; \ - ans##_4 = d[1] * m##_4; \ - ans##_5 = d[1] * m##_5; \ - ans##_6 = d[2] * m##_6; \ - ans##_7 = d[2] * m##_7; \ - 
ans##_8 = d[2] * m##_8; \ +#define ME_diag_times3(d, m, ans) \ + { \ + ans##_0 = d[0] * m##_0; \ + ans##_1 = d[0] * m##_1; \ + ans##_2 = d[0] * m##_2; \ + ans##_3 = d[1] * m##_3; \ + ans##_4 = d[1] * m##_4; \ + ans##_5 = d[1] * m##_5; \ + ans##_6 = d[2] * m##_6; \ + ans##_7 = d[2] * m##_7; \ + ans##_8 = d[2] * m##_8; \ } -#define ME_diag_times3a(d, m, ans) \ - { \ - ans##_0 = d##_0 * m##_0; \ - ans##_1 = d##_0 * m##_1; \ - ans##_2 = d##_0 * m##_2; \ - ans##_3 = d##_1 * m##_3; \ - ans##_4 = d##_1 * m##_4; \ - ans##_5 = d##_1 * m##_5; \ - ans##_6 = d##_2 * m##_6; \ - ans##_7 = d##_2 * m##_7; \ - ans##_8 = d##_2 * m##_8; \ +#define ME_diag_times3a(d, m, ans) \ + { \ + ans##_0 = d##_0 * m##_0; \ + ans##_1 = d##_0 * m##_1; \ + ans##_2 = d##_0 * m##_2; \ + ans##_3 = d##_1 * m##_3; \ + ans##_4 = d##_1 * m##_4; \ + ans##_5 = d##_1 * m##_5; \ + ans##_6 = d##_2 * m##_6; \ + ans##_7 = d##_2 * m##_7; \ + ans##_8 = d##_2 * m##_8; \ } /* ---------------------------------------------------------------------- multiply the transpose of mat1 times mat2 ------------------------------------------------------------------------- */ -#define ME_transpose_times3(m1, m2, ans) \ -{ \ - ans##_0 = m1##_0*m2##_0 + m1##_3*m2##_3 + m1##_6*m2##_6; \ - ans##_1 = m1##_0*m2##_1 + m1##_3*m2##_4 + m1##_6*m2##_7; \ - ans##_2 = m1##_0*m2##_2 + m1##_3*m2##_5 + m1##_6*m2##_8; \ - ans##_3 = m1##_1*m2##_0 + m1##_4*m2##_3 + m1##_7*m2##_6; \ - ans##_4 = m1##_1*m2##_1 + m1##_4*m2##_4 + m1##_7*m2##_7; \ - ans##_5 = m1##_1*m2##_2 + m1##_4*m2##_5 + m1##_7*m2##_8; \ - ans##_6 = m1##_2*m2##_0 + m1##_5*m2##_3 + m1##_8*m2##_6; \ - ans##_7 = m1##_2*m2##_1 + m1##_5*m2##_4 + m1##_8*m2##_7; \ - ans##_8 = m1##_2*m2##_2 + m1##_5*m2##_5 + m1##_8*m2##_8; \ +#define ME_transpose_times3(m1, m2, ans) \ +{ \ + ans##_0 = m1##_0*m2##_0 + m1##_3*m2##_3 + m1##_6*m2##_6; \ + ans##_1 = m1##_0*m2##_1 + m1##_3*m2##_4 + m1##_6*m2##_7; \ + ans##_2 = m1##_0*m2##_2 + m1##_3*m2##_5 + m1##_6*m2##_8; \ + ans##_3 = m1##_1*m2##_0 + m1##_4*m2##_3 
+ m1##_7*m2##_6; \ + ans##_4 = m1##_1*m2##_1 + m1##_4*m2##_4 + m1##_7*m2##_7; \ + ans##_5 = m1##_1*m2##_2 + m1##_4*m2##_5 + m1##_7*m2##_8; \ + ans##_6 = m1##_2*m2##_0 + m1##_5*m2##_3 + m1##_8*m2##_6; \ + ans##_7 = m1##_2*m2##_1 + m1##_5*m2##_4 + m1##_8*m2##_7; \ + ans##_8 = m1##_2*m2##_2 + m1##_5*m2##_5 + m1##_8*m2##_8; \ } /* ---------------------------------------------------------------------- normalize a vector, return in ans ------------------------------------------------------------------------- */ -#define ME_normalize3(v0, v1, v2, ans) \ -{ \ - flt_t scale = (flt_t)1.0 / sqrt(v0*v0+v1*v1+v2*v2); \ - ans##_0 = v0 * scale; \ - ans##_1 = v1 * scale; \ - ans##_2 = v2 * scale; \ +#define ME_normalize3(v0, v1, v2, ans) \ +{ \ + flt_t scale = (flt_t)1.0 / sqrt(v0*v0+v1*v1+v2*v2); \ + ans##_0 = v0 * scale; \ + ans##_1 = v1 * scale; \ + ans##_2 = v2 * scale; \ } /* ---------------------------------------------------------------------- add two matrices ------------------------------------------------------------------------- */ -#define ME_plus3(m1, m2, ans) \ -{ \ - ans##_0 = m1##_0 + m2##_0; \ - ans##_1 = m1##_1 + m2##_1; \ - ans##_2 = m1##_2 + m2##_2; \ - ans##_3 = m1##_3 + m2##_3; \ - ans##_4 = m1##_4 + m2##_4; \ - ans##_5 = m1##_5 + m2##_5; \ - ans##_6 = m1##_6 + m2##_6; \ - ans##_7 = m1##_7 + m2##_7; \ - ans##_8 = m1##_8 + m2##_8; \ +#define ME_plus3(m1, m2, ans) \ +{ \ + ans##_0 = m1##_0 + m2##_0; \ + ans##_1 = m1##_1 + m2##_1; \ + ans##_2 = m1##_2 + m2##_2; \ + ans##_3 = m1##_3 + m2##_3; \ + ans##_4 = m1##_4 + m2##_4; \ + ans##_5 = m1##_5 + m2##_5; \ + ans##_6 = m1##_6 + m2##_6; \ + ans##_7 = m1##_7 + m2##_7; \ + ans##_8 = m1##_8 + m2##_8; \ } /* ---------------------------------------------------------------------- @@ -135,7 +135,7 @@ determinant of a matrix ------------------------------------------------------------------------- */ -#define ME_det3(m) \ +#define ME_det3(m) \ ( m##_0 * m##_4 * m##_8 - m##_0 * m##_5 * m##_7 - \ m##_3 * m##_1 * m##_8 + m##_3 
* m##_2 * m##_7 + \ m##_6 * m##_1 * m##_5 - m##_6 * m##_2 * m##_4 ) @@ -144,8 +144,8 @@ row vector times matrix ------------------------------------------------------------------------- */ -#define ME_vecmat(v, m, ans) \ -{ \ +#define ME_vecmat(v, m, ans) \ +{ \ ans##_0 = v##_0 * m##_0 + v##_1 * m##_3 + v##_2 * m##_6; \ ans##_1 = v##_0 * m##_1 + v##_1 * m##_4 + v##_2 * m##_7; \ ans##_2 = v##_0 * m##_2 + v##_1 * m##_5 + v##_2 * m##_8; \ @@ -155,214 +155,214 @@ cross product of 2 vectors ------------------------------------------------------------------------- */ -#define ME_cross3(v1, v2, ans) \ -{ \ - ans##_0 = v1##_1 * v2##_2 - v1##_2 * v2##_1; \ - ans##_1 = v1##_2 * v2##_0 - v1##_0 * v2##_2; \ - ans##_2 = v1##_0 * v2##_1 - v1##_1 * v2##_0; \ +#define ME_cross3(v1, v2, ans) \ +{ \ + ans##_0 = v1##_1 * v2##_2 - v1##_2 * v2##_1; \ + ans##_1 = v1##_2 * v2##_0 - v1##_0 * v2##_2; \ + ans##_2 = v1##_0 * v2##_1 - v1##_1 * v2##_0; \ } /* ---------------------------------------------------------------------- cross product of 2 vectors ------------------------------------------------------------------------- */ -#define ME_mv0_cross3(m1, v2, ans) \ -{ \ - ans##_0 = m1##_1 * v2##_2 - m1##_2 * v2##_1; \ - ans##_1 = m1##_2 * v2##_0 - m1##_0 * v2##_2; \ - ans##_2 = m1##_0 * v2##_1 - m1##_1 * v2##_0; \ +#define ME_mv0_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_1 * v2##_2 - m1##_2 * v2##_1; \ + ans##_1 = m1##_2 * v2##_0 - m1##_0 * v2##_2; \ + ans##_2 = m1##_0 * v2##_1 - m1##_1 * v2##_0; \ } -#define ME_mv1_cross3(m1, v2, ans) \ -{ \ - ans##_0 = m1##_4 * v2##_2 - m1##_5 * v2##_1; \ - ans##_1 = m1##_5 * v2##_0 - m1##_3 * v2##_2; \ - ans##_2 = m1##_3 * v2##_1 - m1##_4 * v2##_0; \ +#define ME_mv1_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_4 * v2##_2 - m1##_5 * v2##_1; \ + ans##_1 = m1##_5 * v2##_0 - m1##_3 * v2##_2; \ + ans##_2 = m1##_3 * v2##_1 - m1##_4 * v2##_0; \ } -#define ME_mv2_cross3(m1, v2, ans) \ -{ \ - ans##_0 = m1##_7 * v2##_2 - m1##_8 * v2##_1; \ - ans##_1 = m1##_8 * 
v2##_0 - m1##_6 * v2##_2; \ - ans##_2 = m1##_6 * v2##_1 - m1##_7 * v2##_0; \ +#define ME_mv2_cross3(m1, v2, ans) \ +{ \ + ans##_0 = m1##_7 * v2##_2 - m1##_8 * v2##_1; \ + ans##_1 = m1##_8 * v2##_0 - m1##_6 * v2##_2; \ + ans##_2 = m1##_6 * v2##_1 - m1##_7 * v2##_0; \ } #define ME_compute_eta_torque(m1, m2, s1, ans) \ -{ \ - flt_t den = m1##_3*m1##_2*m1##_7-m1##_0*m1##_5*m1##_7- \ - m1##_2*m1##_6*m1##_4+m1##_1*m1##_6*m1##_5- \ - m1##_3*m1##_1*m1##_8+m1##_0*m1##_4*m1##_8; \ - den = (flt_t)1.0 / den; \ - \ +{ \ + flt_t den = m1##_3*m1##_2*m1##_7-m1##_0*m1##_5*m1##_7- \ + m1##_2*m1##_6*m1##_4+m1##_1*m1##_6*m1##_5- \ + m1##_3*m1##_1*m1##_8+m1##_0*m1##_4*m1##_8; \ + den = (flt_t)1.0 / den; \ + \ ans##_0 = s1##_0*(m1##_5*m1##_1*m2##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_0- \ - m1##_4*m2##_2*m1##_2-(flt_t)2.0*m1##_5*m2##_0*m1##_7+ \ - m2##_1*m1##_2*m1##_7-m2##_1*m1##_1*m1##_8- \ - m1##_3*m1##_8*m2##_1+m1##_6*m1##_5*m2##_1+ \ - m1##_3*m2##_2*m1##_7-m2##_2*m1##_6*m1##_4)*den; \ - \ - ans##_1 = s1##_0*(m1##_2*m2##_0*m1##_7-m1##_8*m2##_0*m1##_1+ \ - (flt_t)2.0*m1##_0*m1##_8*m2##_1-m1##_0*m2##_2*m1##_5- \ - (flt_t)2.0*m1##_6*m1##_2*m2##_1+m2##_2*m1##_3*m1##_2- \ - m1##_8*m1##_3*m2##_0+m1##_6*m2##_0*m1##_5+ \ - m1##_6*m2##_2*m1##_1-m2##_2*m1##_0*m1##_7)*den; \ - \ + m1##_4*m2##_2*m1##_2-(flt_t)2.0*m1##_5*m2##_0*m1##_7+ \ + m2##_1*m1##_2*m1##_7-m2##_1*m1##_1*m1##_8- \ + m1##_3*m1##_8*m2##_1+m1##_6*m1##_5*m2##_1+ \ + m1##_3*m2##_2*m1##_7-m2##_2*m1##_6*m1##_4)*den; \ + \ + ans##_1 = s1##_0*(m1##_2*m2##_0*m1##_7-m1##_8*m2##_0*m1##_1+ \ + (flt_t)2.0*m1##_0*m1##_8*m2##_1-m1##_0*m2##_2*m1##_5- \ + (flt_t)2.0*m1##_6*m1##_2*m2##_1+m2##_2*m1##_3*m1##_2- \ + m1##_8*m1##_3*m2##_0+m1##_6*m2##_0*m1##_5+ \ + m1##_6*m2##_2*m1##_1-m2##_2*m1##_0*m1##_7)*den; \ + \ ans##_2 = s1##_0*(m1##_1*m1##_5*m2##_0-m1##_2*m2##_0*m1##_4- \ - m1##_0*m1##_5*m2##_1+m1##_3*m1##_2*m2##_1- \ - m2##_1*m1##_0*m1##_7-m1##_6*m1##_4*m2##_0+ \ - (flt_t)2.0*m1##_4*m1##_0*m2##_2- \ - (flt_t)2.0*m1##_3*m2##_2*m1##_1+ \ - 
m1##_3*m1##_7*m2##_0+m1##_6*m2##_1*m1##_1)*den; \ - \ + m1##_0*m1##_5*m2##_1+m1##_3*m1##_2*m2##_1- \ + m2##_1*m1##_0*m1##_7-m1##_6*m1##_4*m2##_0+ \ + (flt_t)2.0*m1##_4*m1##_0*m2##_2- \ + (flt_t)2.0*m1##_3*m2##_2*m1##_1+ \ + m1##_3*m1##_7*m2##_0+m1##_6*m2##_1*m1##_1)*den; \ + \ ans##_3 = s1##_1*(-m1##_4*m2##_5*m1##_2+(flt_t)2.0*m1##_4*m1##_8*m2##_3+ \ - m1##_5*m1##_1*m2##_5-(flt_t)2.0*m1##_5*m2##_3*m1##_7+ \ - m2##_4*m1##_2*m1##_7-m2##_4*m1##_1*m1##_8- \ - m1##_3*m1##_8*m2##_4+m1##_6*m1##_5*m2##_4- \ - m2##_5*m1##_6*m1##_4+m1##_3*m2##_5*m1##_7)*den; \ - \ - ans##_4 = s1##_1*(m1##_2*m2##_3*m1##_7-m1##_1*m1##_8*m2##_3+ \ - (flt_t)2.0*m1##_8*m1##_0*m2##_4-m2##_5*m1##_0*m1##_5- \ - (flt_t)2.0*m1##_6*m2##_4*m1##_2-m1##_3*m1##_8*m2##_3+ \ - m1##_6*m1##_5*m2##_3+m1##_3*m2##_5*m1##_2- \ - m1##_0*m2##_5*m1##_7+m2##_5*m1##_1*m1##_6)*den; \ - \ - ans##_5 = s1##_1*(m1##_1*m1##_5*m2##_3-m1##_2*m2##_3*m1##_4- \ - m1##_0*m1##_5*m2##_4+m1##_3*m1##_2*m2##_4+ \ - (flt_t)2.0*m1##_4*m1##_0*m2##_5-m1##_0*m2##_4*m1##_7+ \ - m1##_1*m1##_6*m2##_4-m2##_3*m1##_6*m1##_4- \ - (flt_t)2.0*m1##_3*m1##_1*m2##_5+m1##_3*m2##_3*m1##_7)* \ - den; \ - \ - ans##_6 = s1##_2*(-m1##_4*m1##_2*m2##_8+m1##_1*m1##_5*m2##_8+ \ - (flt_t)2.0*m1##_4*m2##_6*m1##_8-m1##_1*m2##_7*m1##_8+ \ - m1##_2*m1##_7*m2##_7-(flt_t)2.0*m2##_6*m1##_7*m1##_5- \ - m1##_3*m2##_7*m1##_8+m1##_5*m1##_6*m2##_7- \ - m1##_4*m1##_6*m2##_8+m1##_7*m1##_3*m2##_8)*den; \ - \ - ans##_7 = s1##_2*-(m1##_1*m1##_8*m2##_6-m1##_2*m2##_6*m1##_7- \ - (flt_t)2.0*m2##_7*m1##_0*m1##_8+m1##_5*m2##_8*m1##_0+ \ - (flt_t)2.0*m2##_7*m1##_2*m1##_6+m1##_3*m2##_6*m1##_8- \ - m1##_3*m1##_2*m2##_8-m1##_5*m1##_6*m2##_6+ \ - m1##_0*m2##_8*m1##_7-m2##_8*m1##_1*m1##_6)*den; \ - \ - ans##_8 = s1##_2*(m1##_1*m1##_5*m2##_6-m1##_2*m2##_6*m1##_4- \ - m1##_0*m1##_5*m2##_7+m1##_3*m1##_2*m2##_7- \ - m1##_4*m1##_6*m2##_6-m1##_7*m2##_7*m1##_0+ \ - (flt_t)2.0*m1##_4*m2##_8*m1##_0+m1##_7*m1##_3*m2##_6+ \ + m1##_5*m1##_1*m2##_5-(flt_t)2.0*m1##_5*m2##_3*m1##_7+ \ + 
m2##_4*m1##_2*m1##_7-m2##_4*m1##_1*m1##_8- \ + m1##_3*m1##_8*m2##_4+m1##_6*m1##_5*m2##_4- \ + m2##_5*m1##_6*m1##_4+m1##_3*m2##_5*m1##_7)*den; \ + \ + ans##_4 = s1##_1*(m1##_2*m2##_3*m1##_7-m1##_1*m1##_8*m2##_3+ \ + (flt_t)2.0*m1##_8*m1##_0*m2##_4-m2##_5*m1##_0*m1##_5- \ + (flt_t)2.0*m1##_6*m2##_4*m1##_2-m1##_3*m1##_8*m2##_3+ \ + m1##_6*m1##_5*m2##_3+m1##_3*m2##_5*m1##_2- \ + m1##_0*m2##_5*m1##_7+m2##_5*m1##_1*m1##_6)*den; \ + \ + ans##_5 = s1##_1*(m1##_1*m1##_5*m2##_3-m1##_2*m2##_3*m1##_4- \ + m1##_0*m1##_5*m2##_4+m1##_3*m1##_2*m2##_4+ \ + (flt_t)2.0*m1##_4*m1##_0*m2##_5-m1##_0*m2##_4*m1##_7+ \ + m1##_1*m1##_6*m2##_4-m2##_3*m1##_6*m1##_4- \ + (flt_t)2.0*m1##_3*m1##_1*m2##_5+m1##_3*m2##_3*m1##_7)* \ + den; \ + \ + ans##_6 = s1##_2*(-m1##_4*m1##_2*m2##_8+m1##_1*m1##_5*m2##_8+ \ + (flt_t)2.0*m1##_4*m2##_6*m1##_8-m1##_1*m2##_7*m1##_8+ \ + m1##_2*m1##_7*m2##_7-(flt_t)2.0*m2##_6*m1##_7*m1##_5- \ + m1##_3*m2##_7*m1##_8+m1##_5*m1##_6*m2##_7- \ + m1##_4*m1##_6*m2##_8+m1##_7*m1##_3*m2##_8)*den; \ + \ + ans##_7 = s1##_2*-(m1##_1*m1##_8*m2##_6-m1##_2*m2##_6*m1##_7- \ + (flt_t)2.0*m2##_7*m1##_0*m1##_8+m1##_5*m2##_8*m1##_0+ \ + (flt_t)2.0*m2##_7*m1##_2*m1##_6+m1##_3*m2##_6*m1##_8- \ + m1##_3*m1##_2*m2##_8-m1##_5*m1##_6*m2##_6+ \ + m1##_0*m2##_8*m1##_7-m2##_8*m1##_1*m1##_6)*den; \ + \ + ans##_8 = s1##_2*(m1##_1*m1##_5*m2##_6-m1##_2*m2##_6*m1##_4- \ + m1##_0*m1##_5*m2##_7+m1##_3*m1##_2*m2##_7- \ + m1##_4*m1##_6*m2##_6-m1##_7*m2##_7*m1##_0+ \ + (flt_t)2.0*m1##_4*m2##_8*m1##_0+m1##_7*m1##_3*m2##_6+ \ m1##_6*m1##_1*m2##_7-(flt_t)2.0*m2##_8*m1##_3*m1##_1)* \ - den; \ + den; \ } -#define ME_vcopy4(dst,src) \ - dst##_0 = src##_0; \ - dst##_1 = src##_1; \ - dst##_2 = src##_2; \ +#define ME_vcopy4(dst,src) \ + dst##_0 = src##_0; \ + dst##_1 = src##_1; \ + dst##_2 = src##_2; \ dst##_3 = src##_3; -#define ME_mldivide3(m1, v_0, v_1, v_2, ans, error) \ -{ \ - flt_t aug_0, aug_1, aug_2, aug_3, aug_4, aug_5; \ - flt_t aug_6, aug_7, aug_8, aug_9, aug_10, aug_11, t; \ - \ - aug_3 = v_0; \ - aug_0 
= m1##_0; \ - aug_1 = m1##_1; \ - aug_2 = m1##_2; \ - aug_7 = v_1; \ - aug_4 = m1##_3; \ - aug_5 = m1##_4; \ - aug_6 = m1##_5; \ - aug_11 = v_2; \ - aug_8 = m1##_6; \ - aug_9 = m1##_7; \ - aug_10 = m1##_8; \ - \ - if (fabs(aug_4) > fabs(aug_0)) { \ - flt_t swapt; \ - swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ - swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ - swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ - swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ - } \ - if (fabs(aug_8) > fabs(aug_0)) { \ - flt_t swapt; \ - swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ +#define ME_mldivide3(m1, v_0, v_1, v_2, ans, error) \ +{ \ + flt_t aug_0, aug_1, aug_2, aug_3, aug_4, aug_5; \ + flt_t aug_6, aug_7, aug_8, aug_9, aug_10, aug_11, t; \ + \ + aug_3 = v_0; \ + aug_0 = m1##_0; \ + aug_1 = m1##_1; \ + aug_2 = m1##_2; \ + aug_7 = v_1; \ + aug_4 = m1##_3; \ + aug_5 = m1##_4; \ + aug_6 = m1##_5; \ + aug_11 = v_2; \ + aug_8 = m1##_6; \ + aug_9 = m1##_7; \ + aug_10 = m1##_8; \ + \ + if (fabs(aug_4) > fabs(aug_0)) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ + swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ + swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ + swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ + } \ + if (fabs(aug_8) > fabs(aug_0)) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \ swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \ swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \ - } \ - \ - if (aug_0 != (flt_t)0.0) { \ - } else if (aug_4 != (flt_t)0.0) { \ - flt_t swapt; \ - swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ - swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ - swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ - swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ - } else if (aug_8 != (flt_t)0.0) { \ - flt_t swapt; \ - swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ - swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \ - swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \ - swapt = aug_3; 
aug_3 = aug_11; aug_11 = swapt; \ - } else \ - error = 1; \ - \ - t = aug_4 / aug_0; \ - aug_5 -= t * aug_1; \ - aug_6 -= t * aug_2; \ - aug_7 -= t * aug_3; \ - t = aug_8 / aug_0; \ - aug_9 -= t * aug_1; \ - aug_10 -= t * aug_2; \ - aug_11 -= t * aug_3; \ - \ - if (fabs(aug_9) > fabs(aug_5)) { \ - flt_t swapt; \ - swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \ - swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ - swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ - swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ - } \ - \ - if (aug_5 != (flt_t)0.0) { \ - } else if (aug_9 != (flt_t)0.0) { \ - flt_t swapt; \ + } \ + \ + if (aug_0 != (flt_t)0.0) { \ + } else if (aug_4 != (flt_t)0.0) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_4; aug_4 = swapt; \ + swapt = aug_1; aug_1 = aug_5; aug_5 = swapt; \ + swapt = aug_2; aug_2 = aug_6; aug_6 = swapt; \ + swapt = aug_3; aug_3 = aug_7; aug_7 = swapt; \ + } else if (aug_8 != (flt_t)0.0) { \ + flt_t swapt; \ + swapt = aug_0; aug_0 = aug_8; aug_8 = swapt; \ + swapt = aug_1; aug_1 = aug_9; aug_9 = swapt; \ + swapt = aug_2; aug_2 = aug_10; aug_10 = swapt; \ + swapt = aug_3; aug_3 = aug_11; aug_11 = swapt; \ + } else \ + error = 1; \ + \ + t = aug_4 / aug_0; \ + aug_5 -= t * aug_1; \ + aug_6 -= t * aug_2; \ + aug_7 -= t * aug_3; \ + t = aug_8 / aug_0; \ + aug_9 -= t * aug_1; \ + aug_10 -= t * aug_2; \ + aug_11 -= t * aug_3; \ + \ + if (fabs(aug_9) > fabs(aug_5)) { \ + flt_t swapt; \ + swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \ + swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ + swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ + swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ + } \ + \ + if (aug_5 != (flt_t)0.0) { \ + } else if (aug_9 != (flt_t)0.0) { \ + flt_t swapt; \ swapt = aug_4; aug_4 = aug_8; aug_8 = swapt; \ - swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ - swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ - swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ - } \ - \ - t = aug_9 / aug_5; \ - aug_10 -= t * aug_6; \ - aug_11 
-= t * aug_7; \ - \ - if (aug_10 == (flt_t)0.0) \ - error = 1; \ - \ - ans##_2 = aug_11/aug_10; \ - t = (flt_t)0.0; \ - t += aug_6 * ans##_2; \ - ans##_1 = (aug_7-t) / aug_5; \ - t = (flt_t)0.0; \ - t += aug_1 * ans##_1; \ - t += aug_2 * ans##_2; \ - ans##_0 = (aug_3 - t) / aug_0; \ + swapt = aug_5; aug_5 = aug_9; aug_9 = swapt; \ + swapt = aug_6; aug_6 = aug_10; aug_10 = swapt; \ + swapt = aug_7; aug_7 = aug_11; aug_11 = swapt; \ + } \ + \ + t = aug_9 / aug_5; \ + aug_10 -= t * aug_6; \ + aug_11 -= t * aug_7; \ + \ + if (aug_10 == (flt_t)0.0) \ + error = 1; \ + \ + ans##_2 = aug_11/aug_10; \ + t = (flt_t)0.0; \ + t += aug_6 * ans##_2; \ + ans##_1 = (aug_7-t) / aug_5; \ + t = (flt_t)0.0; \ + t += aug_1 * ans##_1; \ + t += aug_2 * ans##_2; \ + ans##_0 = (aug_3 - t) / aug_0; \ } /* ---------------------------------------------------------------------- normalize a quaternion ------------------------------------------------------------------------- */ -#define ME_qnormalize(q) \ -{ \ - double norm = 1.0 / \ - sqrt(q##_w*q##_w + q##_i*q##_i + q##_j*q##_j + q##_k*q##_k); \ - q##_w *= norm; \ - q##_i *= norm; \ - q##_j *= norm; \ - q##_k *= norm; \ +#define ME_qnormalize(q) \ +{ \ + double norm = 1.0 / \ + sqrt(q##_w*q##_w + q##_i*q##_i + q##_j*q##_j + q##_k*q##_k); \ + q##_w *= norm; \ + q##_i *= norm; \ + q##_j *= norm; \ + q##_k *= norm; \ } /* ---------------------------------------------------------------------- @@ -373,106 +373,106 @@ and divide by principal moments ------------------------------------------------------------------------- */ -#define ME_mq_to_omega(m, quat, moments_0, moments_1, moments_2, w) \ -{ \ - double wbody_0, wbody_1, wbody_2; \ - double rot_0, rot_1, rot_2, rot_3, rot_4, rot_5, rot_6, rot_7, rot_8; \ - \ - double w2 = quat##_w * quat##_w; \ - double i2 = quat##_i * quat##_i; \ - double j2 = quat##_j * quat##_j; \ - double k2 = quat##_k * quat##_k; \ - double twoij = 2.0 * quat##_i * quat##_j; \ - double twoik = 2.0 * quat##_i * quat##_k; \ 
- double twojk = 2.0 * quat##_j * quat##_k; \ - double twoiw = 2.0 * quat##_i * quat##_w; \ - double twojw = 2.0 * quat##_j * quat##_w; \ - double twokw = 2.0 * quat##_k * quat##_w; \ - \ - rot##_0 = w2 + i2 - j2 - k2; \ - rot##_1 = twoij - twokw; \ - rot##_2 = twojw + twoik; \ - \ - rot##_3 = twoij + twokw; \ - rot##_4 = w2 - i2 + j2 - k2; \ - rot##_5 = twojk - twoiw; \ - \ - rot##_6 = twoik - twojw; \ - rot##_7 = twojk + twoiw; \ - rot##_8 = w2 - i2 - j2 + k2; \ - \ +#define ME_mq_to_omega(m, quat, moments_0, moments_1, moments_2, w) \ +{ \ + double wbody_0, wbody_1, wbody_2; \ + double rot_0, rot_1, rot_2, rot_3, rot_4, rot_5, rot_6, rot_7, rot_8; \ + \ + double w2 = quat##_w * quat##_w; \ + double i2 = quat##_i * quat##_i; \ + double j2 = quat##_j * quat##_j; \ + double k2 = quat##_k * quat##_k; \ + double twoij = 2.0 * quat##_i * quat##_j; \ + double twoik = 2.0 * quat##_i * quat##_k; \ + double twojk = 2.0 * quat##_j * quat##_k; \ + double twoiw = 2.0 * quat##_i * quat##_w; \ + double twojw = 2.0 * quat##_j * quat##_w; \ + double twokw = 2.0 * quat##_k * quat##_w; \ + \ + rot##_0 = w2 + i2 - j2 - k2; \ + rot##_1 = twoij - twokw; \ + rot##_2 = twojw + twoik; \ + \ + rot##_3 = twoij + twokw; \ + rot##_4 = w2 - i2 + j2 - k2; \ + rot##_5 = twojk - twoiw; \ + \ + rot##_6 = twoik - twojw; \ + rot##_7 = twojk + twoiw; \ + rot##_8 = w2 - i2 - j2 + k2; \ + \ wbody_0 = rot##_0*m##_0 + rot##_3*m##_1 + rot##_6*m##_2; \ wbody_1 = rot##_1*m##_0 + rot##_4*m##_1 + rot##_7*m##_2; \ wbody_2 = rot##_2*m##_0 + rot##_5*m##_1 + rot##_8*m##_2; \ - \ - wbody_0 *= moments_0; \ - wbody_1 *= moments_1; \ - wbody_2 *= moments_2; \ - \ + \ + wbody_0 *= moments_0; \ + wbody_1 *= moments_1; \ + wbody_2 *= moments_2; \ + \ w##_0 = rot##_0*wbody_0 + rot##_1*wbody_1 + rot##_2*wbody_2; \ w##_1 = rot##_3*wbody_0 + rot##_4*wbody_1 + rot##_5*wbody_2; \ w##_2 = rot##_6*wbody_0 + rot##_7*wbody_1 + rot##_8*wbody_2; \ } -#define ME_omega_richardson(dtf,dtq,angmomin,quatin,torque,i0,i1,i2) \ -{ \ - 
angmomin[0] += dtf * torque[0]; \ - double angmom_0 = angmomin[0]; \ - angmomin[1] += dtf * torque[1]; \ - double angmom_1 = angmomin[1]; \ - angmomin[2] += dtf * torque[2]; \ - double angmom_2 = angmomin[2]; \ - \ - double quat_w = quatin[0]; \ - double quat_i = quatin[1]; \ - double quat_j = quatin[2]; \ - double quat_k = quatin[3]; \ - \ - double omega_0, omega_1, omega_2; \ - ME_mq_to_omega(angmom,quat,i0,i1,i2,omega); \ - \ - double wq_0, wq_1, wq_2, wq_3; \ - wq_0 = -omega_0*quat_i - omega_1*quat_j - omega_2*quat_k; \ - wq_1 = quat_w*omega_0 + omega_1*quat_k - omega_2*quat_j; \ - wq_2 = quat_w*omega_1 + omega_2*quat_i - omega_0*quat_k; \ - wq_3 = quat_w*omega_2 + omega_0*quat_j - omega_1*quat_i; \ - \ - double qfull_w, qfull_i, qfull_j, qfull_k; \ - qfull_w = quat_w + dtq * wq_0; \ - qfull_i = quat_i + dtq * wq_1; \ - qfull_j = quat_j + dtq * wq_2; \ - qfull_k = quat_k + dtq * wq_3; \ - ME_qnormalize(qfull); \ - \ - double qhalf_w, qhalf_i, qhalf_j, qhalf_k; \ - qhalf_w = quat_w + 0.5*dtq * wq_0; \ - qhalf_i = quat_i + 0.5*dtq * wq_1; \ - qhalf_j = quat_j + 0.5*dtq * wq_2; \ - qhalf_k = quat_k + 0.5*dtq * wq_3; \ - ME_qnormalize(qhalf); \ - \ - ME_mq_to_omega(angmom,qhalf,i0,i1,i2,omega); \ - wq_0 = -omega_0*qhalf_i - omega_1*qhalf_j - omega_2*qhalf_k; \ - wq_1 = qhalf_w*omega_0 + omega_1*qhalf_k - omega_2*qhalf_j; \ - wq_2 = qhalf_w*omega_1 + omega_2*qhalf_i - omega_0*qhalf_k; \ - wq_3 = qhalf_w*omega_2 + omega_0*qhalf_j - omega_1*qhalf_i; \ - \ - qhalf_w += 0.5*dtq * wq_0; \ - qhalf_i += 0.5*dtq * wq_1; \ - qhalf_j += 0.5*dtq * wq_2; \ - qhalf_k += 0.5*dtq * wq_3; \ - ME_qnormalize(qhalf); \ - \ - quat_w = 2.0*qhalf_w - qfull_w; \ - quat_i = 2.0*qhalf_i - qfull_i; \ - quat_j = 2.0*qhalf_j - qfull_j; \ - quat_k = 2.0*qhalf_k - qfull_k; \ - ME_qnormalize(quat); \ - \ - quatin[0] = quat_w; \ - quatin[1] = quat_i; \ - quatin[2] = quat_j; \ - quatin[3] = quat_k; \ +#define ME_omega_richardson(dtf,dtq,angmomin,quatin,torque,i0,i1,i2) \ +{ \ + angmomin[0] += dtf * 
torque[0]; \ + double angmom_0 = angmomin[0]; \ + angmomin[1] += dtf * torque[1]; \ + double angmom_1 = angmomin[1]; \ + angmomin[2] += dtf * torque[2]; \ + double angmom_2 = angmomin[2]; \ + \ + double quat_w = quatin[0]; \ + double quat_i = quatin[1]; \ + double quat_j = quatin[2]; \ + double quat_k = quatin[3]; \ + \ + double omega_0, omega_1, omega_2; \ + ME_mq_to_omega(angmom,quat,i0,i1,i2,omega); \ + \ + double wq_0, wq_1, wq_2, wq_3; \ + wq_0 = -omega_0*quat_i - omega_1*quat_j - omega_2*quat_k; \ + wq_1 = quat_w*omega_0 + omega_1*quat_k - omega_2*quat_j; \ + wq_2 = quat_w*omega_1 + omega_2*quat_i - omega_0*quat_k; \ + wq_3 = quat_w*omega_2 + omega_0*quat_j - omega_1*quat_i; \ + \ + double qfull_w, qfull_i, qfull_j, qfull_k; \ + qfull_w = quat_w + dtq * wq_0; \ + qfull_i = quat_i + dtq * wq_1; \ + qfull_j = quat_j + dtq * wq_2; \ + qfull_k = quat_k + dtq * wq_3; \ + ME_qnormalize(qfull); \ + \ + double qhalf_w, qhalf_i, qhalf_j, qhalf_k; \ + qhalf_w = quat_w + 0.5*dtq * wq_0; \ + qhalf_i = quat_i + 0.5*dtq * wq_1; \ + qhalf_j = quat_j + 0.5*dtq * wq_2; \ + qhalf_k = quat_k + 0.5*dtq * wq_3; \ + ME_qnormalize(qhalf); \ + \ + ME_mq_to_omega(angmom,qhalf,i0,i1,i2,omega); \ + wq_0 = -omega_0*qhalf_i - omega_1*qhalf_j - omega_2*qhalf_k; \ + wq_1 = qhalf_w*omega_0 + omega_1*qhalf_k - omega_2*qhalf_j; \ + wq_2 = qhalf_w*omega_1 + omega_2*qhalf_i - omega_0*qhalf_k; \ + wq_3 = qhalf_w*omega_2 + omega_0*qhalf_j - omega_1*qhalf_i; \ + \ + qhalf_w += 0.5*dtq * wq_0; \ + qhalf_i += 0.5*dtq * wq_1; \ + qhalf_j += 0.5*dtq * wq_2; \ + qhalf_k += 0.5*dtq * wq_3; \ + ME_qnormalize(qhalf); \ + \ + quat_w = 2.0*qhalf_w - qfull_w; \ + quat_i = 2.0*qhalf_i - qfull_i; \ + quat_j = 2.0*qhalf_j - qfull_j; \ + quat_k = 2.0*qhalf_k - qfull_k; \ + ME_qnormalize(quat); \ + \ + quatin[0] = quat_w; \ + quatin[1] = quat_i; \ + quatin[2] = quat_j; \ + quatin[3] = quat_k; \ } #endif diff --git a/src/USER-INTEL/nbin_intel.cpp b/src/USER-INTEL/nbin_intel.cpp index 
194b9a5f97d2dc515e7198f89bd1a58fd087e988..c5574a78c7a59703fee558dd2e8a910f7e02048f 100644 --- a/src/USER-INTEL/nbin_intel.cpp +++ b/src/USER-INTEL/nbin_intel.cpp @@ -51,11 +51,11 @@ NBinIntel::~NBinIntel() { const int * bins = this->bins; const int * _atombin = this->_atombin; const int * _binpacked = this->_binpacked; - #pragma offload_transfer target(mic:_cop) \ + #pragma offload_transfer target(mic:_cop) \ nocopy(binhead,bins,_atombin,_binpacked:alloc_if(0) free_if(1)) } #endif -} +} /* ---------------------------------------------------------------------- setup for bin_atoms() @@ -70,8 +70,8 @@ void NBinIntel::bin_atoms_setup(int nall) #ifdef _LMP_INTEL_OFFLOAD if (_offload_alloc) { const int * binhead = this->binhead; - #pragma offload_transfer target(mic:_cop) \ - nocopy(binhead:alloc_if(0) free_if(1)) + #pragma offload_transfer target(mic:_cop) \ + nocopy(binhead:alloc_if(0) free_if(1)) } #endif @@ -98,8 +98,8 @@ void NBinIntel::bin_atoms_setup(int nall) const int * bins = this->bins; const int * _atombin = this->_atombin; const int * _binpacked = this->_binpacked; - #pragma offload_transfer target(mic:_cop) \ - nocopy(bins,_atombin,_binpacked:alloc_if(0) free_if(1)) + #pragma offload_transfer target(mic:_cop) \ + nocopy(bins,_atombin,_binpacked:alloc_if(0) free_if(1)) } #endif memory->destroy(bins); @@ -157,10 +157,10 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) { const flt_t dx = (INTEL_BIGP - bboxhi[0]); const flt_t dy = (INTEL_BIGP - bboxhi[1]); const flt_t dz = (INTEL_BIGP - bboxhi[2]); - if (dx * dx + dy * dy + dz * dz < - static_cast<flt_t>(neighbor->cutneighmaxsq)) + if (dx * dx + dy * dy + dz * dz < + static_cast<flt_t>(neighbor->cutneighmaxsq)) error->one(FLERR, - "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); + "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}."); } // ---------- Grow and cast/pack buffers ------------- @@ -174,14 +174,16 @@ void 
NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) { biga.w = 1; buffers->get_x()[nall] = biga; - const int nthreads = comm->nthreads; + int nthreads; + if (comm->nthreads > INTEL_HTHREADS) nthreads = comm->nthreads; + else nthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(buffers) + #pragma omp parallel if(nthreads > INTEL_HTHREADS) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, - sizeof(ATOM_T)); + sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, 0); } _fix->stop_watch(TIME_PACK); diff --git a/src/USER-INTEL/npair_full_bin_intel.cpp b/src/USER-INTEL/npair_full_bin_intel.cpp index 1ec93bf11385d55c46e3f422dee569c2f82cb1a4..06c10c080fd9e506760c0d1e66c93ff76233ee7c 100644 --- a/src/USER-INTEL/npair_full_bin_intel.cpp +++ b/src/USER-INTEL/npair_full_bin_intel.cpp @@ -70,483 +70,62 @@ fbi(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) { #endif buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end, - _fix->nbor_pack_width()); + _fix->nbor_pack_width()); int need_ic = 0; if (atom->molecular) dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax, - neighbor->cutneighmax); + neighbor->cutneighmax); #ifdef _LMP_INTEL_OFFLOAD - if (need_ic) { - if (offload_noghost) { - fbi<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end); - fbi<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end); + if (_fix->three_body_neighbor()) { + if (need_ic) { + if (offload_noghost) { + bin_newton<flt_t,acc_t,1,1,1,0,1>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,1,1,0,1>(0, list, buffers, host_start, nlocal, off_end); + } else { + bin_newton<flt_t,acc_t,0,1,1,0,1>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal); + } } else { - fbi<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end); - fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal); + if (offload_noghost) { + bin_newton<flt_t,acc_t,1,0,1,0,1>(1, list, 
buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,0,1,0,1>(0, list, buffers, host_start, nlocal, off_end); + } else { + bin_newton<flt_t,acc_t,0,0,1,0,1>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal); + } } } else { - if (offload_noghost) { - fbi<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end); - fbi<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end); + if (need_ic) { + if (offload_noghost) { + bin_newton<flt_t,acc_t,1,1,1,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,1,1,0,0>(0, list, buffers, host_start, nlocal, off_end); + } else { + bin_newton<flt_t,acc_t,0,1,1,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal); + } } else { - fbi<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end); - fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); + if (offload_noghost) { + bin_newton<flt_t,acc_t,1,0,1,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,0,1,0,0>(0, list, buffers, host_start, nlocal, off_end); + } else { + bin_newton<flt_t,acc_t,0,0,1,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal); + } } } #else - if (need_ic) - fbi<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal); - else - fbi<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); - #endif -} - -template <class flt_t, class acc_t, int offload_noghost, int need_ic> -void NPairFullBinIntel:: -fbi(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers, - const int astart, const int aend, const int offload_end) { - - if (aend-astart == 0) return; - - const int nall = atom->nlocal + atom->nghost; - int pad = 1; - int nall_t = nall; - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost && offload) nall_t = atom->nlocal; - #endif - - const int pack_width = _fix->nbor_pack_width(); - const int pad_width = pad; - - const ATOM_T * _noalias const x = buffers->get_x(); - int * 
_noalias const firstneigh = buffers->firstneigh(list); - const int e_nall = nall_t; - - const int molecular = atom->molecular; - int *ns = NULL; - tagint *s = NULL; - int tag_size = 0, special_size; - if (buffers->need_tag()) tag_size = e_nall; - if (molecular) { - s = atom->special[0]; - ns = atom->nspecial[0]; - special_size = aend; + if (_fix->three_body_neighbor()) { + if (need_ic) + bin_newton<flt_t,acc_t,0,1,1,0,1>(0, list, buffers, host_start, nlocal); + else + bin_newton<flt_t,acc_t,0,0,1,0,1>(0, list, buffers, host_start, nlocal); } else { - s = &buffers->_special_holder; - ns = &buffers->_nspecial_holder; - special_size = 0; + if (need_ic) + bin_newton<flt_t,acc_t,0,1,1,0,0>(0, list, buffers, host_start, nlocal); + else + bin_newton<flt_t,acc_t,0,0,1,0,0>(0, list, buffers, host_start, nlocal); } - const tagint * _noalias const special = s; - const int * _noalias const nspecial = ns; - const int maxspecial = atom->maxspecial; - const tagint * _noalias const tag = atom->tag; - - int * _noalias const ilist = list->ilist; - int * _noalias numneigh = list->numneigh; - int * _noalias const cnumneigh = buffers->cnumneigh(list); - const int nstencil = this->nstencil; - const int * _noalias const stencil = this->stencil; - const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; - const int ntypes = atom->ntypes + 1; - const int nlocal = atom->nlocal; - - #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - tagint * const molecule = atom->molecule; - #endif - - int tnum; - int *overflow; - double *timer_compute; - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - timer_compute = _fix->off_watch_neighbor(); - tnum = buffers->get_off_threads(); - overflow = _fix->get_off_overflow_flag(); - _fix->stop_watch(TIME_HOST_NEIGHBOR); - _fix->start_watch(TIME_OFFLOAD_LATENCY); - } else - #endif - { - tnum = comm->nthreads; - overflow = _fix->get_overflow_flag(); - } - const int nthreads = tnum; - const int maxnbors = buffers->get_max_nbors(); - int * 
_noalias const atombin = buffers->get_atombin(); - const int * _noalias const binpacked = buffers->get_binpacked(); - - const int xperiodic = domain->xperiodic; - const int yperiodic = domain->yperiodic; - const int zperiodic = domain->zperiodic; - const flt_t xprd_half = domain->xprd_half; - const flt_t yprd_half = domain->yprd_half; - const flt_t zprd_half = domain->zprd_half; - - #ifdef _LMP_INTEL_OFFLOAD - const int * _noalias const binhead = this->binhead; - const int * _noalias const bins = this->bins; - const int cop = _fix->coprocessor_number(); - const int separate_buffers = _fix->separate_buffers(); - #pragma offload target(mic:cop) if(offload) \ - in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ - in(tag:length(tag_size) alloc_if(0) free_if(0)) \ - in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ - in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ - in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \ - in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \ - in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ - in(firstneigh:length(0) alloc_if(0) free_if(0)) \ - in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ - out(numneigh:length(0) alloc_if(0) free_if(0)) \ - in(ilist:length(0) alloc_if(0) free_if(0)) \ - in(atombin:length(aend) alloc_if(0) free_if(0)) \ - in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pack_width) \ - in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ - in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ - out(overflow:length(5) alloc_if(0) free_if(0)) \ - out(timer_compute:length(1) alloc_if(0) free_if(0)) \ - signal(tag) - #endif - { - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime(); - #endif - - #ifdef _LMP_INTEL_OFFLOAD - overflow[LMP_LOCAL_MIN] = astart; - overflow[LMP_LOCAL_MAX] = aend - 1; - overflow[LMP_GHOST_MIN] = e_nall; - overflow[LMP_GHOST_MAX] = -1; 
- #endif - - int nstencilp = 0; - int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL]; - for (int k = 0; k < nstencil; k++) { - binstart[nstencilp] = stencil[k]; - int end = stencil[k] + 1; - for (int kk = k + 1; kk < nstencil; kk++) { - if (stencil[kk-1]+1 == stencil[kk]) { - end++; - k++; - } else break; - } - binend[nstencilp] = end; - nstencilp++; - } - - #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(numneigh, overflow, nstencilp, binstart, binend) - #endif - { - #ifdef _LMP_INTEL_OFFLOAD - int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; - #endif - - const int num = aend - astart; - int tid, ifrom, ito; - - IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, pack_width); - ifrom += astart; - ito += astart; - int e_ito = ito; - if (ito == num) { - int imod = ito % pack_width; - if (imod) e_ito += pack_width - imod; - } - const int list_size = (e_ito + tid * 2 + 2) * maxnbors; - int which; - int pack_offset = maxnbors * pack_width; - int ct = (ifrom + tid * 2) * maxnbors; - int *neighptr = firstneigh + ct; - const int obound = pack_offset + maxnbors * 2; - - int max_chunk = 0; - int lane = 0; - for (int i = ifrom; i < ito; i++) { - const flt_t xtmp = x[i].x; - const flt_t ytmp = x[i].y; - const flt_t ztmp = x[i].z; - const int itype = x[i].w; - const tagint itag = tag[i]; - const int ioffset = ntypes * itype; - - const int ibin = atombin[i]; - int raw_count = pack_offset; - - // loop over all atoms in surrounding bins in stencil including self - // skip i = j - if (exclude) { - for (int k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - #ifndef _LMP_INTEL_OFFLOAD - #ifdef INTEL_VMASK - #pragma simd - #endif - #endif - for (int jj = bstart; jj < bend; jj++) { - int j = binpacked[jj]; - - if (i == j) j=e_nall; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost) { - if (j < nlocal) { - if (i < offload_end) continue; - } else if (offload) continue; - 
} - #endif - - #ifndef _LMP_INTEL_OFFLOAD - const int jtype = x[j].w; - if (exclusion(i,j,itype,jtype,mask,molecule)) continue; - #endif - - neighptr[raw_count++] = j; - } - } - } else { - for (int k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - #ifndef _LMP_INTEL_OFFLOAD - #ifdef INTEL_VMASK - #pragma simd - #endif - #endif - for (int jj = bstart; jj < bend; jj++) { - int j = binpacked[jj]; - - if (i == j) j=e_nall; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost) { - if (j < nlocal) { - if (i < offload_end) continue; - } else if (offload) continue; - } - #endif - - neighptr[raw_count++] = j; - } - } - } - - if (raw_count > obound) *overflow = 1; - - #if defined(LMP_SIMD_COMPILER) - #ifdef _LMP_INTEL_OFFLOAD - int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax; - #if __INTEL_COMPILER+0 > 1499 - #pragma vector aligned - #pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin) - #endif - #else - #pragma vector aligned - #pragma simd - #endif - #endif - for (int u = pack_offset; u < raw_count; u++) { - int j = neighptr[u]; - const flt_t delx = xtmp - x[j].x; - const flt_t dely = ytmp - x[j].y; - const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; - const flt_t rsq = delx * delx + dely * dely + delz * delz; - if (rsq > cutneighsq[ioffset + jtype]) - neighptr[u] = e_nall; - else { - if (need_ic) { - int no_special; - ominimum_image_check(no_special, delx, dely, delz); - if (no_special) - neighptr[u] = -j - 1; - } - #ifdef _LMP_INTEL_OFFLOAD - if (j < nlocal) { - if (j < vlmin) vlmin = j; - if (j > vlmax) vlmax = j; - } else { - if (j < vgmin) vgmin = j; - if (j > vgmax) vgmax = j; - } - #endif - } - } - #ifdef _LMP_INTEL_OFFLOAD - lmin = MIN(lmin,vlmin); - gmin = MIN(gmin,vgmin); - lmax = MAX(lmax,vlmax); - gmax = MAX(gmax,vgmax); - #endif - - int n = lane, n2 = pack_offset; - for (int u = pack_offset; u < raw_count; u++) { - const int j = neighptr[u]; - int 
pj = j; - if (pj < e_nall) { - if (need_ic) - if (pj < 0) pj = -pj - 1; - - const int jtag = tag[pj]; - int flist = 0; - if (itag > jtag) { - if ((itag+jtag) % 2 == 0) flist = 1; - } else if (itag < jtag) { - if ((itag+jtag) % 2 == 1) flist = 1; - } else { - if (x[pj].z < ztmp) flist = 1; - else if (x[pj].z == ztmp && x[pj].y < ytmp) flist = 1; - else if (x[pj].z == ztmp && x[pj].y == ytmp && x[pj].x < xtmp) - flist = 1; - } - if (flist) { - neighptr[n2++] = j; - } else { - neighptr[n] = j; - n += pack_width; - } - } - } - int ns = (n - lane) / pack_width; - atombin[i] = ns; - for (int u = pack_offset; u < n2; u++) { - neighptr[n] = neighptr[u]; - n += pack_width; - } - - ilist[i] = i; - cnumneigh[i] = ct + lane; - ns += n2 - pack_offset; - numneigh[i] = ns; - - if (ns > max_chunk) max_chunk = ns; - lane++; - if (lane == pack_width) { - ct += max_chunk * pack_width; - const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); - const int edge = (ct % alignb); - if (edge) ct += alignb - edge; - neighptr = firstneigh + ct; - max_chunk = 0; - pack_offset = maxnbors * pack_width; - lane = 0; - if (ct + obound > list_size) { - if (i < ito - 1) { - *overflow = 1; - ct = (ifrom + tid * 2) * maxnbors; - } - } - } - } - - if (*overflow == 1) - for (int i = ifrom; i < ito; i++) - numneigh[i] = 0; - - #ifdef _LMP_INTEL_OFFLOAD - if (separate_buffers) { - #if defined(_OPENMP) - #pragma omp critical - #endif - { - if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; - if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; - if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; - if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; - } - #pragma omp barrier - } - - int ghost_offset = 0, nall_offset = e_nall; - if (separate_buffers) { - int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; - if (offload) { - ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; - 
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; - } else { - ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; - nall_offset = nlocal + nghost; - } - } - #endif - - if (molecular) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - - const int trip = jnum * pack_width; - for (int jj = 0; jj < trip; jj+=pack_width) { - const int j = jlist[jj]; - if (need_ic && j < 0) { - which = 0; - jlist[jj] = -j - 1; - } else - ofind_special(which, special, nspecial, i, tag[j]); - #ifdef _LMP_INTEL_OFFLOAD - if (j >= nlocal) { - if (j == e_nall) - jlist[jj] = nall_offset; - else if (which) - jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); - else jlist[jj]-=ghost_offset; - } else - #endif - if (which) jlist[jj] = j ^ (which << SBBITS); - } - } - } - #ifdef _LMP_INTEL_OFFLOAD - else if (separate_buffers) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - int jj = 0; - for (jj = 0; jj < jnum; jj++) { - if (jlist[jj] >= nlocal) { - if (jlist[jj] == e_nall) jlist[jj] = nall_offset; - else jlist[jj] -= ghost_offset; - } - } - } - } - #endif - } // end omp - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime() - *timer_compute; - #endif - } // end offload - - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - _fix->stop_watch(TIME_OFFLOAD_LATENCY); - _fix->start_watch(TIME_HOST_NEIGHBOR); - for (int n = 0; n < aend; n++) { - ilist[n] = n; - numneigh[n] = 0; - } - } else { - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; - if (separate_buffers) { - _fix->start_watch(TIME_PACK); - _fix->set_neighbor_host_sizes(); - buffers->pack_sep_from_single(_fix->host_min_local(), - _fix->host_used_local(), - _fix->host_min_ghost(), - _fix->host_used_ghost()); - _fix->stop_watch(TIME_PACK); - } - } - #else - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + 
cnumneigh[i]; #endif } diff --git a/src/USER-INTEL/npair_full_bin_intel.h b/src/USER-INTEL/npair_full_bin_intel.h index 608bd0f5ddabaac02148386733c274369f7b5a3c..0f8a27b3b42b1dedf8c1b517e8be70233a8618f1 100644 --- a/src/USER-INTEL/npair_full_bin_intel.h +++ b/src/USER-INTEL/npair_full_bin_intel.h @@ -15,7 +15,7 @@ NPairStyle(full/bin/intel, NPairFullBinIntel, - NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | + NP_FULL | NP_BIN | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_INTEL) #else @@ -36,9 +36,6 @@ class NPairFullBinIntel : public NPairIntel { private: template <class flt_t, class acc_t> void fbi(NeighList *, IntelBuffers<flt_t,acc_t> *); - template <class flt_t, class acc_t, int, int> - void fbi(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int, - const int, const int offload_end = 0); }; } diff --git a/src/USER-INTEL/npair_half_bin_newtoff_intel.cpp b/src/USER-INTEL/npair_half_bin_newtoff_intel.cpp deleted file mode 100644 index 1fcc3f0759b606d4d58d72f2ea08c02732d72601..0000000000000000000000000000000000000000 --- a/src/USER-INTEL/npair_half_bin_newtoff_intel.cpp +++ /dev/null @@ -1,451 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: W. 
Michael Brown (Intel) -------------------------------------------------------------------------- */ - -#include "npair_half_bin_newtoff_intel.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "atom.h" -#include "comm.h" -#include "group.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -NPairHalfBinNewtoffIntel::NPairHalfBinNewtoffIntel(LAMMPS *lmp) : - NPairIntel(lmp) {} - -/* ---------------------------------------------------------------------- - binned neighbor list construction with partial Newton's 3rd law - each owned atom i checks own bin and other bins in stencil - pair stored once if i,j are both owned and i < j - pair stored by me if j is ghost (also stored by proc owning j) -------------------------------------------------------------------------- */ - -void NPairHalfBinNewtoffIntel::build(NeighList *list) -{ - if (nstencil > INTEL_MAX_STENCIL_CHECK) - error->all(FLERR, "Too many neighbor bins for USER-INTEL package."); - - #ifdef _LMP_INTEL_OFFLOAD - if (exclude) - error->all(FLERR, "Exclusion lists not yet supported for Intel offload"); - #endif - - if (_fix->precision() == FixIntel::PREC_MODE_MIXED) - hbnni(list, _fix->get_mixed_buffers()); - else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE) - hbnni(list, _fix->get_double_buffers()); - else - hbnni(list, _fix->get_single_buffers()); - - _fix->stop_watch(TIME_HOST_NEIGHBOR); -} - -template <class flt_t, class acc_t> -void NPairHalfBinNewtoffIntel:: -hbnni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) { - const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; - list->inum = nlocal; - - const int off_end = _fix->offload_end_neighbor(); - int host_start = off_end;; - - #ifdef _LMP_INTEL_OFFLOAD - if (off_end) grow_stencil(); - if (_fix->full_host_list()) host_start = 0; - #endif - - buffers->grow_list(list, atom->nlocal, comm->nthreads, off_end); - - int need_ic = 0; - if (atom->molecular) - dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax, - neighbor->cutneighmax); - - #ifdef _LMP_INTEL_OFFLOAD - if (need_ic) { - hbnni<flt_t,acc_t,1>(1, list, buffers, 0, off_end); - hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal); - } else { - hbnni<flt_t,acc_t,0>(1, list, buffers, 0, off_end); - hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal); - } - #else - if (need_ic) - hbnni<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal); - else - hbnni<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal); - #endif -} - -template <class flt_t, class acc_t, int need_ic> -void NPairHalfBinNewtoffIntel:: -hbnni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers, - const int astart, const int aend) { - - if (aend-astart == 0) return; - - const int nall = atom->nlocal + atom->nghost; - int pad = 1; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - if (INTEL_MIC_NBOR_PAD > 1) - pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); - } else - #endif - if (INTEL_NBOR_PAD > 1) - pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); - const int pad_width = pad; - - const ATOM_T * _noalias const x = buffers->get_x(); - int * _noalias const firstneigh = buffers->firstneigh(list); - - const int molecular = atom->molecular; - int *ns = NULL; - tagint *s = NULL; - int tag_size = 0, special_size; - if (buffers->need_tag()) tag_size = nall; - if (molecular) { - s = atom->special[0]; - ns = atom->nspecial[0]; - special_size = aend; - } else { - s = &buffers->_special_holder; - ns = &buffers->_nspecial_holder; - special_size = 0; - } - const tagint * _noalias const 
special = s; - const int * _noalias const nspecial = ns; - const int maxspecial = atom->maxspecial; - const tagint * _noalias const tag = atom->tag; - - int * _noalias const ilist = list->ilist; - int * _noalias numneigh = list->numneigh; - int * _noalias const cnumneigh = buffers->cnumneigh(list); - const int nstencil = this->nstencil; - const int * _noalias const stencil = this->stencil; - const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; - const int ntypes = atom->ntypes + 1; - const int nlocal = atom->nlocal; - - #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - tagint * const molecule = atom->molecule; - #endif - - int tnum; - int *overflow; - double *timer_compute; - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - timer_compute = _fix->off_watch_neighbor(); - tnum = buffers->get_off_threads(); - overflow = _fix->get_off_overflow_flag(); - _fix->stop_watch(TIME_HOST_NEIGHBOR); - _fix->start_watch(TIME_OFFLOAD_LATENCY); - } else - #endif - { - tnum = comm->nthreads; - overflow = _fix->get_overflow_flag(); - } - const int nthreads = tnum; - const int maxnbors = buffers->get_max_nbors(); - int * _noalias const atombin = buffers->get_atombin(); - const int * _noalias const binpacked = buffers->get_binpacked(); - - const int xperiodic = domain->xperiodic; - const int yperiodic = domain->yperiodic; - const int zperiodic = domain->zperiodic; - const flt_t xprd_half = domain->xprd_half; - const flt_t yprd_half = domain->yprd_half; - const flt_t zprd_half = domain->zprd_half; - - #ifdef _LMP_INTEL_OFFLOAD - const int * _noalias const binhead = this->binhead; - const int * _noalias const bins = this->bins; - const int cop = _fix->coprocessor_number(); - const int separate_buffers = _fix->separate_buffers(); - #pragma offload target(mic:cop) if(offload) \ - in(x:length(nall+1) alloc_if(0) free_if(0)) \ - in(tag:length(tag_size) alloc_if(0) free_if(0)) \ - in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ - 
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ - in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \ - in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \ - in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ - in(firstneigh:length(0) alloc_if(0) free_if(0)) \ - in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ - out(numneigh:length(0) alloc_if(0) free_if(0)) \ - in(ilist:length(0) alloc_if(0) free_if(0)) \ - in(atombin:length(aend) alloc_if(0) free_if(0)) \ - in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,pad_width,offload,nall) \ - in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \ - in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ - out(overflow:length(5) alloc_if(0) free_if(0)) \ - out(timer_compute:length(1) alloc_if(0) free_if(0)) \ - signal(tag) - #endif - { - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime(); - #endif - - #ifdef _LMP_INTEL_OFFLOAD - overflow[LMP_LOCAL_MIN] = astart; - overflow[LMP_LOCAL_MAX] = aend - 1; - overflow[LMP_GHOST_MIN] = nall; - overflow[LMP_GHOST_MAX] = -1; - #endif - - int nstencilp = 0; - int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL]; - for (int k = 0; k < nstencil; k++) { - binstart[nstencilp] = stencil[k]; - int end = stencil[k] + 1; - for (int kk = k + 1; kk < nstencil; kk++) { - if (stencil[kk-1]+1 == stencil[kk]) { - end++; - k++; - } else break; - } - binend[nstencilp] = end; - nstencilp++; - } - - #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(numneigh, overflow, nstencilp, binstart, binend) - #endif - { - #ifdef _LMP_INTEL_OFFLOAD - int lmin = nall, lmax = -1, gmin = nall, gmax = -1; - #endif - - const int num = aend - astart; - int tid, ifrom, ito; - IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); - ifrom += astart; - ito += astart; - - int which; - - const int list_size = (ito + tid + 1) * maxnbors; - int ct = (ifrom + tid) * maxnbors; - int 
*neighptr = firstneigh + ct; - - for (int i = ifrom; i < ito; i++) { - int j, k, n, n2, itype, jtype, ibin; - double xtmp, ytmp, ztmp, delx, dely, delz, rsq; - - n = 0; - n2 = maxnbors; - - xtmp = x[i].x; - ytmp = x[i].y; - ztmp = x[i].z; - itype = x[i].w; - const int ioffset = ntypes*itype; - - // loop over all atoms in other bins in stencil including self - // only store pair if i < j - // stores own/own pairs only once - // stores own/ghost pairs on both procs - - ibin = atombin[i]; - - for (k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - for (int jj = bstart; jj < bend; jj++) { - const int j = binpacked[jj]; - if (j <= i) continue; - - jtype = x[j].w; - #ifndef _LMP_INTEL_OFFLOAD - if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; - #endif - - delx = xtmp - x[j].x; - dely = ytmp - x[j].y; - delz = ztmp - x[j].z; - rsq = delx * delx + dely * dely + delz * delz; - if (rsq <= cutneighsq[ioffset + jtype]) { - if (j < nlocal) { - if (need_ic) { - int no_special; - ominimum_image_check(no_special, delx, dely, delz); - if (no_special) - neighptr[n++] = -j - 1; - else - neighptr[n++] = j; - } else - neighptr[n++] = j; - #ifdef _LMP_INTEL_OFFLOAD - if (j < lmin) lmin = j; - if (j > lmax) lmax = j; - #endif - } else { - if (need_ic) { - int no_special; - ominimum_image_check(no_special, delx, dely, delz); - if (no_special) - neighptr[n2++] = -j - 1; - else - neighptr[n2++] = j; - } else - neighptr[n2++] = j; - #ifdef _LMP_INTEL_OFFLOAD - if (j < gmin) gmin = j; - if (j > gmax) gmax = j; - #endif - } - } - } - } - ilist[i] = i; - - cnumneigh[i] = ct; - if (n > maxnbors) *overflow = 1; - for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k]; - - const int edge = (n % pad_width); - if (edge) { - const int pad_end = n + (pad_width - edge); - #if defined(LMP_SIMD_COMPILER) - #pragma loop_count min=1, max=15, avg=8 - #endif - for ( ; n < pad_end; n++) - neighptr[n] = nall; - } 
- numneigh[i] = n; - while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++; - ct += n; - neighptr += n; - if (ct + n + maxnbors > list_size) { - *overflow = 1; - ct = (ifrom + tid) * maxnbors; - } - } - - if (*overflow == 1) - for (int i = ifrom; i < ito; i++) - numneigh[i] = 0; - - #ifdef _LMP_INTEL_OFFLOAD - if (separate_buffers) { - #if defined(_OPENMP) - #pragma omp critical - #endif - { - if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; - if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; - if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; - if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; - } - #pragma omp barrier - } - - int ghost_offset = 0, nall_offset = nall; - if (separate_buffers) { - int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; - if (offload) { - ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; - nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; - } else { - ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; - nall_offset = nlocal + nghost; - } - } - #endif - - if (molecular) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - for (int jj = 0; jj < jnum; jj++) { - const int j = jlist[jj]; - if (need_ic && j < 0) { - which = 0; - jlist[jj] = -j - 1; - } else - ofind_special(which, special, nspecial, i, tag[j]); - #ifdef _LMP_INTEL_OFFLOAD - if (j >= nlocal) { - if (j == nall) - jlist[jj] = nall_offset; - else if (which) - jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); - else jlist[jj]-=ghost_offset; - } else - #endif - if (which) jlist[jj] = j ^ (which << SBBITS); - } - } - } - #ifdef _LMP_INTEL_OFFLOAD - else if (separate_buffers) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - int jj = 0; - for (jj = 0; jj < jnum; jj++) - if (jlist[jj] >= nlocal) break; 
- while (jj < jnum) { - if (jlist[jj] == nall) jlist[jj] = nall_offset; - else jlist[jj] -= ghost_offset; - jj++; - } - } - } - #endif - } // end omp - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime() - *timer_compute; - #endif - } // end offload - - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - _fix->stop_watch(TIME_OFFLOAD_LATENCY); - _fix->start_watch(TIME_HOST_NEIGHBOR); - for (int n = 0; n < aend; n++) { - ilist[n] = n; - numneigh[n] = 0; - } - } else { - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; - if (separate_buffers) { - _fix->start_watch(TIME_PACK); - _fix->set_neighbor_host_sizes(); - buffers->pack_sep_from_single(_fix->host_min_local(), - _fix->host_used_local(), - _fix->host_min_ghost(), - _fix->host_used_ghost()); - _fix->stop_watch(TIME_PACK); - } - } - #else - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; - #endif -} diff --git a/src/USER-INTEL/npair_half_bin_newton_intel.cpp b/src/USER-INTEL/npair_half_bin_newton_intel.cpp index 5584f962e95862718289a1bc6cb1d240ec252f00..c761557097cf96b677dc20a355a94e186ac5d8c5 100644 --- a/src/USER-INTEL/npair_half_bin_newton_intel.cpp +++ b/src/USER-INTEL/npair_half_bin_newton_intel.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) : +NPairHalfBinNewtonIntel::NPairHalfBinNewtonIntel(LAMMPS *lmp) : NPairIntel(lmp) {} /* ---------------------------------------------------------------------- @@ -75,536 +75,32 @@ hbni(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) { int need_ic = 0; if (atom->molecular) dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax, - neighbor->cutneighmax); + neighbor->cutneighmax); #ifdef _LMP_INTEL_OFFLOAD if (need_ic) { if (offload_noghost) { - hbni<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end); - 
hbni<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end); + bin_newton<flt_t,acc_t,1,1,0,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,1,0,0,0>(0, list, buffers, host_start, nlocal, + off_end); } else { - hbni<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end); - hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal); + bin_newton<flt_t,acc_t,0,1,0,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal); } } else { if (offload_noghost) { - hbni<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end); - hbni<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end); + bin_newton<flt_t,acc_t,1,0,0,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,0,0,0,0>(0, list, buffers, host_start, nlocal, + off_end); } else { - hbni<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end); - hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); + bin_newton<flt_t,acc_t,0,0,0,0,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal); } } #else - if (need_ic) - hbni<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal); + if (need_ic) + bin_newton<flt_t,acc_t,0,1,0,0,0>(0, list, buffers, host_start, nlocal); else - hbni<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); - #endif -} - -template <class flt_t, class acc_t, int offload_noghost, int need_ic> -void NPairHalfBinNewtonIntel:: -hbni(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers, - const int astart, const int aend, const int offload_end) { - - if (aend-astart == 0) return; - - const int nall = atom->nlocal + atom->nghost; - int pad = 1; - int nall_t = nall; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost && offload) nall_t = atom->nlocal; - if (offload) { - if (INTEL_MIC_NBOR_PAD > 1) - pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); - } else - #endif - if (INTEL_NBOR_PAD > 1) - pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); - const 
int pad_width = pad; - - const ATOM_T * _noalias const x = buffers->get_x(); - int * _noalias const firstneigh = buffers->firstneigh(list); - const int e_nall = nall_t; - - const int molecular = atom->molecular; - int *ns = NULL; - tagint *s = NULL; - int tag_size = 0, special_size; - if (buffers->need_tag()) tag_size = e_nall; - if (molecular) { - s = atom->special[0]; - ns = atom->nspecial[0]; - special_size = aend; - } else { - s = &buffers->_special_holder; - ns = &buffers->_nspecial_holder; - special_size = 0; - } - const tagint * _noalias const special = s; - const int * _noalias const nspecial = ns; - const int maxspecial = atom->maxspecial; - const tagint * _noalias const tag = atom->tag; - - int * _noalias const ilist = list->ilist; - int * _noalias numneigh = list->numneigh; - int * _noalias const cnumneigh = buffers->cnumneigh(list); - const int nstencil = this->nstencil; - const int * _noalias const stencil = this->stencil; - const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; - const int ntypes = atom->ntypes + 1; - const int nlocal = atom->nlocal; - - #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - tagint * const molecule = atom->molecule; - #endif - - int tnum; - int *overflow; - double *timer_compute; - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - timer_compute = _fix->off_watch_neighbor(); - tnum = buffers->get_off_threads(); - overflow = _fix->get_off_overflow_flag(); - _fix->stop_watch(TIME_HOST_NEIGHBOR); - _fix->start_watch(TIME_OFFLOAD_LATENCY); - } else - #endif - { - tnum = comm->nthreads; - overflow = _fix->get_overflow_flag(); - } - const int nthreads = tnum; - const int maxnbors = buffers->get_max_nbors(); - int * _noalias const atombin = buffers->get_atombin(); - const int * _noalias const binpacked = buffers->get_binpacked(); - - const int xperiodic = domain->xperiodic; - const int yperiodic = domain->yperiodic; - const int zperiodic = domain->zperiodic; - const flt_t xprd_half = domain->xprd_half; - 
const flt_t yprd_half = domain->yprd_half; - const flt_t zprd_half = domain->zprd_half; - - #ifdef _LMP_INTEL_OFFLOAD - const int * _noalias const binhead = this->binhead; - const int * _noalias const bins = this->bins; - const int cop = _fix->coprocessor_number(); - const int separate_buffers = _fix->separate_buffers(); - #pragma offload target(mic:cop) if(offload) \ - in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ - in(tag:length(tag_size) alloc_if(0) free_if(0)) \ - in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ - in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ - in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \ - in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \ - in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ - in(firstneigh:length(0) alloc_if(0) free_if(0)) \ - in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ - out(numneigh:length(0) alloc_if(0) free_if(0)) \ - in(ilist:length(0) alloc_if(0) free_if(0)) \ - in(atombin:length(aend) alloc_if(0) free_if(0)) \ - in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload,pad_width) \ - in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \ - in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ - out(overflow:length(5) alloc_if(0) free_if(0)) \ - out(timer_compute:length(1) alloc_if(0) free_if(0)) \ - signal(tag) - #endif - { - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime(); - #endif - - #ifdef _LMP_INTEL_OFFLOAD - overflow[LMP_LOCAL_MIN] = astart; - overflow[LMP_LOCAL_MAX] = aend - 1; - overflow[LMP_GHOST_MIN] = e_nall; - overflow[LMP_GHOST_MAX] = -1; - #endif - - int nstencilp = 0; - int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL]; - for (int k = 0; k < nstencil; k++) { - binstart[nstencilp] = stencil[k]; - int end = stencil[k] + 1; - for (int kk = k + 1; kk < nstencil; kk++) { - if (stencil[kk-1]+1 == stencil[kk]) { - end++; - 
k++; - } else break; - } - binend[nstencilp] = end; - nstencilp++; - } - - #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(numneigh, overflow, nstencilp, binstart, binend) - #endif - { - #ifdef _LMP_INTEL_OFFLOAD - int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; - #endif - - const int num = aend - astart; - int tid, ifrom, ito; - - #ifdef OUTER_CHUNK - const int swidth = ip_simd::SIMD_type<flt_t>::width(); - IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, swidth); - ifrom += astart; - ito += astart; - int e_ito = ito; - if (ito == num) { - int imod = ito % swidth; - if (imod) e_ito += swidth - imod; - } - const int list_size = (e_ito + tid * 2 + 2) * maxnbors; - #else - const int swidth = 1; - IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); - ifrom += astart; - ito += astart; - const int list_size = (ito + tid * 2 + 2) * maxnbors; - #endif - - int which; - - int pack_offset = maxnbors * swidth; - int ct = (ifrom + tid * 2) * maxnbors; - int *neighptr = firstneigh + ct; - const int obound = pack_offset + maxnbors * 2; - - int max_chunk = 0; - int lane = 0; - for (int i = ifrom; i < ito; i++) { - const flt_t xtmp = x[i].x; - const flt_t ytmp = x[i].y; - const flt_t ztmp = x[i].z; - const int itype = x[i].w; - const int ioffset = ntypes * itype; - - // loop over rest of atoms in i's bin, ghosts are at end of linked list - // if j is owned atom, store it, since j is beyond i in linked list - // if j is ghost, only store if j coords are "above/to the right" of i - - int raw_count = pack_offset; - for (int j = bins[i]; j >= 0; j = bins[j]) { - if (j >= nlocal) { - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost && offload) continue; - #endif - if (x[j].z < ztmp) continue; - if (x[j].z == ztmp) { - if (x[j].y < ytmp) continue; - if (x[j].y == ytmp && x[j].x < xtmp) continue; - } - } - #ifdef _LMP_INTEL_OFFLOAD - else if (offload_noghost && i < offload_end) continue; - #endif - - #ifndef _LMP_INTEL_OFFLOAD - if (exclude) { - const 
int jtype = x[j].w; - if (exclusion(i,j,itype,jtype,mask,molecule)) continue; - } - #endif - - neighptr[raw_count++] = j; - } - - // loop over all atoms in other bins in stencil, store every pair - - const int ibin = atombin[i]; - if (exclude) { - for (int k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - #ifndef _LMP_INTEL_OFFLOAD - #ifdef INTEL_VMASK - #pragma simd - #endif - #endif - for (int jj = bstart; jj < bend; jj++) { - const int j = binpacked[jj]; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost) { - if (j < nlocal) { - if (i < offload_end) continue; - } else if (offload) continue; - } - #endif - - #ifndef _LMP_INTEL_OFFLOAD - const int jtype = x[j].w; - if (exclusion(i,j,itype,jtype,mask,molecule)) continue; - #endif - - neighptr[raw_count++] = j; - } - } - } else { - for (int k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - #ifndef _LMP_INTEL_OFFLOAD - #ifdef INTEL_VMASK - #pragma simd - #endif - #endif - for (int jj = bstart; jj < bend; jj++) { - const int j = binpacked[jj]; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost) { - if (j < nlocal) { - if (i < offload_end) continue; - } else if (offload) continue; - } - #endif - - neighptr[raw_count++] = j; - } - } - } - - if (raw_count > obound) *overflow = 1; - - #if defined(LMP_SIMD_COMPILER) - #ifdef _LMP_INTEL_OFFLOAD - int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax; - #if __INTEL_COMPILER+0 > 1499 - #pragma vector aligned - #pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin) - #endif - #else - #pragma vector aligned - #pragma simd - #endif - #endif - for (int u = pack_offset; u < raw_count; u++) { - int j = neighptr[u]; - const flt_t delx = xtmp - x[j].x; - const flt_t dely = ytmp - x[j].y; - const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; - const flt_t rsq = delx * delx + dely * dely + delz * delz; - 
if (rsq > cutneighsq[ioffset + jtype]) - neighptr[u] = e_nall; - else { - if (need_ic) { - int no_special; - ominimum_image_check(no_special, delx, dely, delz); - if (no_special) - neighptr[u] = -j - 1; - } - #ifdef _LMP_INTEL_OFFLOAD - if (j < nlocal) { - if (j < vlmin) vlmin = j; - if (j > vlmax) vlmax = j; - } else { - if (j < vgmin) vgmin = j; - if (j > vgmax) vgmax = j; - } - #endif - } - } - #ifdef _LMP_INTEL_OFFLOAD - lmin = MIN(lmin,vlmin); - gmin = MIN(gmin,vgmin); - lmax = MAX(lmax,vlmax); - gmax = MAX(gmax,vgmax); - #endif - - int n = lane, n2 = pack_offset; - for (int u = pack_offset; u < raw_count; u++) { - const int j = neighptr[u]; - int pj = j; - if (pj < e_nall) { - if (need_ic) - if (pj < 0) pj = -pj - 1; - - if (pj < nlocal) { - neighptr[n] = j; - n += swidth; - } else - neighptr[n2++] = j; - } - } - int ns = (n - lane) / swidth; - for (int u = pack_offset; u < n2; u++) { - neighptr[n] = neighptr[u]; - n += swidth; - } - - ilist[i] = i; - cnumneigh[i] = ct + lane; - ns += n2 - pack_offset; - #ifndef OUTER_CHUNK - int edge = (ns % pad_width); - if (edge) { - const int pad_end = ns + (pad_width - edge); - #if defined(LMP_SIMD_COMPILER) - #pragma loop_count min=1, max=15, avg=8 - #endif - for ( ; ns < pad_end; ns++) - neighptr[ns] = e_nall; - } - #endif - numneigh[i] = ns; - - #ifdef OUTER_CHUNK - if (ns > max_chunk) max_chunk = ns; - lane++; - if (lane == swidth) { - ct += max_chunk * swidth; - const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); - int edge = (ct % alignb); - if (edge) ct += alignb - edge; - neighptr = firstneigh + ct; - max_chunk = 0; - pack_offset = maxnbors * swidth; - lane = 0; - if (ct + obound > list_size) { - if (i < ito - 1) { - *overflow = 1; - ct = (ifrom + tid * 2) * maxnbors; - } - } - } - #else - ct += ns; - const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); - edge = (ct % alignb); - if (edge) ct += alignb - edge; - neighptr = firstneigh + ct; - if (ct + obound > list_size) { - if (i < ito - 1) { - *overflow = 1; - 
ct = (ifrom + tid * 2) * maxnbors; - } - } - #endif - } - - if (*overflow == 1) - for (int i = ifrom; i < ito; i++) - numneigh[i] = 0; - - #ifdef _LMP_INTEL_OFFLOAD - if (separate_buffers) { - #if defined(_OPENMP) - #pragma omp critical - #endif - { - if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; - if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; - if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; - if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; - } - #pragma omp barrier - } - - int ghost_offset = 0, nall_offset = e_nall; - if (separate_buffers) { - int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; - if (offload) { - ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; - nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; - } else { - ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; - nall_offset = nlocal + nghost; - } - } - #endif - - if (molecular) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - #ifndef OUTER_CHUNK - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd - #endif - for (int jj = 0; jj < jnum; jj++) { - #else - const int trip = jnum * swidth; - for (int jj = 0; jj < trip; jj+= swidth) { - #endif - const int j = jlist[jj]; - if (need_ic && j < 0) { - which = 0; - jlist[jj] = -j - 1; - } else - ofind_special(which, special, nspecial, i, tag[j]); - #ifdef _LMP_INTEL_OFFLOAD - if (j >= nlocal) { - if (j == e_nall) - jlist[jj] = nall_offset; - else if (which) - jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); - else jlist[jj]-=ghost_offset; - } else - #endif - if (which) jlist[jj] = j ^ (which << SBBITS); - } - } - } - #ifdef _LMP_INTEL_OFFLOAD - else if (separate_buffers) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - int jj = 0; - for 
(jj = 0; jj < jnum; jj++) - if (jlist[jj] >= nlocal) break; - while (jj < jnum) { - if (jlist[jj] == e_nall) jlist[jj] = nall_offset; - else jlist[jj] -= ghost_offset; - jj++; - } - } - } - #endif - } // end omp - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime() - *timer_compute; - #endif - } // end offload - - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - _fix->stop_watch(TIME_OFFLOAD_LATENCY); - _fix->start_watch(TIME_HOST_NEIGHBOR); - for (int n = 0; n < aend; n++) { - ilist[n] = n; - numneigh[n] = 0; - } - } else { - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; - if (separate_buffers) { - _fix->start_watch(TIME_PACK); - _fix->set_neighbor_host_sizes(); - buffers->pack_sep_from_single(_fix->host_min_local(), - _fix->host_used_local(), - _fix->host_min_ghost(), - _fix->host_used_ghost()); - _fix->stop_watch(TIME_PACK); - } - } - #else - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; + bin_newton<flt_t,acc_t,0,0,0,0,0>(0, list, buffers, host_start, nlocal); #endif } diff --git a/src/USER-INTEL/npair_half_bin_newton_intel.h b/src/USER-INTEL/npair_half_bin_newton_intel.h index 4e496986b4581bef8d2b57545a324cf5b6fc1b7c..54a8e241354c18b95abc394bbf085762f3792dcc 100644 --- a/src/USER-INTEL/npair_half_bin_newton_intel.h +++ b/src/USER-INTEL/npair_half_bin_newton_intel.h @@ -36,9 +36,6 @@ class NPairHalfBinNewtonIntel : public NPairIntel { private: template <class flt_t, class acc_t> void hbni(NeighList *, IntelBuffers<flt_t,acc_t> *); - template <class flt_t, class acc_t, int, int> - void hbni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int, - const int, const int offload_end = 0); }; } diff --git a/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp b/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp index 3b6d68d4de9a5ea836838a019561abef1bd95901..d70f1ec5897a0d7b1e8a7b5b9633f9882ce2c54f 100644 --- 
a/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp +++ b/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp @@ -26,7 +26,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) : +NPairHalfBinNewtonTriIntel::NPairHalfBinNewtonTriIntel(LAMMPS *lmp) : NPairIntel(lmp) {} /* ---------------------------------------------------------------------- @@ -75,439 +75,32 @@ hbnti(NeighList *list, IntelBuffers<flt_t,acc_t> *buffers) { int need_ic = 0; if (atom->molecular) dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax, - neighbor->cutneighmax); + neighbor->cutneighmax); #ifdef _LMP_INTEL_OFFLOAD if (need_ic) { if (offload_noghost) { - hbnti<flt_t,acc_t,1,1>(1, list, buffers, 0, off_end); - hbnti<flt_t,acc_t,1,1>(0, list, buffers, host_start, nlocal, off_end); + bin_newton<flt_t,acc_t,1,1,0,1,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,1,0,1,0>(0, list, buffers, host_start, nlocal, + off_end); } else { - hbnti<flt_t,acc_t,0,1>(1, list, buffers, 0, off_end); - hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, nlocal); + bin_newton<flt_t,acc_t,0,1,0,1,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal); } } else { if (offload_noghost) { - hbnti<flt_t,acc_t,1,0>(1, list, buffers, 0, off_end); - hbnti<flt_t,acc_t,1,0>(0, list, buffers, host_start, nlocal, off_end); + bin_newton<flt_t,acc_t,1,0,0,1,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,1,0,0,1,0>(0, list, buffers, host_start, nlocal, + off_end); } else { - hbnti<flt_t,acc_t,0,0>(1, list, buffers, 0, off_end); - hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); + bin_newton<flt_t,acc_t,0,0,0,1,0>(1, list, buffers, 0, off_end); + bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal); } } #else if (need_ic) - hbnti<flt_t,acc_t,0,1>(0, list, buffers, host_start, 
nlocal); + bin_newton<flt_t,acc_t,0,1,0,1,0>(0, list, buffers, host_start, nlocal); else - hbnti<flt_t,acc_t,0,0>(0, list, buffers, host_start, nlocal); - #endif -} - -template <class flt_t, class acc_t, int offload_noghost, int need_ic> -void NPairHalfBinNewtonTriIntel:: -hbnti(const int offload, NeighList *list, IntelBuffers<flt_t,acc_t> *buffers, - const int astart, const int aend, const int offload_end) { - if (aend-astart == 0) return; - - const int nall = atom->nlocal + atom->nghost; - int pad = 1; - int nall_t = nall; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost && offload) nall_t = atom->nlocal; - if (offload) { - if (INTEL_MIC_NBOR_PAD > 1) - pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); - } else - #endif - if (INTEL_NBOR_PAD > 1) - pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); - const int pad_width = pad; - - const ATOM_T * _noalias const x = buffers->get_x(); - int * _noalias const firstneigh = buffers->firstneigh(list); - const int e_nall = nall_t; - - const int molecular = atom->molecular; - int *ns = NULL; - tagint *s = NULL; - int tag_size = 0, special_size; - if (buffers->need_tag()) tag_size = e_nall; - if (molecular) { - s = atom->special[0]; - ns = atom->nspecial[0]; - special_size = aend; - } else { - s = &buffers->_special_holder; - ns = &buffers->_nspecial_holder; - special_size = 0; - } - const tagint * _noalias const special = s; - const int * _noalias const nspecial = ns; - const int maxspecial = atom->maxspecial; - const tagint * _noalias const tag = atom->tag; - - int * _noalias const ilist = list->ilist; - int * _noalias numneigh = list->numneigh; - int * _noalias const cnumneigh = buffers->cnumneigh(list); - const int nstencil = this->nstencil; - const int * _noalias const stencil = this->stencil; - const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; - const int ntypes = atom->ntypes + 1; - const int nlocal = atom->nlocal; - - #ifndef _LMP_INTEL_OFFLOAD - int * const mask = atom->mask; - 
tagint * const molecule = atom->molecule; - #endif - - int tnum; - int *overflow; - double *timer_compute; - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - timer_compute = _fix->off_watch_neighbor(); - tnum = buffers->get_off_threads(); - overflow = _fix->get_off_overflow_flag(); - _fix->stop_watch(TIME_HOST_NEIGHBOR); - _fix->start_watch(TIME_OFFLOAD_LATENCY); - } else - #endif - { - tnum = comm->nthreads; - overflow = _fix->get_overflow_flag(); - } - const int nthreads = tnum; - const int maxnbors = buffers->get_max_nbors(); - int * _noalias const atombin = buffers->get_atombin(); - const int * _noalias const binpacked = buffers->get_binpacked(); - - const int xperiodic = domain->xperiodic; - const int yperiodic = domain->yperiodic; - const int zperiodic = domain->zperiodic; - const flt_t xprd_half = domain->xprd_half; - const flt_t yprd_half = domain->yprd_half; - const flt_t zprd_half = domain->zprd_half; - - #ifdef _LMP_INTEL_OFFLOAD - const int * _noalias const binhead = this->binhead; - const int * _noalias const bins = this->bins; - const int cop = _fix->coprocessor_number(); - const int separate_buffers = _fix->separate_buffers(); - #pragma offload target(mic:cop) if(offload) \ - in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ - in(tag:length(tag_size) alloc_if(0) free_if(0)) \ - in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ - in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ - in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \ - in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \ - in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ - in(firstneigh:length(0) alloc_if(0) free_if(0)) \ - in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ - out(numneigh:length(0) alloc_if(0) free_if(0)) \ - in(ilist:length(0) alloc_if(0) free_if(0)) \ - in(atombin:length(aend) alloc_if(0) free_if(0)) \ - in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ - in(maxnbors,nthreads,maxspecial,nstencil,offload_end,pad_width,e_nall) \ - 
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \ - in(xperiodic, yperiodic, zperiodic, xprd_half, yprd_half, zprd_half) \ - out(overflow:length(5) alloc_if(0) free_if(0)) \ - out(timer_compute:length(1) alloc_if(0) free_if(0)) \ - signal(tag) - #endif - { - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime(); - #endif - - #ifdef _LMP_INTEL_OFFLOAD - overflow[LMP_LOCAL_MIN] = astart; - overflow[LMP_LOCAL_MAX] = aend - 1; - overflow[LMP_GHOST_MIN] = e_nall; - overflow[LMP_GHOST_MAX] = -1; - #endif - - int nstencilp = 0; - int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL]; - for (int k = 0; k < nstencil; k++) { - binstart[nstencilp] = stencil[k]; - int end = stencil[k] + 1; - for (int kk = k + 1; kk < nstencil; kk++) { - if (stencil[kk-1]+1 == stencil[kk]) { - end++; - k++; - } else break; - } - binend[nstencilp] = end; - nstencilp++; - } - - #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(numneigh, overflow, nstencilp, binstart, binend) - #endif - { - #ifdef _LMP_INTEL_OFFLOAD - int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; - #endif - - const int num = aend - astart; - int tid, ifrom, ito; - IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); - ifrom += astart; - ito += astart; - - int which; - - const int list_size = (ito + tid * 2 + 2) * maxnbors; - int ct = (ifrom + tid * 2) * maxnbors; - int *neighptr = firstneigh + ct; - const int obound = maxnbors * 3; - - for (int i = ifrom; i < ito; i++) { - const flt_t xtmp = x[i].x; - const flt_t ytmp = x[i].y; - const flt_t ztmp = x[i].z; - const int itype = x[i].w; - const int ioffset = ntypes * itype; - - // loop over all atoms in bins in stencil - // pairs for atoms j "below" i are excluded - // below = lower z or (equal z and lower y) or (equal zy and lower x) - // (equal zyx and j <= i) - // latter excludes self-self interaction but allows superposed atoms - - const int ibin = atombin[i]; - - int raw_count = maxnbors; - for 
(int k = 0; k < nstencilp; k++) { - const int bstart = binhead[ibin + binstart[k]]; - const int bend = binhead[ibin + binend[k]]; - for (int jj = bstart; jj < bend; jj++) { - const int j = binpacked[jj]; - - #ifdef _LMP_INTEL_OFFLOAD - if (offload_noghost) { - if (j < nlocal) { - if (i < offload_end) continue; - } else if (offload) continue; - } - #endif - - if (x[j].z < ztmp) continue; - if (x[j].z == ztmp) { - if (x[j].y < ytmp) continue; - if (x[j].y == ytmp) { - if (x[j].x < xtmp) continue; - if (x[j].x == xtmp && j <= i) continue; - } - } - - #ifndef _LMP_INTEL_OFFLOAD - if (exclude) { - const int jtype = x[j].w; - if (exclusion(i,j,itype,jtype,mask,molecule)) continue; - } - #endif - - neighptr[raw_count++] = j; - } - } - if (raw_count > obound) - *overflow = 1; - - #if defined(LMP_SIMD_COMPILER) - #ifdef _LMP_INTEL_OFFLOAD - int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax; - #if __INTEL_COMPILER+0 > 1499 - #pragma vector aligned - #pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin) - #endif - #else - #pragma vector aligned - #pragma simd - #endif - #endif - for (int u = maxnbors; u < raw_count; u++) { - int j = neighptr[u]; - const flt_t delx = xtmp - x[j].x; - const flt_t dely = ytmp - x[j].y; - const flt_t delz = ztmp - x[j].z; - const int jtype = x[j].w; - const flt_t rsq = delx * delx + dely * dely + delz * delz; - if (rsq > cutneighsq[ioffset + jtype]) - neighptr[u] = e_nall; - else { - if (need_ic) { - int no_special; - ominimum_image_check(no_special, delx, dely, delz); - if (no_special) - neighptr[u] = -j - 1; - } - - #ifdef _LMP_INTEL_OFFLOAD - if (j < nlocal) { - if (j < vlmin) vlmin = j; - if (j > vlmax) vlmax = j; - } else { - if (j < vgmin) vgmin = j; - if (j > vgmax) vgmax = j; - } - #endif - } - } - - int n = 0, n2 = maxnbors; - for (int u = maxnbors; u < raw_count; u++) { - const int j = neighptr[u]; - int pj = j; - if (pj < e_nall) { - if (need_ic) - if (pj < 0) pj = -pj - 1; - - if (pj < nlocal) - neighptr[n++] = 
j; - else - neighptr[n2++] = j; - } - } - int ns = n; - for (int u = maxnbors; u < n2; u++) - neighptr[n++] = neighptr[u]; - - ilist[i] = i; - cnumneigh[i] = ct; - ns += n2 - maxnbors; - - int edge = (ns % pad_width); - if (edge) { - const int pad_end = ns + (pad_width - edge); - #if defined(LMP_SIMD_COMPILER) - #pragma loop_count min=1, max=15, avg=8 - #endif - for ( ; ns < pad_end; ns++) - neighptr[ns] = e_nall; - } - numneigh[i] = ns; - - ct += ns; - const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); - edge = (ct % alignb); - if (edge) ct += alignb - edge; - neighptr = firstneigh + ct; - if (ct + obound > list_size) { - if (i < ito - 1) { - *overflow = 1; - ct = (ifrom + tid * 2) * maxnbors; - } - } - } - - if (*overflow == 1) - for (int i = ifrom; i < ito; i++) - numneigh[i] = 0; - - #ifdef _LMP_INTEL_OFFLOAD - if (separate_buffers) { - #if defined(_OPENMP) - #pragma omp critical - #endif - { - if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; - if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; - if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; - if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; - } - #pragma omp barrier - } - - int ghost_offset = 0, nall_offset = e_nall; - if (separate_buffers) { - int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; - if (nghost < 0) nghost = 0; - if (offload) { - ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; - nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; - } else { - ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; - nall_offset = nlocal + nghost; - } - } - #endif - - if (molecular) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd - #endif - for (int jj = 0; jj < jnum; jj++) { - const int j = jlist[jj]; - if (need_ic && j < 0) { - which = 0; - jlist[jj] = -j - 
1; - } else - ofind_special(which, special, nspecial, i, tag[j]); - #ifdef _LMP_INTEL_OFFLOAD - if (j >= nlocal) { - if (j == e_nall) - jlist[jj] = nall_offset; - else if (which) - jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); - else jlist[jj]-=ghost_offset; - } else - #endif - if (which) jlist[jj] = j ^ (which << SBBITS); - } - } - } - #ifdef _LMP_INTEL_OFFLOAD - else if (separate_buffers) { - for (int i = ifrom; i < ito; ++i) { - int * _noalias jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - int jj = 0; - for (jj = 0; jj < jnum; jj++) - if (jlist[jj] >= nlocal) break; - while (jj < jnum) { - if (jlist[jj] == e_nall) jlist[jj] = nall_offset; - else jlist[jj] -= ghost_offset; - jj++; - } - } - } - #endif - } // end omp - #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) - *timer_compute = MIC_Wtime() - *timer_compute; - #endif - } // end offload - - #ifdef _LMP_INTEL_OFFLOAD - if (offload) { - _fix->stop_watch(TIME_OFFLOAD_LATENCY); - _fix->start_watch(TIME_HOST_NEIGHBOR); - for (int n = 0; n < aend; n++) { - ilist[n] = n; - numneigh[n] = 0; - } - } else { - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; - if (separate_buffers) { - _fix->start_watch(TIME_PACK); - _fix->set_neighbor_host_sizes(); - buffers->pack_sep_from_single(_fix->host_min_local(), - _fix->host_used_local(), - _fix->host_min_ghost(), - _fix->host_used_ghost()); - _fix->stop_watch(TIME_PACK); - } - } - #else - for (int i = astart; i < aend; i++) - list->firstneigh[i] = firstneigh + cnumneigh[i]; + bin_newton<flt_t,acc_t,0,0,0,1,0>(0, list, buffers, host_start, nlocal); #endif } diff --git a/src/USER-INTEL/npair_half_bin_newton_tri_intel.h b/src/USER-INTEL/npair_half_bin_newton_tri_intel.h index d1b9ee9cd1d35103f7d9390befb152b0bf6f161d..7a7f4c803003e0f5605204c8121c5cb7cc805e98 100644 --- a/src/USER-INTEL/npair_half_bin_newton_tri_intel.h +++ b/src/USER-INTEL/npair_half_bin_newton_tri_intel.h @@ -36,9 +36,6 @@ class 
NPairHalfBinNewtonTriIntel : public NPairIntel { private: template <class flt_t, class acc_t> void hbnti(NeighList *, IntelBuffers<flt_t,acc_t> *); - template <class flt_t, class acc_t, int, int> - void hbnti(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int, - const int, const int offload_end = 0); }; } diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp index bffb31b71014ccad56b6a26f20798faa92b7314e..b20b1dcd08d95cc3afed1cbe50d6b0153d3eeb32 100644 --- a/src/USER-INTEL/npair_intel.cpp +++ b/src/USER-INTEL/npair_intel.cpp @@ -40,7 +40,7 @@ NPairIntel::~NPairIntel() { #ifdef _LMP_INTEL_OFFLOAD if (_off_map_stencil) { const int * stencil = this->stencil; - #pragma offload_transfer target(mic:_cop) \ + #pragma offload_transfer target(mic:_cop) \ nocopy(stencil:alloc_if(0) free_if(1)) } #endif @@ -48,6 +48,678 @@ NPairIntel::~NPairIntel() { /* ---------------------------------------------------------------------- */ +template <class flt_t, class acc_t, int offload_noghost, int need_ic, + int FULL, int TRI, int THREE> +void NPairIntel::bin_newton(const int offload, NeighList *list, + IntelBuffers<flt_t,acc_t> *buffers, + const int astart, const int aend, + const int offload_end) { + + if (aend-astart == 0) return; + + const int nall = atom->nlocal + atom->nghost; + int pad = 1; + int nall_t = nall; + + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost && offload) nall_t = atom->nlocal; + if (THREE == 0 && offload) { + if (INTEL_MIC_NBOR_PAD > 1) + pad = INTEL_MIC_NBOR_PAD * sizeof(float) / sizeof(flt_t); + } else + #endif + if (THREE == 0 && INTEL_NBOR_PAD > 1) + pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t); + const int pad_width = pad; + const int pack_width = _fix->nbor_pack_width(); + + const ATOM_T * _noalias const x = buffers->get_x(); + int * _noalias const firstneigh = buffers->firstneigh(list); + const int e_nall = nall_t; + + const int molecular = atom->molecular; + int *ns = NULL; + tagint *s = NULL; + int tag_size = 
0, special_size; + if (buffers->need_tag()) tag_size = e_nall; + if (molecular) { + s = atom->special[0]; + ns = atom->nspecial[0]; + special_size = aend; + } else { + s = &buffers->_special_holder; + ns = &buffers->_nspecial_holder; + special_size = 0; + } + const tagint * _noalias const special = s; + const int * _noalias const nspecial = ns; + const int maxspecial = atom->maxspecial; + const tagint * _noalias const tag = atom->tag; + + int * _noalias const ilist = list->ilist; + int * _noalias numneigh = list->numneigh; + int * _noalias const cnumneigh = buffers->cnumneigh(list); + const int nstencil = this->nstencil; + const int * _noalias const stencil = this->stencil; + const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0]; + const int ntypes = atom->ntypes + 1; + const int nlocal = atom->nlocal; + + #ifndef _LMP_INTEL_OFFLOAD + int * const mask = atom->mask; + tagint * const molecule = atom->molecule; + #endif + + int tnum; + int *overflow; + double *timer_compute; + #ifdef _LMP_INTEL_OFFLOAD + if (offload) { + timer_compute = _fix->off_watch_neighbor(); + tnum = buffers->get_off_threads(); + overflow = _fix->get_off_overflow_flag(); + _fix->stop_watch(TIME_HOST_NEIGHBOR); + _fix->start_watch(TIME_OFFLOAD_LATENCY); + } else + #endif + { + tnum = comm->nthreads; + overflow = _fix->get_overflow_flag(); + } + const int nthreads = tnum; + const int maxnbors = buffers->get_max_nbors(); + int * _noalias const atombin = buffers->get_atombin(); + const int * _noalias const binpacked = buffers->get_binpacked(); + + const int xperiodic = domain->xperiodic; + const int yperiodic = domain->yperiodic; + const int zperiodic = domain->zperiodic; + const flt_t xprd_half = domain->xprd_half; + const flt_t yprd_half = domain->yprd_half; + const flt_t zprd_half = domain->zprd_half; + + flt_t * _noalias const ncachex = buffers->get_ncachex(); + flt_t * _noalias const ncachey = buffers->get_ncachey(); + flt_t * _noalias const ncachez = buffers->get_ncachez(); + 
int * _noalias const ncachej = buffers->get_ncachej(); + int * _noalias const ncachejtype = buffers->get_ncachejtype(); + const int ncache_stride = buffers->ncache_stride(); + + #ifdef _LMP_INTEL_OFFLOAD + const int * _noalias const binhead = this->binhead; + const int * _noalias const bins = this->bins; + const int cop = _fix->coprocessor_number(); + const int separate_buffers = _fix->separate_buffers(); + #pragma offload target(mic:cop) if(offload) \ + in(x:length(e_nall+1) alloc_if(0) free_if(0)) \ + in(tag:length(tag_size) alloc_if(0) free_if(0)) \ + in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \ + in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \ + in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \ + in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \ + in(cutneighsq:length(0) alloc_if(0) free_if(0)) \ + in(firstneigh:length(0) alloc_if(0) free_if(0)) \ + in(cnumneigh:length(0) alloc_if(0) free_if(0)) \ + out(numneigh:length(0) alloc_if(0) free_if(0)) \ + in(ilist:length(0) alloc_if(0) free_if(0)) \ + in(atombin:length(aend) alloc_if(0) free_if(0)) \ + in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ + in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \ + in(ncachejtype:length(0) alloc_if(0) free_if(0)) \ + in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \ + in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \ + in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \ + in(pack_width) \ + out(overflow:length(5) alloc_if(0) free_if(0)) \ + out(timer_compute:length(1) alloc_if(0) free_if(0)) \ + signal(tag) + #endif + { + #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) + *timer_compute = MIC_Wtime(); + #endif + + #ifdef _LMP_INTEL_OFFLOAD + overflow[LMP_LOCAL_MIN] = astart; + overflow[LMP_LOCAL_MAX] = aend - 1; + overflow[LMP_GHOST_MIN] = e_nall; + overflow[LMP_GHOST_MAX] = -1; + #endif + + int nstencilp = 0; + int 
binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL]; + for (int k = 0; k < nstencil; k++) { + binstart[nstencilp] = stencil[k]; + int end = stencil[k] + 1; + for (int kk = k + 1; kk < nstencil; kk++) { + if (stencil[kk-1]+1 == stencil[kk]) { + end++; + k++; + } else break; + } + binend[nstencilp] = end; + nstencilp++; + } + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(numneigh, overflow, nstencilp, binstart, binend) + #endif + { + #ifdef _LMP_INTEL_OFFLOAD + int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1; + #endif + + const int num = aend - astart; + int tid, ifrom, ito; + + if (THREE) { + IP_PRE_omp_range_id_vec(ifrom, ito, tid, num, nthreads, pack_width); + } else { + IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads); + } + ifrom += astart; + ito += astart; + int e_ito = ito; + if (THREE && ito == num) { + int imod = ito % pack_width; + if (imod) e_ito += pack_width - imod; + } + const int list_size = (e_ito + tid * 2 + 2) * maxnbors; + + int which; + + int pack_offset = maxnbors; + if (THREE) pack_offset *= pack_width; + int ct = (ifrom + tid * 2) * maxnbors; + int *neighptr = firstneigh + ct; + const int obound = pack_offset + maxnbors * 2; + + const int toffs = tid * ncache_stride; + flt_t * _noalias const tx = ncachex + toffs; + flt_t * _noalias const ty = ncachey + toffs; + flt_t * _noalias const tz = ncachez + toffs; + int * _noalias const tj = ncachej + toffs; + int * _noalias const tjtype = ncachejtype + toffs; + + flt_t * _noalias itx; + flt_t * _noalias ity; + flt_t * _noalias itz; + int * _noalias itj; + int * _noalias itjtype; + + // loop over all atoms in other bins in stencil, store every pair + int istart, icount, ncount, oldbin = -9999999, lane, max_chunk; + if (THREE) { + lane = 0; + max_chunk = 0; + } + for (int i = ifrom; i < ito; i++) { + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + const int itype = x[i].w; + tagint itag; + if (THREE) itag = tag[i]; + const int 
ioffset = ntypes * itype; + + const int ibin = atombin[i]; + if (ibin != oldbin) { + oldbin = ibin; + ncount = 0; + for (int k = 0; k < nstencilp; k++) { + const int bstart = binhead[ibin + binstart[k]]; + const int bend = binhead[ibin + binend[k]]; + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd + #endif + for (int jj = bstart; jj < bend; jj++) + tj[ncount++] = binpacked[jj]; + } + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd + #endif + for (int u = 0; u < ncount; u++) { + const int j = tj[u]; + tx[u] = x[j].x; + ty[u] = x[j].y; + tz[u] = x[j].z; + tjtype[u] = x[j].w; + } + + if (FULL == 0 || TRI == 1) { + icount = 0; + istart = ncount; + const int alignb = INTEL_DATA_ALIGN / sizeof(int); + int nedge = istart % alignb; + if (nedge) istart += (alignb - nedge); /* round istart up to a multiple of alignb */ + itx = tx + istart; + ity = ty + istart; + itz = tz + istart; + itj = tj + istart; + itjtype = tjtype + istart; + + const int bstart = binhead[ibin]; + const int bend = binhead[ibin + 1]; + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd + #endif + for (int jj = bstart; jj < bend; jj++) { + const int j = binpacked[jj]; + itj[icount] = j; + itx[icount] = x[j].x; + ity[icount] = x[j].y; + itz[icount] = x[j].z; + itjtype[icount] = x[j].w; + icount++; + } + if (icount + istart > obound) *overflow = 1; + } else + if (ncount > obound) *overflow = 1; + } + + // ---------------------- Loop over i bin + + int n = 0; + if (FULL == 0 || TRI == 1) { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int u = 0; u < icount; u++) { + int addme = 1; + int j = itj[u]; + + // Cutoff Check + const flt_t delx = xtmp - itx[u]; + const flt_t dely = ytmp - ity[u]; + const flt_t delz = ztmp - itz[u]; + const int jtype = itjtype[u]; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutneighsq[ioffset + jtype]) addme = 0; + + // i bin (half) check and offload ghost check + if (j < nlocal) { + const
int ijmod = (i + j) % 2; + if (i > j) { + if (ijmod == 0) addme = 0; + } else if (i < j) { + if (ijmod == 1) addme = 0; + } else + addme = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost && i < offload_end) addme = 0; + #endif + } else { + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost && offload) addme = 0; + #endif + if (itz[u] < ztmp) addme = 0; + if (itz[u] == ztmp) { + if (ity[u] < ytmp) addme = 0; + if (ity[u] == ytmp && itx[u] < xtmp) addme = 0; + } + } + + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + j = -j - 1; + } + + if (addme) + neighptr[n++] = j; + } + } // if FULL==0 + + // ---------------------- Loop over other bins + + int n2, *neighptr2; + if (THREE) { + n = pack_offset; + n2 = pack_offset + maxnbors; + neighptr2 = neighptr; + } + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int u = 0; u < ncount; u++) { + int addme = 1; + int j = tj[u]; + + if (FULL) + if (i == j) addme = 0; + + // Cutoff Check + const flt_t delx = xtmp - tx[u]; + const flt_t dely = ytmp - ty[u]; + const flt_t delz = ztmp - tz[u]; + const int jtype = tjtype[u]; + const flt_t rsq = delx * delx + dely * dely + delz * delz; + if (rsq > cutneighsq[ioffset + jtype]) addme = 0; + + // Triclinic + if (TRI) { + if (tz[u] < ztmp) addme = 0; + if (tz[u] == ztmp) { + if (ty[u] < ytmp) addme = 0; + if (ty[u] == ytmp) { + if (tx[u] < xtmp) addme = 0; + if (tx[u] == xtmp && j <= i) addme = 0; + } + } + } + + // offload ghost check + #ifdef _LMP_INTEL_OFFLOAD + if (offload_noghost) { + if (j < nlocal) { + if (i < offload_end) addme = 0; + } else if (offload) addme = 0; + } + #endif + + int pj; + if (THREE) pj = j; + if (need_ic) { + int no_special; + ominimum_image_check(no_special, delx, dely, delz); + if (no_special) + j = -j - 1; + } + + if (THREE) { + const int jtag = tag[pj]; + int flist = 0; + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) flist = 1; + } else if (itag < jtag) { + 
if ((itag+jtag) % 2 == 1) flist = 1; + } else { + if (tz[u] < ztmp) flist = 1; + else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1; + else if (tz[u] == ztmp && ty[u] == ytmp && tx[u] < xtmp) + flist = 1; + } + if (addme) { + if (flist) + neighptr2[n2++] = j; + else + neighptr[n++] = j; + } + } else { + if (addme) + neighptr[n++] = j; + } + } // for u + + #ifndef _LMP_INTEL_OFFLOAD + if (exclude) { + int alln = n; + if (THREE) n = pack_offset; + else n = 0; + for (int u = pack_offset; u < alln; u++) { + const int j = neighptr[u]; + int pj = j; + if (need_ic) + if (pj < 0) pj = -j - 1; + const int jtype = x[pj].w; + if (exclusion(i,pj,itype,jtype,mask,molecule)) continue; + neighptr[n++] = j; + } + if (THREE) { + alln = n2; + n2 = pack_offset + maxnbors; + for (int u = pack_offset + maxnbors; u < alln; u++) { + const int j = neighptr[u]; + int pj = j; + if (need_ic) + if (pj < 0) pj = -j - 1; + const int jtype = x[pj].w; + if (exclusion(i,pj,itype,jtype,mask,molecule)) continue; + neighptr[n2++] = j; + } + } + } + #endif + int ns; + if (THREE) { + int alln = n; + ns = n - pack_offset; + atombin[i] = ns; + n = lane; + for (int u = pack_offset; u < alln; u++) { + neighptr[n] = neighptr[u]; + n += pack_width; + } + ns += n2 - pack_offset - maxnbors; + for (int u = pack_offset + maxnbors; u < n2; u++) { + neighptr[n] = neighptr[u]; + n += pack_width; + } + if (ns > maxnbors) *overflow = 1; + } else + if (n > maxnbors) *overflow = 1; + + ilist[i] = i; + cnumneigh[i] = ct; + if (THREE) { + cnumneigh[i] += lane; + numneigh[i] = ns; + } else { + int edge = (n % pad_width); + if (edge) { + const int pad_end = n + (pad_width - edge); + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \ + avg=INTEL_COMPILE_WIDTH/2 + #endif + for ( ; n < pad_end; n++) + neighptr[n] = e_nall; + } + numneigh[i] = n; + } + + if (THREE) { + if (ns > max_chunk) max_chunk = ns; + lane++; + if (lane == pack_width) { + ct += max_chunk * 
pack_width; + const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); + const int edge = (ct % alignb); + if (edge) ct += alignb - edge; + neighptr = firstneigh + ct; + max_chunk = 0; + pack_offset = maxnbors * pack_width; + lane = 0; + if (ct + obound > list_size) { + if (i < ito - 1) { + *overflow = 1; + ct = (ifrom + tid * 2) * maxnbors; + } + } + } + } else { + ct += n; + const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); + const int edge = (ct % alignb); + if (edge) ct += alignb - edge; + neighptr = firstneigh + ct; + if (ct + obound > list_size) { + if (i < ito - 1) { + *overflow = 1; + ct = (ifrom + tid * 2) * maxnbors; + } + } + } + } + + if (*overflow == 1) + for (int i = ifrom; i < ito; i++) + numneigh[i] = 0; + + #ifdef _LMP_INTEL_OFFLOAD + int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax; + int ghost_offset = 0, nall_offset = e_nall; + if (separate_buffers) { + for (int i = ifrom; i < ito; ++i) { + int * _noalias jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + #if __INTEL_COMPILER+0 > 1499 + #pragma vector aligned + #pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin) + #endif + for (int jj = 0; jj < jnum; jj++) { + int j = jlist[jj]; + if (need_ic && j < 0) j = -j - 1; + if (j < nlocal) { + if (j < vlmin) vlmin = j; + if (j > vlmax) vlmax = j; + } else { + if (j < vgmin) vgmin = j; + if (j > vgmax) vgmax = j; + } + } + } + lmin = MIN(lmin,vlmin); + gmin = MIN(gmin,vgmin); + lmax = MAX(lmax,vlmax); + gmax = MAX(gmax,vgmax); + + #if defined(_OPENMP) + #pragma omp critical + #endif + { + if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin; + if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax; + if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin; + if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax; + } + #pragma omp barrier + + int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN]; + if (nghost < 0) nghost = 0; + if (offload) { + 
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1; + nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost; + } else { + ghost_offset = overflow[LMP_GHOST_MIN] - nlocal; + nall_offset = nlocal + nghost; + } + } // if separate_buffers + #endif + + if (molecular) { + for (int i = ifrom; i < ito; ++i) { + int * _noalias jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + if (THREE) { + const int trip = jnum * pack_width; + for (int jj = 0; jj < trip; jj+=pack_width) { + const int j = jlist[jj]; + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j]); + #ifdef _LMP_INTEL_OFFLOAD + if (j >= nlocal) { + if (j == e_nall) + jlist[jj] = nall_offset; + else if (which) + jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); + else jlist[jj]-=ghost_offset; + } else + #endif + if (which) jlist[jj] = j ^ (which << SBBITS); + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd + #endif + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj]; + if (need_ic && j < 0) { + which = 0; + jlist[jj] = -j - 1; + } else + ofind_special(which, special, nspecial, i, tag[j]); + #ifdef _LMP_INTEL_OFFLOAD + if (j >= nlocal) { + if (j == e_nall) + jlist[jj] = nall_offset; + else if (which) + jlist[jj] = (j-ghost_offset) ^ (which << SBBITS); + else jlist[jj]-=ghost_offset; + } else + #endif + if (which) jlist[jj] = j ^ (which << SBBITS); + } + } + } // for i + } // if molecular + #ifdef _LMP_INTEL_OFFLOAD + else if (separate_buffers) { + for (int i = ifrom; i < ito; ++i) { + int * _noalias jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + int jj = 0; + #pragma vector aligned + #pragma simd + for (jj = 0; jj < jnum; jj++) { + if (jlist[jj] >= nlocal) { + if (jlist[jj] == e_nall) jlist[jj] = nall_offset; + else jlist[jj] -= ghost_offset; + } + } + } + } + #endif + } // end omp + #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) + 
*timer_compute = MIC_Wtime() - *timer_compute; + #endif + } // end offload + + #ifdef _LMP_INTEL_OFFLOAD + if (offload) { + _fix->stop_watch(TIME_OFFLOAD_LATENCY); + _fix->start_watch(TIME_HOST_NEIGHBOR); + for (int n = 0; n < aend; n++) { + ilist[n] = n; + numneigh[n] = 0; + } + } else { + for (int i = astart; i < aend; i++) + list->firstneigh[i] = firstneigh + cnumneigh[i]; + if (separate_buffers) { + _fix->start_watch(TIME_PACK); + _fix->set_neighbor_host_sizes(); + buffers->pack_sep_from_single(_fix->host_min_local(), + _fix->host_used_local(), + _fix->host_min_ghost(), + _fix->host_used_ghost()); + _fix->stop_watch(TIME_PACK); + } + } + #else + #pragma vector aligned + #pragma simd + for (int i = astart; i < aend; i++) + list->firstneigh[i] = firstneigh + cnumneigh[i]; + #endif +} + +/* ---------------------------------------------------------------------- */ + #ifdef _LMP_INTEL_OFFLOAD void NPairIntel::grow_stencil() { @@ -60,8 +732,206 @@ void NPairIntel::grow_stencil() _off_map_stencil = stencil; const int * stencil = _off_map_stencil; const int maxstencil = ns->get_maxstencil(); - #pragma offload_transfer target(mic:_cop) \ + #pragma offload_transfer target(mic:_cop) \ in(stencil:length(maxstencil) alloc_if(1) free_if(0)) - } + } } #endif + +/* ---------------------------------------------------------------------- */ + +// ---- Half, no IC + +template void NPairIntel::bin_newton<float, float, 0, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Half, IC + +template void NPairIntel::bin_newton<float, float, 0, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> 
*, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Tri, no IC + +template void NPairIntel::bin_newton<float, float, 0, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Tri, IC + +template void NPairIntel::bin_newton<float, float, 0, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Full, no IC + +template void NPairIntel::bin_newton<float, float, 0, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Full, IC + +template void NPairIntel::bin_newton<float, float, 0, 1, 1, 0, 0> + (const int, NeighList *, 
IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 1, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 1, 1, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- 3-body, no IC + +template void NPairIntel::bin_newton<float, float, 0, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- 3-body, IC + +template void NPairIntel::bin_newton<float, float, 0, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 0, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 0, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +#ifdef _LMP_INTEL_OFFLOAD + +// ---- Half, no IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 0, 0, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Half, IC, no ghost + +template void 
NPairIntel::bin_newton<float, float, 1, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 1, 0, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Tri, no IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 0, 0, 1, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Tri, IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 1, 0, 1, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- Full, no IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 0, 1, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const 
int, + const int); + +// ---- Full, IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 1, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 1, 1, 0, 0> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 1, 1, 0, 0> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- 3-body, no IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 0, 1, 0, 1> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +// ---- 3-body, IC, no ghost + +template void NPairIntel::bin_newton<float, float, 1, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,float> *, const int, const int, + const int); +template void NPairIntel::bin_newton<float, double, 1, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<float,double> *, const int, const int, + const int); +template void NPairIntel::bin_newton<double, double, 1, 1, 1, 0, 1> + (const int, NeighList *, IntelBuffers<double,double> *, const int, const int, + const int); + +#endif diff --git a/src/USER-INTEL/npair_intel.h b/src/USER-INTEL/npair_intel.h index 06d5d79cacaa0e7af28fdfed08001d5b392cf6c2..55a529b2cb53a044b20a3a0f21f7ce9480ca6e89 100644 --- a/src/USER-INTEL/npair_intel.h +++ b/src/USER-INTEL/npair_intel.h @@ -25,10 +25,6 @@ #include "intel_simd.h" #endif -#ifdef OUTER_CHUNK -#include "intel_simd.h" -#endif - #ifdef _LMP_INTEL_OFFLOAD #pragma offload_attribute(push,target(mic)) 
#endif @@ -87,6 +83,10 @@ class NPairIntel : public NPair { protected: FixIntel *_fix; + template <class flt_t, class acc_t, int, int, int, int, int> + void bin_newton(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, + const int, const int, const int offload_end = 0); + #ifdef _LMP_INTEL_OFFLOAD int _cop; int *_off_map_stencil; diff --git a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp index 4f34a484cb760de22c68135b50ccfb80d2c5e0db..07beae1e41683d781b2f80df9dbe2e1d790c66cb 100644 --- a/src/USER-INTEL/pair_buck_coul_cut_intel.cpp +++ b/src/USER-INTEL/pair_buck_coul_cut_intel.cpp @@ -55,7 +55,7 @@ PairBuckCoulCutIntel::~PairBuckCoulCutIntel() void PairBuckCoulCutIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) - compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) compute<double,double>(eflag, vflag, fix->get_double_buffers(), @@ -70,8 +70,8 @@ void PairBuckCoulCutIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairBuckCoulCutIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -85,57 +85,51 @@ void PairBuckCoulCutIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, 
atom->nlocal + atom->nghost, + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } - - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairBuckCoulCutIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int 
aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -165,9 +159,9 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -204,31 +198,30 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, *timer_compute = MIC_Wtime(); #endif - IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, q); + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, q); acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = oecoul = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) 
memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); - for (int i = iifrom; i < iito; ++i) { + for (int i = iifrom; i < iito; i += iip) { const int itype = x[i].w; const int ptr_off = itype * ntypes; @@ -240,21 +233,20 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, acc_t fxtmp,fytmp,fztmp,fwtmp; acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; - + const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const flt_t qtmp = q[i]; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (NEWTON_PAIR == 0) if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } #if defined(LMP_SIMD_COMPILER) #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, sv5) + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < jnum; jj++) { flt_t forcecoul, forcebuck, evdwl, ecoul; @@ -270,19 +262,19 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, const flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r = sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; - - #ifdef INTEL_VMASK + + #ifdef INTEL_VMASK if (rsq < c_cuti[jtype].cut_coulsq) { #endif forcecoul = qqrd2e * qtmp*q[j]/r; - if (EFLAG) + if (EFLAG) ecoul = forcecoul; if (sbindex){ const flt_t factor_coul = special_coul[sbindex]; forcecoul *= factor_coul; if(EFLAG) ecoul *= factor_coul; - + } #ifdef INTEL_VMASK } @@ -290,7 +282,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, if (rsq >= c_cuti[jtype].cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } #endif - + #ifdef INTEL_VMASK if (rsq < c_cuti[jtype].cut_ljsq) { #endif @@ -298,14 +290,14 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, flt_t rexp = exp(-r * c_forcei[jtype].rhoinv); forcebuck = r * rexp * 
c_forcei[jtype].buck1 - r6inv * c_forcei[jtype].buck2; - if (EFLAG) + if (EFLAG) evdwl = rexp * c_energyi[jtype].a - r6inv * c_energyi[jtype].c - c_energyi[jtype].offset; if (sbindex) { const flt_t factor_lj = special_lj[sbindex]; forcebuck *= factor_lj; - if (EFLAG) + if (EFLAG) evdwl *= factor_lj; } #ifdef INTEL_VMASK @@ -319,71 +311,72 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, if (rsq < c_cuti[jtype].cutsq) { #endif const flt_t fpair = (forcecoul + forcebuck) * r2inv; - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - sevdwl += ev_pre * evdwl; - secoul += ev_pre * ecoul; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - } + const flt_t fpx = fpair * delx; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * dely; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * delz; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + + if (EFLAG) { + sevdwl += evdwl; + secoul += ecoul; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; } - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); } + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, delx, dely, delz, fpx, fpy, fpz); #ifdef INTEL_VMASK } #endif } // for jj - - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - - IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = 
fytmp; + f[i].z = fztmp; + } + IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = oecoul; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -395,7 +388,7 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -406,6 +399,10 @@ void PairBuckCoulCutIntel::eval(const int offload, const int vflag, void PairBuckCoulCutIntel::init_style() { PairBuckCoulCut::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } 
neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -413,7 +410,7 @@ void PairBuckCoulCutIntel::init_style() error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast<FixIntel *>(modify->fix[ifix]); - + fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); @@ -495,9 +492,9 @@ void PairBuckCoulCutIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void PairBuckCoulCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, - const int ntable, - Memory *memory, - const int cop) { + const int ntable, + Memory *memory, + const int cop) { if ( (ntypes != _ntypes || ntable != _ntable) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD @@ -508,12 +505,12 @@ void PairBuckCoulCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, c_cut_t * oc_cut = c_cut[0]; if (ospecial_lj != NULL && oc_force != NULL && oc_cut != NULL && - oc_energy != NULL && ospecial_coul != NULL && + oc_energy != NULL && ospecial_coul != NULL && _cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ - nocopy(oc_cut: alloc_if(0) free_if(1)) + nocopy(oc_cut: alloc_if(0) free_if(1)) } #endif @@ -537,7 +534,7 @@ void PairBuckCoulCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, c_cut_t * oc_cut = c_cut[0]; int tp1sq = ntypes*ntypes; if (ospecial_lj != NULL && oc_force != NULL && oc_cut != NULL && - oc_energy != NULL && ospecial_coul != NULL && + oc_energy != NULL && ospecial_coul != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ diff --git a/src/USER-INTEL/pair_buck_coul_cut_intel.h b/src/USER-INTEL/pair_buck_coul_cut_intel.h index 6590cd9c16041b700164033e6b0019f535234dfb..72043239035fdaec85d0f29c853470fed8be6782 100644 --- a/src/USER-INTEL/pair_buck_coul_cut_intel.h +++ 
b/src/USER-INTEL/pair_buck_coul_cut_intel.h @@ -49,10 +49,10 @@ class PairBuckCoulCutIntel : public PairBuckCoulCut { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, @@ -75,7 +75,7 @@ class PairBuckCoulCutIntel : public PairBuckCoulCut { ~ForceConst() { set_ntypes(0,0,NULL,_cop); } void set_ntypes(const int ntypes, const int ntable, Memory *memory, - const int cop); + const int cop); private: int _ntypes, _ntable, _cop; diff --git a/src/USER-INTEL/pair_buck_coul_long_intel.cpp b/src/USER-INTEL/pair_buck_coul_long_intel.cpp index 9319f531e159b9e7849528d905ab30475f894264..995e2e85839d3503fdefd8cc9100a7d49a78f19c 100644 --- a/src/USER-INTEL/pair_buck_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_buck_coul_long_intel.cpp @@ -55,7 +55,7 @@ PairBuckCoulLongIntel::~PairBuckCoulLongIntel() void PairBuckCoulLongIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) - compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), force_const_single); else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) compute<double,double>(eflag, vflag, fix->get_double_buffers(), @@ -70,8 +70,8 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairBuckCoulLongIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const 
ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -85,57 +85,51 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag, if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } - - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, 
ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairBuckCoulLongIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -170,11 +164,19 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; + flt_t * _noalias const ccachex = buffers->get_ccachex(); + flt_t * _noalias const ccachey = buffers->get_ccachey(); + flt_t * _noalias const ccachez = buffers->get_ccachez(); + flt_t * _noalias const ccachew = buffers->get_ccachew(); + int * _noalias const ccachei = buffers->get_ccachei(); + int * _noalias const ccachej = buffers->get_ccachej(); + const int ccache_stride = _ccache_stride; + // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -208,8 +210,10 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, in(x:length(x_size) alloc_if(0) free_if(0)) \ in(q:length(q_size) alloc_if(0) free_if(0)) \ in(overflow:length(0) alloc_if(0) 
free_if(0)) \ + in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \ + in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \ in(astart,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,vflag,eatom) \ - in(f_stride,nlocal,minlocal,separate_flag,offload) \ + in(ccache_stride,f_stride,nlocal,minlocal,separate_flag,offload) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ @@ -220,129 +224,149 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, *timer_compute = MIC_Wtime(); #endif - IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, q); + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, q); acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = oecoul = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); - - for (int i = iifrom; i < iito; ++i) { + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + + const int toffs = tid * ccache_stride; + flt_t * _noalias 
const tdelx = ccachex + toffs; + flt_t * _noalias const tdely = ccachey + toffs; + flt_t * _noalias const tdelz = ccachez + toffs; + flt_t * _noalias const trsq = ccachew + toffs; + int * _noalias const tj = ccachei + toffs; + int * _noalias const tjtype = ccachej + toffs; + + for (int i = iifrom; i < iito; i += iip) { const int itype = x[i].w; const int ptr_off = itype * ntypes; const C_FORCE_T * _noalias const c_forcei = c_force + ptr_off; const C_ENERGY_T * _noalias const c_energyi = c_energy + ptr_off; - const flt_t * _noalias const rho_invi = rho_inv + ptr_off; + const flt_t * _noalias const rho_invi = rho_inv + ptr_off; const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; acc_t fxtmp,fytmp,fztmp,fwtmp; - acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; + acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const flt_t qtmp = q[i]; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; - if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (NEWTON_PAIR == 0) + if (vflag == 1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + int ej = 0; #if defined(LMP_SIMD_COMPILER) #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, sv5) + #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { - flt_t forcecoul, forcebuck, evdwl, ecoul; - forcecoul = forcebuck = evdwl = ecoul = (flt_t)0.0; - - const int sbindex = jlist[jj] >> SBBITS & 3; const int j = jlist[jj] & NEIGHMASK; - const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; const int jtype = x[j].w; const flt_t rsq = delx * delx + dely * dely + delz * delz; + + if (rsq < c_forcei[jtype].cutsq) { + trsq[ej]=rsq; + tdelx[ej]=delx; + tdely[ej]=dely; + tdelz[ej]=delz; + tjtype[ej]=jtype; 
+ tj[ej]=jlist[jj]; + ej++; + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ + sv0, sv1, sv2, sv3, sv4, sv5) + #endif + for (int jj = 0; jj < ej; jj++) { + flt_t forcecoul, forcebuck, evdwl, ecoul; + forcecoul = forcebuck = evdwl = ecoul = (flt_t)0.0; + + const int j = tj[jj] & NEIGHMASK; + const int sbindex = tj[jj] >> SBBITS & 3; + const int jtype = tjtype[jj]; + const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; const flt_t r = (flt_t)1.0 / sqrt(r2inv); - #ifdef INTEL_VMASK - if (rsq < c_forcei[jtype].cutsq) { + #ifdef INTEL_ALLOW_TABLE + if (!ncoultablebits || rsq <= tabinnersq) { #endif - #ifdef INTEL_ALLOW_TABLE - if (!ncoultablebits || rsq <= tabinnersq) { - #endif - const flt_t A1 = 0.254829592; - const flt_t A2 = -0.284496736; - const flt_t A3 = 1.421413741; - const flt_t A4 = -1.453152027; - const flt_t A5 = 1.061405429; - const flt_t EWALD_F = 1.12837917; - const flt_t INV_EWALD_P = 1.0 / 0.3275911; - - const flt_t grij = g_ewald * r; - const flt_t expm2 = exp(-grij * grij); - const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); - const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - const flt_t prefactor = qqrd2e * qtmp * q[j] / r; - forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); - if (EFLAG) ecoul = prefactor * erfc; - - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; - - #ifdef INTEL_ALLOW_TABLE - } else { - float rsq_lookup = rsq; - const int itable = (__intel_castf32_u32(rsq_lookup) & - ncoulmask) >> ncoulshiftbits; - const flt_t fraction = (rsq_lookup - table[itable].r) * - table[itable].dr; - - const flt_t tablet = table[itable].f + - fraction * table[itable].df; - forcecoul = qtmp * q[j] * tablet; - if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + - fraction * detable[itable]); - if (sbindex) { - const flt_t table2 = ctable[itable] + - fraction 
* dctable[itable]; - const flt_t prefactor = qtmp * q[j] * table2; - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; - } + const flt_t A1 = 0.254829592; + const flt_t A2 = -0.284496736; + const flt_t A3 = 1.421413741; + const flt_t A4 = -1.453152027; + const flt_t A5 = 1.061405429; + const flt_t EWALD_F = 1.12837917; + const flt_t INV_EWALD_P = 1.0 / 0.3275911; + + const flt_t grij = g_ewald * r; + const flt_t expm2 = exp(-grij * grij); + const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); + const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const flt_t prefactor = qqrd2e * qtmp * q[j] / r; + forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); + if (EFLAG) ecoul = prefactor * erfc; + + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + + #ifdef INTEL_ALLOW_TABLE + } else { + float rsq_lookup = rsq; + const int itable = (__intel_castf32_u32(rsq_lookup) & + ncoulmask) >> ncoulshiftbits; + const flt_t fraction = (rsq_lookup - table[itable].r) * + table[itable].dr; + + const flt_t tablet = table[itable].f + + fraction * table[itable].df; + forcecoul = qtmp * q[j] * tablet; + if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + + fraction * detable[itable]); + if (sbindex) { + const flt_t table2 = ctable[itable] + + fraction * dctable[itable]; + const flt_t prefactor = qtmp * q[j] * table2; + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; } - #endif - #ifdef INTEL_VMASK } - #endif + #endif - #ifdef INTEL_VMASK + #ifdef INTEL_VMASK if (rsq < c_forcei[jtype].cut_ljsq) { #endif flt_t r6inv = r2inv * r2inv * r2inv; @@ -361,80 +385,74 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, #ifdef INTEL_VMASK } #else - if (rsq > c_forcei[jtype].cutsq) - { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } if (rsq > 
c_forcei[jtype].cut_ljsq) { forcebuck = (flt_t)0.0; evdwl = (flt_t)0.0; } #endif - #ifdef INTEL_VMASK - if (rsq < c_forcei[jtype].cutsq) { - #endif - const flt_t fpair = (forcecoul + forcebuck) * r2inv; - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - sevdwl += ev_pre * evdwl; - secoul += ev_pre * ecoul; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - } - } - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); + const flt_t fpair = (forcecoul + forcebuck) * r2inv; + const flt_t fpx = fpair * tdelx[jj]; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * tdely[jj]; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * tdelz[jj]; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + sevdwl += evdwl; + secoul += ecoul; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; } - #ifdef INTEL_VMASK } - #endif + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj], + fpx, fpy, fpz); } // for jj - - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } + IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - 
#endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = oecoul; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -446,7 +464,7 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -457,6 +475,10 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag, void PairBuckCoulLongIntel::init_style() { PairBuckCoulLong::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -464,7 +486,7 @@ void PairBuckCoulLongIntel::init_style() error->all(FLERR, "The 'package intel' command is required for /intel 
styles"); fix = static_cast<FixIntel *>(modify->fix[ifix]); - + fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); @@ -484,6 +506,13 @@ template <class flt_t, class acc_t> void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t,acc_t> *buffers) { + int off_ccache = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (_cop >= 0) off_ccache = 1; + #endif + buffers->grow_ccache(off_ccache, comm->nthreads, 1); + _ccache_stride = buffers->ccache_stride(); + int tp1 = atom->ntypes + 1; int ntable = 1; if (ncoultablebits) @@ -518,6 +547,9 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, for (int i = 0; i < tp1; i++) { for (int j = 0; j < tp1; j++) { + if (cutsq[i][j] < cut_ljsq[i][j]) + error->all(FLERR, + "Intel variant of lj/buck/coul/long expects lj cutoff<=coulombic"); fc.c_force[i][j].cutsq = cutsq[i][j]; fc.c_force[i][j].cut_ljsq = cut_ljsq[i][j]; fc.c_force[i][j].buck1 = buck1[i][j]; @@ -571,9 +603,9 @@ void PairBuckCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void PairBuckCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, - const int ntable, - Memory *memory, - const int cop) { + const int ntable, + Memory *memory, + const int cop) { if ( (ntypes != _ntypes || ntable != _ntable) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD @@ -593,10 +625,10 @@ void PairBuckCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, ospecial_coul != NULL && _cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ - nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ - nocopy(orho_inv: alloc_if(0) free_if(1)) \ - nocopy(otable: alloc_if(0) free_if(1)) \ - nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) + nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ + nocopy(orho_inv: alloc_if(0) free_if(1)) \ + nocopy(otable: alloc_if(0) free_if(1)) \ + nocopy(oetable, odetable, octable, 
odctable: alloc_if(0) free_if(1)) } #endif diff --git a/src/USER-INTEL/pair_buck_coul_long_intel.h b/src/USER-INTEL/pair_buck_coul_long_intel.h index 57e45174049a001f9f1b92a80ccbac860a07ad7c..ec37c699c8aceee88a29cf1e3d590b127301b879 100644 --- a/src/USER-INTEL/pair_buck_coul_long_intel.h +++ b/src/USER-INTEL/pair_buck_coul_long_intel.h @@ -40,7 +40,7 @@ class PairBuckCoulLongIntel : public PairBuckCoulLong { private: FixIntel *fix; - int _cop, _lrt; + int _cop, _lrt, _ccache_stride; template <class flt_t> class ForceConst; @@ -48,10 +48,10 @@ class PairBuckCoulLongIntel : public PairBuckCoulLong { void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, @@ -76,7 +76,7 @@ class PairBuckCoulLongIntel : public PairBuckCoulLong { ~ForceConst() { set_ntypes(0,0,NULL,_cop); } void set_ntypes(const int ntypes, const int ntable, Memory *memory, - const int cop); + const int cop); private: int _ntypes, _ntable, _cop; diff --git a/src/USER-INTEL/pair_buck_intel.cpp b/src/USER-INTEL/pair_buck_intel.cpp index 4815d1e0257fe10d55427724f23ec8b86cf8ef47..8c63d2e62d5d9a419ec627d9da01bbf64cfd77a3 100644 --- a/src/USER-INTEL/pair_buck_intel.cpp +++ b/src/USER-INTEL/pair_buck_intel.cpp @@ -48,7 +48,7 @@ PairBuckIntel::~PairBuckIntel() void PairBuckIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) - compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), 
force_const_single); else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) compute<double,double>(eflag, vflag, fix->get_double_buffers(), @@ -63,8 +63,8 @@ void PairBuckIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairBuckIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -78,57 +78,51 @@ void PairBuckIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } - - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, 
offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairBuckIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -152,9 +146,9 @@ void PairBuckIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -166,7 +160,7 @@ void PairBuckIntel::eval(const int offload, const int vflag, int *overflow = fix->get_off_overflow_flag(); double *timer_compute = fix->off_watch_pair(); // Redeclare as local variables for offload - + if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY); #pragma offload target(mic:_cop) 
if(offload) \ in(special_lj:length(0) alloc_if(0) free_if(0)) \ @@ -188,31 +182,30 @@ void PairBuckIntel::eval(const int offload, const int vflag, *timer_compute = MIC_Wtime(); #endif - IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, 0); + IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, + f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); - for (int i = iifrom; i < iito; ++i) { + for (int i = iifrom; i < iito; i += iip) { const int itype = x[i].w; const int ptr_off = itype * ntypes; @@ -222,24 +215,23 @@ void PairBuckIntel::eval(const int offload, const int vflag, const int jnum = numneigh[i]; acc_t fxtmp,fytmp,fztmp,fwtmp; - acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (EFLAG) fwtmp = sevdwl = 
(acc_t)0; + if (NEWTON_PAIR == 0) if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } #if defined(LMP_SIMD_COMPILER) #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, sv5) + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < jnum; jj++) { - + flt_t forcebuck, evdwl; forcebuck = evdwl = (flt_t)0.0; @@ -253,7 +245,7 @@ void PairBuckIntel::eval(const int offload, const int vflag, const flt_t rsq = delx * delx + dely * dely + delz * delz; const flt_t r = sqrt(rsq); const flt_t r2inv = (flt_t)1.0 / rsq; - + #ifdef INTEL_VMASK if (rsq < c_forcei[jtype].cutsq) { #endif @@ -265,7 +257,7 @@ void PairBuckIntel::eval(const int offload, const int vflag, #ifndef INTEL_VMASK if (rsq > c_forcei[jtype].cutsq) forcebuck =(flt_t)0.0; - #endif + #endif if (EFLAG) { evdwl = rexp * c_energyi[jtype].a - r6inv * c_energyi[jtype].c - @@ -280,73 +272,74 @@ void PairBuckIntel::eval(const int offload, const int vflag, if (sbindex) { const flt_t factor_lj = special_lj[sbindex]; forcebuck *= factor_lj; - if (EFLAG) + if (EFLAG) evdwl *= factor_lj; } const flt_t fpair = forcebuck * r2inv; - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - sevdwl += ev_pre * evdwl; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl; - } + const flt_t fpx = fpair * delx; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * dely; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * delz; + fztmp += fpz; + if 
(NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl; } - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); } + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, delx, dely, delz, fpx, fpy, fpz); #ifdef INTEL_VMASK } #endif } // for jj - - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp); + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } + IP_PRE_ev_tally_atom(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -358,7 +351,7 @@ 
void PairBuckIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -367,6 +360,10 @@ void PairBuckIntel::eval(const int offload, const int vflag, void PairBuckIntel::init_style() { PairBuck::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -374,7 +371,7 @@ void PairBuckIntel::init_style() error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast<FixIntel *>(modify->fix[ifix]); - + fix->pair_init_check(); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); @@ -445,7 +442,7 @@ void PairBuckIntel::pack_force_const(ForceConst<flt_t> &fc, /* ---------------------------------------------------------------------- */ template <class flt_t> -void PairBuckIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, +void PairBuckIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, Memory *memory, const int cop) { if ( (ntypes != _ntypes ) ) { @@ -455,8 +452,8 @@ void PairBuckIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, c_force_t * oc_force = c_force[0]; c_energy_t * oc_energy = c_energy[0]; - if (ospecial_lj != NULL && oc_force != NULL && - oc_energy != NULL && + if (ospecial_lj != NULL && oc_force != NULL && + oc_energy != NULL && _cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: alloc_if(0) free_if(1)) \ @@ -479,8 +476,8 @@ void PairBuckIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, c_force_t * oc_force = c_force[0]; c_energy_t * oc_energy = c_energy[0]; int tp1sq = ntypes*ntypes; - if (ospecial_lj != NULL && oc_force != NULL && - oc_energy != NULL && + if (ospecial_lj != NULL && oc_force != 
NULL && + oc_energy != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ diff --git a/src/USER-INTEL/pair_buck_intel.h b/src/USER-INTEL/pair_buck_intel.h index 4f039c3f9747dd93891117774dbb581f6d49442d..ab5e135262a378d89b0975376ffca91afe4ad9fc 100644 --- a/src/USER-INTEL/pair_buck_intel.h +++ b/src/USER-INTEL/pair_buck_intel.h @@ -48,10 +48,10 @@ private: void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, @@ -59,7 +59,7 @@ private: template <class flt_t> class ForceConst { - + public: typedef struct { flt_t buck1, buck2, rhoinv, cutsq; } c_force_t; typedef struct { flt_t a, c, offset, pad; } c_energy_t; @@ -78,7 +78,7 @@ private: int _ntypes, _cop; Memory *_memory; }; - + ForceConst<float> force_const_single; ForceConst<double> force_const_double; }; diff --git a/src/USER-INTEL/pair_eam_intel.cpp b/src/USER-INTEL/pair_eam_intel.cpp index f8c972ab8b8d634ac31da85e44e88d8e76d9536a..b97128bf9ff434fbc71c02d204c25128e9583a20 100644 --- a/src/USER-INTEL/pair_eam_intel.cpp +++ b/src/USER-INTEL/pair_eam_intel.cpp @@ -74,8 +74,8 @@ void PairEAMIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairEAMIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag, vflag); @@ -90,78 +90,58 @@ void 
PairEAMIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; if (_onetype) { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum); + 
eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } else { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + if (eflag) { + if (force->newton_pair) { + eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } @@ -169,11 +149,10 @@ void PairEAMIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ -template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, - class acc_t> +template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairEAMIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const 
ForceConst<flt_t> &fc, + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend) { const int inum = aend - astart; @@ -186,7 +165,10 @@ void PairEAMIntel::eval(const int offload, const int vflag, nmax = atom->nmax; int edge = (nmax * sizeof(acc_t)) % INTEL_DATA_ALIGN; if (edge) nmax += (INTEL_DATA_ALIGN - edge) / sizeof(acc_t); - memory->create(rho,nmax*comm->nthreads,"pair:rho"); + if (NEWTON_PAIR) + memory->create(rho,nmax*comm->nthreads,"pair:rho"); + else + memory->create(rho,nmax,"pair:rho"); memory->create(fp,nmax,"pair:fp"); // Use single precision allocation for single/mixed mode // Keep double version for single and swap_eam @@ -222,9 +204,17 @@ void PairEAMIntel::eval(const int offload, const int vflag, const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; + flt_t * _noalias const ccachex = buffers->get_ccachex(); + flt_t * _noalias const ccachey = buffers->get_ccachey(); + flt_t * _noalias const ccachez = buffers->get_ccachez(); + flt_t * _noalias const ccachew = buffers->get_ccachew(); + int * _noalias const ccachei = buffers->get_ccachei(); + int * _noalias const ccachej = buffers->get_ccachej(); + const int ccache_stride = _ccache_stride; + // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); @@ -252,123 +242,146 @@ void PairEAMIntel::eval(const int offload, const int vflag, f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(fp_f, 
f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; - IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads, - INTEL_VECTOR_WIDTH); + IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads, + INTEL_VECTOR_WIDTH); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - double * _noalias const trho = rho + tid*nmax; - if (NEWTON_PAIR) - memset(trho, 0, nall * sizeof(double)); - else - memset(trho, 0, nlocal * sizeof(double)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) foff = tid * nmax; + else foff = 0; + double * _noalias const trho = rho + foff; + if (NEWTON_PAIR) { + memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + memset(trho, 0, nall * sizeof(double)); + } + + const int toffs = tid * ccache_stride; + flt_t * _noalias const tdelx = ccachex + toffs; + flt_t * _noalias const tdely = ccachey + toffs; + flt_t * _noalias const tdelz = ccachez + toffs; + flt_t * _noalias const trsq = ccachew + toffs; + int * _noalias const tj = ccachei + toffs; + int * _noalias const tjtype = ccachej + toffs; flt_t oscale; int rhor_joff, frho_ioff; if (ONETYPE) { const int ptr_off=_onetype * ntypes + _onetype; - oscale = scale_f[ptr_off]; - int rhor_ioff = istride * _onetype; - rhor_joff = rhor_ioff + _onetype * jstride; - frho_ioff = fstride * _onetype; + oscale = scale_f[ptr_off]; + int rhor_ioff = istride * _onetype; + rhor_joff = rhor_ioff + _onetype * jstride; + frho_ioff = fstride * _onetype; } for (int i = iifrom; i < iito; ++i) { int itype, rhor_ioff; - if (!ONETYPE) { + if (!ONETYPE) { itype = x[i].w; - rhor_ioff = istride * itype; - } - const int * _noalias const jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - - const flt_t xtmp = x[i].x; - const 
flt_t ytmp = x[i].y; - const flt_t ztmp = x[i].z; - - acc_t rhoi = (acc_t)0.0; - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd reduction(+:rhoi) - #endif - for (int jj = 0; jj < jnum; jj++) { - int j, jtype; - j = jlist[jj] & NEIGHMASK; - + rhor_ioff = istride * itype; + } + const int * _noalias const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + + acc_t rhoi = (acc_t)0.0; + int ej = 0; + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const flt_t rsq = delx*delx + dely*dely + delz*delz; - - if (rsq < fcutforcesq) { - if (!ONETYPE) jtype = x[j].w; - flt_t p = sqrt(rsq)*frdr + (flt_t)1.0; - int m = static_cast<int> (p); - m = MIN(m,nr-1); - p -= m; - p = MIN(p,(flt_t)1.0); - if (!ONETYPE) - rhor_joff = rhor_ioff + jtype * jstride; - const int joff = rhor_joff + m; - flt_t ra; - ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p + - rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d; - rhoi += ra; - if (NEWTON_PAIR || j < nlocal) { - if (!ONETYPE) { - const int ioff = jtype * istride + itype * jstride + m; - ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p + - rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d; - } - trho[j] += ra; + const flt_t rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < fcutforcesq) { + trsq[ej]=rsq; + if (!ONETYPE) tjtype[ej]=x[j].w; + tj[ej]=jlist[jj]; + ej++; + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:rhoi) + #endif + for (int jj = 0; jj < ej; jj++) { + int jtype; + const int j = tj[jj] & NEIGHMASK; + if (!ONETYPE) jtype = tjtype[jj]; + const flt_t rsq = trsq[jj]; + flt_t p = sqrt(rsq)*frdr + (flt_t)1.0; + int m = 
static_cast<int> (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,(flt_t)1.0); + if (!ONETYPE) + rhor_joff = rhor_ioff + jtype * jstride; + const int joff = rhor_joff + m; + flt_t ra; + ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p + + rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d; + rhoi += ra; + if (NEWTON_PAIR) { + if (!ONETYPE) { + const int ioff = jtype * istride + itype * jstride + m; + ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p + + rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d; } + trho[j] += ra; } } // for jj - trho[i] += rhoi; + if (NEWTON_PAIR) + trho[i] += rhoi; + else + trho[i] = rhoi; } // for i #if defined(_OPENMP) - if (nthreads > 1) { + if (NEWTON_PAIR && nthreads > 1) { #pragma omp barrier - if (tid == 0) { - int rcount; - if (NEWTON_PAIR) rcount = nall; - else rcount = nlocal; - if (nthreads == 2) { + if (tid == 0) { + const int rcount = nall; + if (nthreads == 2) { double *trho2 = rho + nmax; - #pragma vector aligned + #pragma vector aligned #pragma simd - for (int n = 0; n < rcount; n++) - rho[n] += trho2[n]; + for (int n = 0; n < rcount; n++) + rho[n] += trho2[n]; } else if (nthreads == 4) { double *trho2 = rho + nmax; - double *trho3 = trho2 + nmax; - double *trho4 = trho3 + nmax; - #pragma vector aligned - #pragma simd - for (int n = 0; n < rcount; n++) - rho[n] += trho2[n] + trho3[n] + trho4[n]; + double *trho3 = trho2 + nmax; + double *trho4 = trho3 + nmax; + #pragma vector aligned + #pragma simd + for (int n = 0; n < rcount; n++) + rho[n] += trho2[n] + trho3[n] + trho4[n]; } else { - double *trhon = rho + nmax; - for (int t = 1; t < nthreads; t++) { - #pragma vector aligned - #pragma simd - for (int n = 0; n < rcount; n++) - rho[n] += trhon[n]; - trhon += nmax; + double *trhon = rho + nmax; + for (int t = 1; t < nthreads; t++) { + #pragma vector aligned + #pragma simd + for (int n = 0; n < rcount; n++) + rho[n] += trhon[n]; + trhon += nmax; } } } @@ -398,32 +411,32 @@ void PairEAMIntel::eval(const int 
offload, const int vflag, #pragma simd reduction(+:tevdwl) #endif for (int i = iifrom; i < iito; ++i) { - int itype; - if (!ONETYPE) itype = x[i].w; - flt_t p = rho[i]*frdrho + (flt_t)1.0; - int m = static_cast<int> (p); - m = MAX(1,MIN(m,nrho-1)); - p -= m; - p = MIN(p,(flt_t)1.0); - if (!ONETYPE) frho_ioff = itype * fstride; - const int ioff = frho_ioff + m; - fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p + - frho_spline_f[ioff].c; - if (EFLAG) { - flt_t phi = ((frho_spline_e[ioff].a*p + frho_spline_e[ioff].b)*p + - frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d; - if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax); - if (!ONETYPE) { - const int ptr_off=itype*ntypes + itype; - oscale = scale_f[ptr_off]; - } - phi *= oscale; - tevdwl += phi; - if (eatom) f[i].w += phi; - } + int itype; + if (!ONETYPE) itype = x[i].w; + flt_t p = rho[i]*frdrho + (flt_t)1.0; + int m = static_cast<int> (p); + m = MAX(1,MIN(m,nrho-1)); + p -= m; + p = MIN(p,(flt_t)1.0); + if (!ONETYPE) frho_ioff = itype * fstride; + const int ioff = frho_ioff + m; + fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p + + frho_spline_f[ioff].c; + if (EFLAG) { + flt_t phi = ((frho_spline_e[ioff].a*p + frho_spline_e[ioff].b)*p + + frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d; + if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax); + if (!ONETYPE) { + const int ptr_off=itype*ntypes + itype; + oscale = scale_f[ptr_off]; + } + phi *= oscale; + tevdwl += phi; + if (eatom) f[i].w += phi; + } } if (EFLAG) oevdwl += tevdwl; - + // communicate derivative of embedding function @@ -431,11 +444,10 @@ void PairEAMIntel::eval(const int offload, const int vflag, #pragma omp barrier #endif - if (tid == 0) { + if (tid == 0) comm->forward_comm_pair(this); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); - } else - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + if (NEWTON_PAIR) + memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); #if defined(_OPENMP) #pragma omp 
barrier @@ -446,140 +458,158 @@ void PairEAMIntel::eval(const int offload, const int vflag, for (int i = iifrom; i < iito; ++i) { int itype, rhor_ioff; - const flt_t * _noalias scale_fi; - if (!ONETYPE) { - itype = x[i].w; - rhor_ioff = istride * itype; - scale_fi = scale_f + itype*ntypes; - } - const int * _noalias const jlist = firstneigh + cnumneigh[i]; - const int jnum = numneigh[i]; - - acc_t fxtmp, fytmp, fztmp, fwtmp; - acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; - - const flt_t xtmp = x[i].x; - const flt_t ytmp = x[i].y; - const flt_t ztmp = x[i].z; - fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = (acc_t)0; - if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + const flt_t * _noalias scale_fi; + if (!ONETYPE) { + itype = x[i].w; + rhor_ioff = istride * itype; + scale_fi = scale_f + itype*ntypes; } + const int * _noalias const jlist = firstneigh + cnumneigh[i]; + const int jnum = numneigh[i]; + + acc_t fxtmp, fytmp, fztmp, fwtmp; + acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + + const flt_t xtmp = x[i].x; + const flt_t ytmp = x[i].y; + const flt_t ztmp = x[i].z; + fxtmp = fytmp = fztmp = (acc_t)0; + if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (NEWTON_PAIR == 0) + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, sv5) - #endif - for (int jj = 0; jj < jnum; jj++) { - int j, jtype; - j = jlist[jj] & NEIGHMASK; - + int ej = 0; + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif + for (int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; - const flt_t rsq = delx*delx + dely*dely + delz*delz; - - - if (rsq < fcutforcesq) { - if (!ONETYPE) jtype = x[j].w; - const flt_t r = sqrt(rsq); - flt_t p = r*frdr + (flt_t)1.0; - 
int m = static_cast<int> (p); - m = MIN(m,nr-1); - p -= m; - p = MIN(p,(flt_t)1.0); - if (!ONETYPE) - rhor_joff = rhor_ioff + jtype * jstride; - const int joff = rhor_joff + m; - const flt_t rhojp = (rhor_spline_f[joff].a*p + - rhor_spline_f[joff].b)*p + - rhor_spline_f[joff].c; - flt_t rhoip; - if (!ONETYPE) { - const int ioff = jtype * istride + itype * jstride + m; - rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p + - rhor_spline_f[ioff].c; - } else - rhoip = rhojp; - const flt_t z2p = (z2r_spline_t[joff].a*p + - z2r_spline_t[joff].b)*p + - z2r_spline_t[joff].c; - const flt_t z2 = ((z2r_spline_t[joff].d*p + - z2r_spline_t[joff].e)*p + - z2r_spline_t[joff].f)*p + - z2r_spline_t[joff].g; - - const flt_t recip = (flt_t)1.0/r; - const flt_t phi = z2*recip; - const flt_t phip = z2p*recip - phi*recip; - const flt_t psip = fp_f[i]*rhojp + fp_f[j]*rhoip + phip; - if (!ONETYPE) - oscale = scale_fi[jtype]; - const flt_t fpair = -oscale*psip*recip; - - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx*fpair; - f[j].y -= dely*fpair; - f[j].z -= delz*fpair; - } + const flt_t rsq = delx*delx + dely*dely + delz*delz; + + if (rsq < fcutforcesq) { + trsq[ej]=rsq; + tdelx[ej]=delx; + tdely[ej]=dely; + tdelz[ej]=delz; + if (!ONETYPE) tjtype[ej]=x[j].w; + tj[ej]=jlist[jj]; + ej++; + } + } - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i<nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j<nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - const flt_t evdwl = oscale*phi; - sevdwl += ev_pre * evdwl; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl; - } - } - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, - delx, dely, delz); + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) + #endif + for (int 
jj = 0; jj < ej; jj++) { + int jtype; + const int j = tj[jj] & NEIGHMASK; + if (!ONETYPE) jtype = tjtype[jj]; + const flt_t rsq = trsq[jj]; + const flt_t r = sqrt(rsq); + flt_t p = r*frdr + (flt_t)1.0; + int m = static_cast<int> (p); + m = MIN(m,nr-1); + p -= m; + p = MIN(p,(flt_t)1.0); + if (!ONETYPE) + rhor_joff = rhor_ioff + jtype * jstride; + const int joff = rhor_joff + m; + const flt_t rhojp = (rhor_spline_f[joff].a*p + + rhor_spline_f[joff].b)*p + + rhor_spline_f[joff].c; + flt_t rhoip; + if (!ONETYPE) { + const int ioff = jtype * istride + itype * jstride + m; + rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p + + rhor_spline_f[ioff].c; + } else + rhoip = rhojp; + const flt_t z2p = (z2r_spline_t[joff].a*p + + z2r_spline_t[joff].b)*p + + z2r_spline_t[joff].c; + const flt_t z2 = ((z2r_spline_t[joff].d*p + + z2r_spline_t[joff].e)*p + + z2r_spline_t[joff].f)*p + + z2r_spline_t[joff].g; + + const flt_t recip = (flt_t)1.0/r; + const flt_t phi = z2*recip; + const flt_t phip = z2p*recip - phi*recip; + const flt_t psip = fp_f[i]*rhojp + fp_f[j]*rhoip + phip; + if (!ONETYPE) + oscale = scale_fi[jtype]; + const flt_t fpair = -oscale*psip*recip; + + const flt_t fpx = fpair * tdelx[jj]; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * tdely[jj]; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * tdelz[jj]; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + const flt_t evdwl = oscale*phi; + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl; } - } // if rsq + } + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj], + fpx, fpy, fpz); } // for jj - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + sevdwl *= (acc_t)0.5; + } - 
IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp); + IP_PRE_ev_tally_atom(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for i - if (vflag == 2) { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } - + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } /// omp - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0.0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -591,7 +621,7 @@ void PairEAMIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -604,6 +634,10 @@ void PairEAMIntel::eval(const int offload, const int vflag, void PairEAMIntel::init_style() { PairEAM::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -631,8 +665,15 @@ void 
PairEAMIntel::init_style() template <class flt_t, class acc_t> void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc, - IntelBuffers<flt_t,acc_t> *buffers) + IntelBuffers<flt_t,acc_t> *buffers) { + int off_ccache = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (_cop >= 0) off_ccache = 1; + #endif + buffers->grow_ccache(off_ccache, comm->nthreads, 1); + _ccache_stride = buffers->ccache_stride(); + int tp1 = atom->ntypes + 1; fc.set_ntypes(tp1,nr,nrho,memory,_cop); buffers->set_ntypes(tp1); @@ -643,14 +684,14 @@ void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc, for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { - cut = init_one(i,j); - cutneigh = cut + neighbor->skin; - cutsq[i][j] = cutsq[j][i] = cut*cut; - cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; + cut = init_one(i,j); + cutneigh = cut + neighbor->skin; + cutsq[i][j] = cutsq[j][i] = cut*cut; + cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh; } } } - + _onetype=-1; double oldscale=-1; for (int i = 1; i < tp1; i++) { @@ -668,32 +709,32 @@ void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc, for (int j = 1; j < tp1; j++) { fc.scale_f[i][j] = scale[i][j]; if (type2rhor[i][j] >= 0) { - const int joff = ioff + j * fc.rhor_jstride(); - for (int k = 0; k < nr + 1; k++) { - if (type2rhor[j][i] != type2rhor[i][j]) - _onetype = 0; + const int joff = ioff + j * fc.rhor_jstride(); + for (int k = 0; k < nr + 1; k++) { + if (type2rhor[j][i] != type2rhor[i][j]) + _onetype = 0; else if (_onetype < 0) - _onetype = i; + _onetype = i; if (oldscale < 0) oldscale = scale[i][j]; else - if (oldscale != scale[i][j]) - _onetype = 0; - fc.rhor_spline_f[joff + k].a=rhor_spline[type2rhor[j][i]][k][0]; - fc.rhor_spline_f[joff + k].b=rhor_spline[type2rhor[j][i]][k][1]; - fc.rhor_spline_f[joff + k].c=rhor_spline[type2rhor[j][i]][k][2]; - fc.rhor_spline_e[joff + k].a=rhor_spline[type2rhor[j][i]][k][3]; - 
fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4]; - fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5]; - fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6]; - fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0]; - fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1]; - fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2]; - fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3]; - fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4]; - fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5]; - fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6]; - } + if (oldscale != scale[i][j]) + _onetype = 0; + fc.rhor_spline_f[joff + k].a=rhor_spline[type2rhor[j][i]][k][0]; + fc.rhor_spline_f[joff + k].b=rhor_spline[type2rhor[j][i]][k][1]; + fc.rhor_spline_f[joff + k].c=rhor_spline[type2rhor[j][i]][k][2]; + fc.rhor_spline_e[joff + k].a=rhor_spline[type2rhor[j][i]][k][3]; + fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4]; + fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5]; + fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6]; + fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0]; + fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1]; + fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2]; + fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3]; + fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4]; + fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5]; + fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6]; + } } } } @@ -704,9 +745,9 @@ void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, - const int nr, const int nrho, - Memory *memory, - const int cop) { + const int nr, const int nrho, + Memory *memory, + const int cop) { if (ntypes != _ntypes || nr > _nr || nrho > _nrho) { 
if (_ntypes > 0) { _memory->destroy(rhor_spline_f); @@ -739,7 +780,7 @@ void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, /* ---------------------------------------------------------------------- */ int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf, - int pbc_flag, int *pbc) + int pbc_flag, int *pbc) { if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) return pack_forward_comm(n, list, buf, fp); @@ -761,7 +802,7 @@ void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf) template<class flt_t> int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf, - flt_t *fp_f) + flt_t *fp_f) { int i,j,m; @@ -776,8 +817,8 @@ int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf, /* ---------------------------------------------------------------------- */ template<class flt_t> -void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf, - flt_t *fp_f) +void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf, + flt_t *fp_f) { int i,m,last; diff --git a/src/USER-INTEL/pair_eam_intel.h b/src/USER-INTEL/pair_eam_intel.h index f7fb71ad2cabac350c8e9b60c600b1d4c3cbae73..f34e740bdaaf83c9229239339d755dd0588dab55 100644 --- a/src/USER-INTEL/pair_eam_intel.h +++ b/src/USER-INTEL/pair_eam_intel.h @@ -41,7 +41,7 @@ class PairEAMIntel : public PairEAM { protected: FixIntel *fix; - int _cop, _onetype; + int _cop, _onetype, _ccache_stride; float *fp_float; template <class flt_t> @@ -53,8 +53,8 @@ class PairEAMIntel : public PairEAM { template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, - class acc_t> + template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, + class acc_t> void eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc, const int astart, const int aend); @@ -79,8 +79,8 @@ class 
PairEAMIntel : public PairEAM { ForceConst() : _ntypes(0), _nr(0) {} ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); } - void set_ntypes(const int ntypes, const int nr, const int nrho, - Memory *memory, const int cop); + void set_ntypes(const int ntypes, const int nr, const int nrho, + Memory *memory, const int cop); inline int rhor_jstride() const { return _nr; } inline int rhor_istride() const { return _nr * _ntypes; } inline int frho_stride() const { return _nrho; } diff --git a/src/USER-INTEL/pair_gayberne_intel.cpp b/src/USER-INTEL/pair_gayberne_intel.cpp index c1e3d1b37f7c91dd15c250c077927364a96f7072..ed7dd424af34e665cdc97b05726c2b8d0ded39d4 100644 --- a/src/USER-INTEL/pair_gayberne_intel.cpp +++ b/src/USER-INTEL/pair_gayberne_intel.cpp @@ -88,23 +88,27 @@ void PairGayBerneIntel::compute(int eflag, int vflag, const AtomVecEllipsoid::Bonus * const bonus = avec->bonus; const int * const ellipsoid = atom->ellipsoid; QUAT_T * _noalias const quat = buffers->get_quat(); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads, - sizeof(ATOM_T)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, packthreads, + sizeof(ATOM_T)); if (ago != 0) buffers->thr_pack(ifrom,ito,ago); for (int i = ifrom; i < ito; i++) { - int qi = ellipsoid[i]; - if (qi > -1) { - quat[i].w = bonus[qi].quat[0]; - quat[i].i = bonus[qi].quat[1]; - quat[i].j = bonus[qi].quat[2]; - quat[i].k = bonus[qi].quat[3]; - } + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } } } quat[nall].w = (flt_t)1.0; @@ -114,39 +118,29 @@ void PairGayBerneIntel::compute(int eflag, int vflag, fix->stop_watch(TIME_PACK); } - if 
(evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairGayBerneIntel::eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc, @@ -167,66 +161,65 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, if (fix->separate_buffers()) { fix->start_watch(TIME_PACK); if (offload) { - #pragma omp parallel default(none) \ - shared(buffers,nlocal,nall,bonus,ellipsoid) + 
#pragma omp parallel { int ifrom, ito, tid; - int nthreads = comm->nthreads; - IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, - nthreads, sizeof(ATOM_T)); - if (ago != 0) buffers->thr_pack_cop(ifrom, ito, 0); - for (int i = ifrom; i < ito; i++) { - int qi = ellipsoid[i]; - if (qi > -1) { - quat[i].w = bonus[qi].quat[0]; - quat[i].i = bonus[qi].quat[1]; - quat[i].j = bonus[qi].quat[2]; - quat[i].k = bonus[qi].quat[3]; - } - } - int nghost = nall - nlocal; - if (nghost) { - IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, - nthreads, sizeof(ATOM_T)); - int offset = 0; - ifrom += nlocal; - ito += nlocal; - if (ago != 0) { - offset = fix->offload_min_ghost() - nlocal; - buffers->thr_pack_cop(ifrom, ito, offset, ago == 1); - } - for (int i = ifrom; i < ito; i++) { - int qi = ellipsoid[i + offset]; - if (qi > -1) { - quat[i].w = bonus[qi].quat[0]; - quat[i].i = bonus[qi].quat[1]; - quat[i].j = bonus[qi].quat[2]; - quat[i].k = bonus[qi].quat[3]; - } - } - } + int nthreads = comm->nthreads; + IP_PRE_omp_range_id_align(ifrom, ito, tid, nlocal, + nthreads, sizeof(ATOM_T)); + if (ago != 0) buffers->thr_pack_cop(ifrom, ito, 0); + for (int i = ifrom; i < ito; i++) { + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + int nghost = nall - nlocal; + if (nghost) { + IP_PRE_omp_range_align(ifrom, ito, tid, nall - nlocal, + nthreads, sizeof(ATOM_T)); + int offset = 0; + ifrom += nlocal; + ito += nlocal; + if (ago != 0) { + offset = fix->offload_min_ghost() - nlocal; + buffers->thr_pack_cop(ifrom, ito, offset, ago == 1); + } + for (int i = ifrom; i < ito; i++) { + int qi = ellipsoid[i + offset]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } + } + } } } else { if (ago != 0) buffers->thr_pack_host(fix->host_min_local(), nlocal, 0); for (int i 
= fix->host_min_local(); i < nlocal; i++) { - int qi = ellipsoid[i]; - if (qi > -1) { - quat[i].w = bonus[qi].quat[0]; - quat[i].i = bonus[qi].quat[1]; - quat[i].j = bonus[qi].quat[2]; - quat[i].k = bonus[qi].quat[3]; - } + int qi = ellipsoid[i]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } } int offset = fix->host_min_ghost() - nlocal; if (ago != 0) buffers->thr_pack_host(nlocal, nall, offset); for (int i = nlocal; i < nall; i++) { - int qi = ellipsoid[i + offset]; - if (qi > -1) { - quat[i].w = bonus[qi].quat[0]; - quat[i].i = bonus[qi].quat[1]; - quat[i].j = bonus[qi].quat[2]; - quat[i].k = bonus[qi].quat[3]; - } + int qi = ellipsoid[i + offset]; + if (qi > -1) { + quat[i].w = bonus[qi].quat[0]; + quat[i].i = bonus[qi].quat[1]; + quat[i].j = bonus[qi].quat[2]; + quat[i].k = bonus[qi].quat[3]; + } } } fix->stop_watch(TIME_PACK); @@ -258,9 +251,9 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -310,30 +303,31 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, #ifdef _LMP_INTEL_OFFLOAD if (separate_flag) { if (separate_flag < 3) { - int all_local = nlocal; - int ghost_min = overflow[LMP_GHOST_MIN]; - nlocal = overflow[LMP_LOCAL_MAX] + 1; - int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; - if (nghost < 0) nghost = 0; - nall = nlocal + nghost; - separate_flag--; - int flength; - if (NEWTON_PAIR) flength = nall; - else flength = nlocal; - IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), - separate_flag); - if 
(nghost) { - if (nlocal < all_local || ghost_min > all_local) { - memmove(x + nlocal, x + ghost_min, - (nall - nlocal) * sizeof(ATOM_T)); - memmove(quat + nlocal, quat + ghost_min, - (nall - nlocal) * sizeof(QUAT_T)); - } - } + int all_local = nlocal; + int ghost_min = overflow[LMP_GHOST_MIN]; + nlocal = overflow[LMP_LOCAL_MAX] + 1; + int nghost = overflow[LMP_GHOST_MAX] + 1 - ghost_min; + if (nghost < 0) nghost = 0; + nall = nlocal + nghost; + separate_flag--; + int flength; + if (NEWTON_PAIR) flength = nall; + else flength = nlocal; + IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), + separate_flag); + if (nghost) { + if (nlocal < all_local || ghost_min > all_local) { + memmove(x + nlocal, x + ghost_min, + (nall - nlocal) * sizeof(ATOM_T)); + memmove(quat + nlocal, quat + ghost_min, + (nall - nlocal) * sizeof(QUAT_T)); + } + } } x[nall].x = (flt_t)INTEL_BIGP; x[nall].y = (flt_t)INTEL_BIGP; x[nall].z = (flt_t)INTEL_BIGP; + x[nall].w = 1; quat[nall].w = (flt_t)1.0; quat[nall].i = (flt_t)0.0; quat[nall].j = (flt_t)0.0; @@ -342,25 +336,25 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, #endif acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0.0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; - } + if (EFLAG) oevdwl = (acc_t)0.0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0; + if (NEWTON_PAIR == 0) f_start[1].w = 0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal * 2 + (tid * f_stride); - memset(f + minlocal * 2, 0, 
f_stride * sizeof(FORCE_T)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal * 2; + else foff = minlocal*-2; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal * 2, 0, f_stride * sizeof(FORCE_T)); flt_t * _noalias const rsq_form = rsq_formi + tid * max_nbors; flt_t * _noalias const delx_form = delx_formi + tid * max_nbors; @@ -370,7 +364,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, int * _noalias const jlist_form = jlist_formi + tid * max_nbors; int ierror = 0; - for (int i = iifrom; i < iito; ++i) { + for (int i = iifrom; i < iito; i += iip) { // const int i = ilist[ii]; const int itype = x[i].w; const int ptr_off = itype * ntypes; @@ -401,13 +395,16 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; fxtmp = fytmp = fztmp = t1tmp = t2tmp = t3tmp = (acc_t)0.0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = (acc_t)0.0; + if (EFLAG) fwtmp = sevdwl = (acc_t)0.0; + if (NEWTON_PAIR == 0) if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0; - } bool multiple_forms = false; int packed_j = 0; + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma ivdep + #endif for (int jj = 0; jj < jnum; jj++) { int jm = jlist[jj]; int j = jm & NEIGHMASK; @@ -431,27 +428,27 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, } else multiple_forms = true; } - const int edge = (packed_j % pad_width); - if (edge) { - const int packed_end = packed_j + (pad_width - edge); + const int edge = (packed_j % pad_width); + if (edge) { + const int packed_end = packed_j + (pad_width - edge); #if defined(LMP_SIMD_COMPILER) #pragma loop_count min=1, max=15, avg=8 #endif - for ( ; packed_j < packed_end; packed_j++) - jlist_form[packed_j] = nall; - } - + for ( ; packed_j < packed_end; packed_j++) + jlist_form[packed_j] = nall; + } + // ------------------------------------------------------------- - #ifdef INTEL_V512 - __assume(packed_j 
% INTEL_VECTOR_WIDTH == 0); - __assume(packed_j % 8 == 0); - __assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0); - #endif + #ifdef INTEL_V512 + __assume(packed_j % INTEL_VECTOR_WIDTH == 0); + __assume(packed_j % 8 == 0); + __assume(packed_j % INTEL_MIC_VECTOR_WIDTH == 0); + #endif #if defined(LMP_SIMD_COMPILER) #pragma vector aligned - #pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \ - sevdwl,sv0,sv1,sv2,sv3,sv4,sv5) + #pragma simd reduction(+:fxtmp,fytmp,fztmp,fwtmp,t1tmp,t2tmp,t3tmp, \ + sevdwl,sv0,sv1,sv2,sv3,sv4,sv5) #endif for (int jj = 0; jj < packed_j; jj++) { flt_t a2_0, a2_1, a2_2, a2_3, a2_4, a2_5, a2_6, a2_7, a2_8; @@ -461,15 +458,15 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, flt_t fforce_0, fforce_1, fforce_2, ttor_0, ttor_1, ttor_2; flt_t rtor_0, rtor_1, rtor_2; - const int sbindex = jlist_form[jj] >> SBBITS & 3; - const int j = jlist_form[jj] & NEIGHMASK; + const int sbindex = jlist_form[jj] >> SBBITS & 3; + const int j = jlist_form[jj] & NEIGHMASK; flt_t factor_lj = special_lj[sbindex]; const int jtype = jtype_form[jj]; - const flt_t sigma = ijci[jtype].sigma; - const flt_t epsilon = ijci[jtype].epsilon; - const flt_t shape2_0 = ic[jtype].shape2[0]; - const flt_t shape2_1 = ic[jtype].shape2[1]; - const flt_t shape2_2 = ic[jtype].shape2[2]; + const flt_t sigma = ijci[jtype].sigma; + const flt_t epsilon = ijci[jtype].epsilon; + const flt_t shape2_0 = ic[jtype].shape2[0]; + const flt_t shape2_1 = ic[jtype].shape2[1]; + const flt_t shape2_2 = ic[jtype].shape2[2]; flt_t one_eng, evdwl; ME_quat_to_mat_trans(quat[j], a2); @@ -491,7 +488,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, ME_plus3(g1, g2, g12); flt_t kappa_0, kappa_1, kappa_2; ME_mldivide3(g12, delx_form[jj], dely_form[jj], delz_form[jj], - kappa, ierror); + kappa, ierror); // tempv = G12^-1*r12hat @@ -523,7 +520,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, flt_t iota_0, iota_1, iota_2; ME_plus3(b1, b2, 
b12); ME_mldivide3(b12, delx_form[jj], dely_form[jj], delz_form[jj], - iota, ierror); + iota, ierror); // tempv = G12^-1*r12hat @@ -537,7 +534,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, // compute dUr/dr temp1 = ((flt_t)2.0 * varrho12 * varrho - varrho6 * varrho) / - sigma; + sigma; temp1 = temp1 * (flt_t)24.0 * epsilon; flt_t u_slj = temp1 * std::pow(sigma12, (flt_t)3.0) * (flt_t)0.5; flt_t dUr_0, dUr_1, dUr_2; @@ -551,8 +548,8 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, flt_t dchi_0, dchi_1, dchi_2; temp1 = ME_dot3(iota, r12hat); - temp2 = (flt_t)-4.0 / rsq_form[jj] * mu * - std::pow(chi, (mu - (flt_t)1.0) / mu); + temp2 = (flt_t)-4.0 / rsq_form[jj] * mu * + std::pow(chi, (mu - (flt_t)1.0) / mu); dchi_0 = temp2 * (iota_0 - temp1 * r12hat_0); dchi_1 = temp2 * (iota_1 - temp1 * r12hat_1); dchi_2 = temp2 * (iota_2 - temp1 * r12hat_2); @@ -573,7 +570,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, ME_cross3(tempv, tempv2, dUr); flt_t dUr2_0, dUr2_1, dUr2_2; - if (NEWTON_PAIR || j < nlocal) { + if (NEWTON_PAIR) { ME_vecmat(kappa, g2, tempv2); ME_cross3(tempv, tempv2, dUr2); } @@ -588,7 +585,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, dchi_2 *= temp1; flt_t dchi2_0, dchi2_1, dchi2_2; - if (NEWTON_PAIR || j < nlocal) { + if (NEWTON_PAIR) { ME_vecmat(iota, b2, tempv); ME_cross3(tempv, iota, dchi2); dchi2_0 *= temp1; @@ -630,7 +627,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, // compute d_eta for particle 2 flt_t deta2_0, deta2_1, deta2_2; - if (NEWTON_PAIR || j < nlocal) { + if (NEWTON_PAIR) { deta2_0 = deta2_1 = deta2_2 = (flt_t)0.0; ME_compute_eta_torque(g12, a2, shape2, temp); @@ -666,36 +663,36 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, temp3 = chi * eta; ttor_0 = (temp1 * dchi_0 + temp2 * deta_0 + temp3 * dUr_0) * - (flt_t)-1.0; + (flt_t)-1.0; ttor_1 = (temp1 * dchi_1 + temp2 * deta_1 + temp3 * dUr_1) * - (flt_t)-1.0; + 
(flt_t)-1.0; ttor_2 = (temp1 * dchi_2 + temp2 * deta_2 + temp3 * dUr_2) * - (flt_t)-1.0; + (flt_t)-1.0; - if (NEWTON_PAIR || j < nlocal) { + if (NEWTON_PAIR) { rtor_0 = (temp1 * dchi2_0 + temp2 * deta2_0 + temp3 * dUr2_0) * - (flt_t)-1.0; + (flt_t)-1.0; rtor_1 = (temp1 * dchi2_1 + temp2 * deta2_1 + temp3 * dUr2_1) * - (flt_t)-1.0; + (flt_t)-1.0; rtor_2 = (temp1 * dchi2_2 + temp2 * deta2_2 + temp3 * dUr2_2) * - (flt_t)-1.0; + (flt_t)-1.0; } one_eng = temp1 * chi; - #ifndef INTEL_VMASK - if (jlist_form[jj] == nall) { - one_eng = (flt_t)0.0; - fforce_0 = 0.0; - fforce_1 = 0.0; - fforce_2 = 0.0; - ttor_0 = 0.0; - ttor_1 = 0.0; - ttor_2 = 0.0; - rtor_0 = 0.0; - rtor_1 = 0.0; - rtor_2 = 0.0; - } - #endif + #ifndef INTEL_VMASK + if (jlist_form[jj] == nall) { + one_eng = (flt_t)0.0; + fforce_0 = 0.0; + fforce_1 = 0.0; + fforce_2 = 0.0; + ttor_0 = 0.0; + ttor_1 = 0.0; + ttor_2 = 0.0; + rtor_0 = 0.0; + rtor_1 = 0.0; + rtor_2 = 0.0; + } + #endif fforce_0 *= factor_lj; fforce_1 *= factor_lj; @@ -704,61 +701,53 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, ttor_1 *= factor_lj; ttor_2 *= factor_lj; - #ifdef INTEL_VMASK - if (jlist_form[jj] < nall) { - #endif - fxtmp += fforce_0; - fytmp += fforce_1; - fztmp += fforce_2; - t1tmp += ttor_0; - t2tmp += ttor_1; - t3tmp += ttor_2; - - if (NEWTON_PAIR || j < nlocal) { - rtor_0 *= factor_lj; - rtor_1 *= factor_lj; - rtor_2 *= factor_lj; - int jp = j * 2; - f[jp].x -= fforce_0; - f[jp].y -= fforce_1; - f[jp].z -= fforce_2; - jp++; - f[jp].x += rtor_0; - f[jp].y += rtor_1; - f[jp].z += rtor_2; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0.0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - evdwl = factor_lj * one_eng; - sevdwl += ev_pre * evdwl; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl; - if (NEWTON_PAIR || j < nlocal) - f[j*2].w += (flt_t)0.5 * evdwl; - } - } - - if (vflag == 1) { - ev_pre *= 
(flt_t)-1.0; - sv0 += ev_pre * delx_form[jj] * fforce_0; - sv1 += ev_pre * dely_form[jj] * fforce_1; - sv2 += ev_pre * delz_form[jj] * fforce_2; - sv3 += ev_pre * delx_form[jj] * fforce_1; - sv4 += ev_pre * delx_form[jj] * fforce_2; - sv5 += ev_pre * dely_form[jj] * fforce_2; - } - } // EVFLAG - #ifdef INTEL_VMASK - } - #endif + #ifdef INTEL_VMASK + if (jlist_form[jj] < nall) { + #endif + fxtmp += fforce_0; + fytmp += fforce_1; + fztmp += fforce_2; + t1tmp += ttor_0; + t2tmp += ttor_1; + t3tmp += ttor_2; + + if (NEWTON_PAIR) { + rtor_0 *= factor_lj; + rtor_1 *= factor_lj; + rtor_2 *= factor_lj; + int jp = j * 2; + f[jp].x -= fforce_0; + f[jp].y -= fforce_1; + f[jp].z -= fforce_2; + jp++; + f[jp].x += rtor_0; + f[jp].y += rtor_1; + f[jp].z += rtor_2; + } + + if (EFLAG) { + evdwl = factor_lj * one_eng; + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR) + f[j*2].w += (flt_t)0.5 * evdwl; + } + } + + if (NEWTON_PAIR == 0) { + if (vflag == 1) { + sv0 += delx_form[jj] * fforce_0; + sv1 += dely_form[jj] * fforce_1; + sv2 += delz_form[jj] * fforce_2; + sv3 += delx_form[jj] * fforce_1; + sv4 += delx_form[jj] * fforce_2; + sv5 += dely_form[jj] * fforce_2; + } + } // EVFLAG + #ifdef INTEL_VMASK + } + #endif } // for jj // ------------------------------------------------------------- @@ -767,19 +756,29 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, ierror = 2; int ip = i * 2; - f[ip].x += fxtmp; - f[ip].y += fytmp; - f[ip].z += fztmp; - ip++; - f[ip].x += t1tmp; - f[ip].y += t2tmp; - f[ip].z += t3tmp; - - if (EVFLAG) { - if (EFLAG) { - if (eatom) f[i * 2].w += fwtmp; - oevdwl += sevdwl; - } + if (NEWTON_PAIR) { + f[ip].x += fxtmp; + f[ip].y += fytmp; + f[ip].z += fztmp; + ip++; + f[ip].x += t1tmp; + f[ip].y += t2tmp; + f[ip].z += t3tmp; + } else { + f[ip].x = fxtmp; + f[ip].y = fytmp; + f[ip].z = fztmp; + ip++; + f[ip].x = t1tmp; + f[ip].y = t2tmp; + f[ip].z = t3tmp; + } + + if (EFLAG) { + oevdwl += sevdwl; + if (eatom) f[i 
* 2].w += fwtmp; + } + if (NEWTON_PAIR == 0) { if (vflag == 1) { ov0 += sv0; ov1 += sv1; @@ -791,57 +790,32 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, } } // for i int o_range; - if (NEWTON_PAIR) + if (NEWTON_PAIR) { o_range = nall; - else - o_range = nlocal; - if (offload == 0) o_range -= minlocal; - IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, - sizeof(FORCE_T)); - const int two_iito = iito * 2; - - acc_t *facc = &(f_start[0].x); - const int sto = two_iito * 4; - const int fst4 = f_stride * 4; - #if defined(_OPENMP) - #pragma omp barrier - #endif - int t_off = f_stride; - if (EFLAG && eatom) { - for (int t = 1; t < nthreads; t++) { - #if defined(LMP_SIMD_COMPILER) - #pragma vector nontemporal - #pragma novector - #endif - for (int n = iifrom * 2; n < two_iito; n++) { - f_start[n].x += f_start[n + t_off].x; - f_start[n].y += f_start[n + t_off].y; - f_start[n].z += f_start[n + t_off].z; - f_start[n].w += f_start[n + t_off].w; - } - t_off += f_stride; - } - } else { + if (offload == 0) o_range -= minlocal; + IP_PRE_omp_range_align(iifrom, iito, tid, o_range, nthreads, + sizeof(FORCE_T)); + const int sto = iito * 8; + const int fst4 = f_stride * 4; + #if defined(_OPENMP) + #pragma omp barrier + #endif + acc_t *f_scalar = &f_start[0].x; + acc_t *f_scalar2 = f_scalar + fst4; for (int t = 1; t < nthreads; t++) { #if defined(LMP_SIMD_COMPILER) - #pragma vector nontemporal - #pragma novector + #pragma vector aligned + #pragma simd #endif - for (int n = iifrom * 2; n < two_iito; n++) { - f_start[n].x += f_start[n + t_off].x; - f_start[n].y += f_start[n + t_off].y; - f_start[n].z += f_start[n + t_off].z; - } - t_off += f_stride; + for (int n = iifrom * 8; n < sto; n++) + f_scalar[n] += f_scalar2[n]; + f_scalar2 += fst4; } - } - if (EVFLAG) { if (vflag==2) { const ATOM_T * _noalias const xo = x + minlocal; #if defined(LMP_SIMD_COMPILER) - #pragma vector nontemporal - #pragma novector + #pragma novector #endif for (int n = iifrom; n < 
iito; n++) { const int nt2 = n * 2; @@ -859,19 +833,26 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, f_start[1].w = ierror; } // omp - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0.0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)-0.5; + ov1 *= (acc_t)-0.5; + ov2 *= (acc_t)-0.5; + ov3 *= (acc_t)-0.5; + ov4 *= (acc_t)-0.5; + ov5 *= (acc_t)-0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) @@ -884,7 +865,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, 2); else fix->add_result_array(f_start, 0, offload, 0, 0, 2); @@ -895,6 +876,10 @@ void PairGayBerneIntel::eval(const int offload, const int vflag, void PairGayBerneIntel::init_style() { PairGayBerne::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -997,7 +982,7 @@ void PairGayBerneIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, const int one_length, const int nthreads, Memory *memory, - const int cop) { + const int cop) { if (ntypes != _ntypes) { if (_ntypes > 0) { fc_packed3 *oic = ic; @@ -1014,9 +999,9 @@ void PairGayBerneIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, int * ojlist_form = jlist_form[0]; if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && - 
orsq_form != NULL && odelx_form != NULL && odely_form != NULL && - odelz_form != NULL && ojtype_form != NULL && ojlist_form != NULL && - _cop >= 0) { + orsq_form != NULL && odelx_form != NULL && odely_form != NULL && + odelz_form != NULL && ojtype_form != NULL && ojlist_form != NULL && + _cop >= 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(ospecial_lj, oijc, olj34, oic: alloc_if(0) free_if(1)) \ nocopy(orsq_form, odelx_form, odely_form: alloc_if(0) free_if(1)) \ @@ -1048,14 +1033,14 @@ void PairGayBerneIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, memory->create(jlist_form, nthreads, one_length, "jlist_form"); for (int zn = 0; zn < nthreads; zn++) - for (int zo = 0; zo < one_length; zo++) { - rsq_form[zn][zo] = 10.0; - delx_form[zn][zo] = 10.0; - dely_form[zn][zo] = 10.0; - delz_form[zn][zo] = 10.0; - jtype_form[zn][zo] = 1; - jlist_form[zn][zo] = 0; - } + for (int zo = 0; zo < one_length; zo++) { + rsq_form[zn][zo] = 10.0; + delx_form[zn][zo] = 10.0; + dely_form[zn][zo] = 10.0; + delz_form[zn][zo] = 10.0; + jtype_form[zn][zo] = 1; + jlist_form[zn][zo] = 0; + } #ifdef _LMP_INTEL_OFFLOAD flt_t * ospecial_lj = special_lj; @@ -1072,9 +1057,9 @@ void PairGayBerneIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, int tp1sq = ntypes*ntypes; if (ospecial_lj != NULL && oijc != NULL && olj34 != NULL && - oic != NULL && orsq_form != NULL && odelx_form != NULL && - odely_form != NULL && odelz_form != NULL && ojtype_form !=NULL && - ojlist_form !=NULL && cop >= 0) { + oic != NULL && orsq_form != NULL && odelx_form != NULL && + odely_form != NULL && odelz_form != NULL && ojtype_form !=NULL && + ojlist_form !=NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ nocopy(oijc,olj34: length(tp1sq) alloc_if(1) free_if(0)) \ diff --git a/src/USER-INTEL/pair_gayberne_intel.h b/src/USER-INTEL/pair_gayberne_intel.h index 
aaed31d567274792b9100fa46ff0f5cb644f0e2c..07dfba14d113e279bebe240058908b71db39139d 100644 --- a/src/USER-INTEL/pair_gayberne_intel.h +++ b/src/USER-INTEL/pair_gayberne_intel.h @@ -43,7 +43,7 @@ class PairGayBerneIntel : public PairGayBerne { template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc, const int astart, const int aend); diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp index ce6e40141f395f079eea9ad36225fbdad7da6782..fe99525122c17ae456241ae8f33e6f2deb1bf82a 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp @@ -67,8 +67,8 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -82,58 +82,52 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag, if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost, - nthreads, sizeof(ATOM_T)); + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } // 
-------------------- Regular version - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int 
astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -182,9 +176,9 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -233,28 +227,27 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, #endif IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, q); + f_stride, x, q); acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = oecoul = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); flt_t cutboth = cut_coulsq; const int toffs = tid * ccache_stride; @@ -265,8 +258,8 @@ void 
PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, int * _noalias const tj = ccachei + toffs; int * _noalias const tjtype = ccachej + toffs; - for (int i = iifrom; i < iito; ++i) { - // const int i = ilist[ii]; + for (int i = iifrom; i < iito; i += iip) { + // const int i = ilist[ii]; const int itype = x[i].w; const int ptr_off = itype * ntypes; @@ -277,221 +270,219 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, const int jnum = numneigh[i]; acc_t fxtmp,fytmp,fztmp,fwtmp; - acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; + acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const flt_t qtmp = q[i]; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; - if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (NEWTON_PAIR == 0) + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - int ej = 0; + int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma ivdep + #pragma vector aligned + #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { const int j = jlist[jj] & NEIGHMASK; - const flt_t delx = xtmp - x[j].x; + const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; const flt_t rsq = delx * delx + dely * dely + delz * delz; - if (rsq < cut_coulsq) { - trsq[ej]=rsq; - tdelx[ej]=delx; - tdely[ej]=dely; - tdelz[ej]=delz; - tjtype[ej]=x[j].w; - tj[ej]=jlist[jj]; - ej++; - } - } + if (rsq < cut_coulsq) { + trsq[ej]=rsq; + tdelx[ej]=delx; + tdely[ej]=dely; + tdelz[ej]=delz; + tjtype[ej]=x[j].w; + tj[ej]=jlist[jj]; + ej++; + } + } #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ - sv0, sv1, sv2, sv3, sv4, sv5) + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, 
fwtmp, sevdwl, secoul, \ + sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < ej; jj++) { flt_t forcecoul, forcelj, evdwl, ecoul; forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; - const int j = tj[jj] & NEIGHMASK; + const int j = tj[jj] & NEIGHMASK; const int sbindex = tj[jj] >> SBBITS & 3; - const int jtype = tjtype[jj]; - const flt_t rsq = trsq[jj]; + const int jtype = tjtype[jj]; + const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; #ifdef INTEL_ALLOW_TABLE if (!ncoultablebits || rsq <= tabinnersq) { #endif const flt_t A1 = 0.254829592; - const flt_t A2 = -0.284496736; - const flt_t A3 = 1.421413741; - const flt_t A4 = -1.453152027; - const flt_t A5 = 1.061405429; - const flt_t EWALD_F = 1.12837917; - const flt_t INV_EWALD_P = 1.0 / 0.3275911; - - const flt_t r = (flt_t)1.0 / sqrt(r2inv); - const flt_t grij = g_ewald * r; - const flt_t expm2 = exp(-grij * grij); - const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); - const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - const flt_t prefactor = qqrd2e * qtmp * q[j] / r; - forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); - if (EFLAG) ecoul = prefactor * erfc; - - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; + const flt_t A2 = -0.284496736; + const flt_t A3 = 1.421413741; + const flt_t A4 = -1.453152027; + const flt_t A5 = 1.061405429; + const flt_t EWALD_F = 1.12837917; + const flt_t INV_EWALD_P = 1.0 / 0.3275911; + + const flt_t r = (flt_t)1.0 / sqrt(r2inv); + const flt_t grij = g_ewald * r; + const flt_t expm2 = exp(-grij * grij); + const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); + const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const flt_t prefactor = qqrd2e * qtmp * q[j] / r; + forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); + if (EFLAG) ecoul = prefactor * erfc; + + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* + prefactor; + forcecoul -= adjust; 
+ if (EFLAG) ecoul -= adjust; #ifdef INTEL_ALLOW_TABLE - } else { - float rsq_lookup = rsq; - const int itable = (__intel_castf32_u32(rsq_lookup) & - ncoulmask) >> ncoulshiftbits; - const flt_t fraction = (rsq_lookup - table[itable].r) * - table[itable].dr; - - const flt_t tablet = table[itable].f + - fraction * table[itable].df; - forcecoul = qtmp * q[j] * tablet; - if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + - fraction * detable[itable]); - if (sbindex) { - const flt_t table2 = ctable[itable] + - fraction * dctable[itable]; - const flt_t prefactor = qtmp * q[j] * table2; - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; - } + } else { + float rsq_lookup = rsq; + const int itable = (__intel_castf32_u32(rsq_lookup) & + ncoulmask) >> ncoulshiftbits; + const flt_t fraction = (rsq_lookup - table[itable].r) * + table[itable].dr; + + const flt_t tablet = table[itable].f + + fraction * table[itable].df; + forcecoul = qtmp * q[j] * tablet; + if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + + fraction * detable[itable]); + if (sbindex) { + const flt_t table2 = ctable[itable] + + fraction * dctable[itable]; + const flt_t prefactor = qtmp * q[j] * table2; + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + } } #endif - #ifdef INTEL_VMASK - if (rsq < cut_ljsq) { - #endif + #ifdef INTEL_VMASK + if (rsq < cut_ljsq) { + #endif flt_t r6inv = r2inv * r2inv * r2inv; forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y); if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w); - #ifdef INTEL_VMASK - if (rsq > cut_lj_innersq) { - #endif + #ifdef INTEL_VMASK + if (rsq > cut_lj_innersq) { + #endif const flt_t drsq = cut_ljsq - rsq; const flt_t cut2 = (rsq - cut_lj_innersq) * drsq; const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) * inv_denom_lj; const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * 
inv_denom_lj; if (EFLAG) { - #ifndef INTEL_VMASK - if (rsq > cut_lj_innersq) { - #endif + #ifndef INTEL_VMASK + if (rsq > cut_lj_innersq) { + #endif forcelj = forcelj * switch1 + evdwl * switch2; evdwl *= switch1; - #ifndef INTEL_VMASK - } - #endif + #ifndef INTEL_VMASK + } + #endif } else { const flt_t philj = r6inv * (lji[jtype].z*r6inv - lji[jtype].w); - #ifndef INTEL_VMASK - if (rsq > cut_lj_innersq) - #endif + #ifndef INTEL_VMASK + if (rsq > cut_lj_innersq) + #endif forcelj = forcelj * switch1 + philj * switch2; } - #ifdef INTEL_VMASK - } - #endif + #ifdef INTEL_VMASK + } + #endif if (sbindex) { const flt_t factor_lj = special_lj[sbindex]; forcelj *= factor_lj; if (EFLAG) evdwl *= factor_lj; } - #ifdef INTEL_VMASK - } - #else - if (rsq > cut_coulsq) { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } - if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } - #endif - - const flt_t delx = tdelx[jj]; - const flt_t dely = tdely[jj]; - const flt_t delz = tdelz[jj]; - const flt_t fpair = (forcecoul + forcelj) * r2inv; - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - sevdwl += ev_pre * evdwl; - secoul += ev_pre * ecoul; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - } - } - - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, - delx, dely, delz); - } - } // for jj - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; + #ifdef INTEL_VMASK + } + #else + if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } + #endif - IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + const flt_t fpair = 
(forcecoul + forcelj) * r2inv; + const flt_t fpx = fpair * tdelx[jj]; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * tdely[jj]; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * tdelz[jj]; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + sevdwl += evdwl; + secoul += ecoul; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + } + } + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj], + fpx, fpy, fpz); + } // for jj + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } + IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = oecoul; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) { + oevdwl *= (acc_t)0.5; + oecoul *= (acc_t)0.5; } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = 
ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -503,7 +494,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -514,6 +505,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag, void PairLJCharmmCoulLongIntel::init_style() { PairLJCharmmCoulLong::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -541,11 +536,6 @@ template <class flt_t, class acc_t> void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t,acc_t> *buffers) { - int tp1 = atom->ntypes + 1; - int ntable = 1; - if (ncoultablebits) - for (int i = 0; i < ncoultablebits; i++) ntable *= 2; - int off_ccache = 0; #ifdef _LMP_INTEL_OFFLOAD if (_cop >= 0) off_ccache = 1; @@ -553,6 +543,11 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, buffers->grow_ccache(off_ccache, comm->nthreads, 1); _ccache_stride = buffers->ccache_stride(); + int tp1 = atom->ntypes + 1; + int ntable = 1; + if (ncoultablebits) + for (int i = 0; i < ncoultablebits; i++) ntable *= 2; + fc.set_ntypes(tp1, ntable, memory, _cop); buffers->set_ntypes(tp1); flt_t **cutneighsq = buffers->get_cutneighsq(); @@ -561,7 +556,7 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, double cut, cutneigh; if (cut_lj > cut_coul) error->all(FLERR, - "Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic"); + "Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic"); for (int i = 1; i <= atom->ntypes; i++) 
{ for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { @@ -642,7 +637,7 @@ template <class flt_t> void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, const int ntable, Memory *memory, - const int cop) { + const int cop) { if ( (ntypes != _ntypes || ntable != _ntable) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD @@ -658,12 +653,12 @@ void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && otable != NULL && oetable != NULL && odetable != NULL && octable != NULL && odctable != NULL && ospecial_coul != NULL && - cop >= 0) { + cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ - nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \ - nocopy(otable: alloc_if(0) free_if(1)) \ - nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) + nocopy(ocutsq, olj: alloc_if(0) free_if(1)) \ + nocopy(otable: alloc_if(0) free_if(1)) \ + nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) } #endif @@ -699,7 +694,7 @@ void PairLJCharmmCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL && otable !=NULL && oetable != NULL && odetable != NULL && octable != NULL && odctable != NULL && ospecial_coul != NULL && - cop >= 0) { + cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \ nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \ diff --git a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h index 6a207d8400dde538cada15e0e32a477c00da2367..1b13d784971667c7f68b48afbe2e057013e0cb5e 100644 --- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h +++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h @@ -48,10 +48,10 @@ class PairLJCharmmCoulLongIntel : public 
PairLJCharmmCoulLong { template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, @@ -75,7 +75,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong { ~ForceConst() { set_ntypes(0,0,NULL,_cop); } void set_ntypes(const int ntypes, const int ntable, Memory *memory, - const int cop); + const int cop); private: int _ntypes, _ntable, _cop; diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp index f26ff724c813855ba74fcec586931e63a8c442ae..e9775d6ec5c96b97bcd89ae39925796474bdca86 100644 --- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp @@ -68,8 +68,8 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag) template <class flt_t, class acc_t> void PairLJCutCoulLongIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -83,57 +83,50 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag, if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, 
ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + if (force->newton_pair) { + eval<1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0>(0, ovflag, buffers, fc, host_start, inum); } } } /* ---------------------------------------------------------------------- */ -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, - 
IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -167,11 +160,19 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, const int ntypes = atom->ntypes + 1; const int eatom = this->eflag_atom; + flt_t * _noalias const ccachex = buffers->get_ccachex(); + flt_t * _noalias const ccachey = buffers->get_ccachey(); + flt_t * _noalias const ccachez = buffers->get_ccachez(); + flt_t * _noalias const ccachew = buffers->get_ccachew(); + int * _noalias const ccachei = buffers->get_ccachei(); + int * _noalias const ccachej = buffers->get_ccachej(); + const int ccache_stride = _ccache_stride; + // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -204,8 +205,10 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, in(x:length(x_size) alloc_if(0) free_if(0)) \ in(q:length(q_size) alloc_if(0) free_if(0)) \ in(overflow:length(0) alloc_if(0) free_if(0)) \ + in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \ + in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \ in(astart,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,vflag,eatom) \ - in(f_stride,nlocal,minlocal,separate_flag,offload) \ + in(ccache_stride,f_stride,nlocal,minlocal,separate_flag,offload) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ @@ -217,30 +220,37 @@ void 
PairLJCutCoulLongIntel::eval(const int offload, const int vflag, #endif IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, q); + f_stride, x, q); acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = oecoul = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); - - for (int i = iifrom; i < iito; ++i) { + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + + const int toffs = tid * ccache_stride; + flt_t * _noalias const tdelx = ccachex + toffs; + flt_t * _noalias const tdely = ccachey + toffs; + flt_t * _noalias const tdelz = ccachez + toffs; + flt_t * _noalias const trsq = ccachew + toffs; + int * _noalias const tj = ccachei + toffs; + int * _noalias const tjtype = ccachej + toffs; + + for (int i = iifrom; i < iito; i += iip) { const int itype = x[i].w; const int ptr_off = itype * ntypes; @@ -251,100 +261,112 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, const int jnum = numneigh[i]; acc_t fxtmp,fytmp,fztmp,fwtmp; - acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5; + acc_t sevdwl, secoul, sv0, sv1, sv2, 
sv3, sv4, sv5; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; const flt_t qtmp = q[i]; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; - if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } + if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0; + if (NEWTON_PAIR == 0) + if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; + int ej = 0; #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ - sv0, sv1, sv2, sv3, sv4, sv5) + #pragma vector aligned + #pragma ivdep #endif for (int jj = 0; jj < jnum; jj++) { - flt_t forcecoul, forcelj, evdwl, ecoul; - forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; - - const int sbindex = jlist[jj] >> SBBITS & 3; const int j = jlist[jj] & NEIGHMASK; - const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; const int jtype = x[j].w; const flt_t rsq = delx * delx + dely * dely + delz * delz; + if (rsq < c_forcei[jtype].cutsq) { + trsq[ej]=rsq; + tdelx[ej]=delx; + tdely[ej]=dely; + tdelz[ej]=delz; + tjtype[ej]=jtype; + tj[ej]=jlist[jj]; + ej++; + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \ + sv0, sv1, sv2, sv3, sv4, sv5) + #endif + for (int jj = 0; jj < ej; jj++) { + flt_t forcecoul, forcelj, evdwl, ecoul; + forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0; + + const int j = tj[jj] & NEIGHMASK; + const int sbindex = tj[jj] >> SBBITS & 3; + const int jtype = tjtype[jj]; + const flt_t rsq = trsq[jj]; const flt_t r2inv = (flt_t)1.0 / rsq; - #ifdef INTEL_VMASK - if (rsq < c_forcei[jtype].cutsq) { + #ifdef INTEL_ALLOW_TABLE + if (!ncoultablebits || rsq <= tabinnersq) { #endif - #ifdef INTEL_ALLOW_TABLE - if (!ncoultablebits || rsq <= tabinnersq) { - #endif - const flt_t A1 = 0.254829592; - const flt_t A2 = -0.284496736; - const flt_t A3 
= 1.421413741; - const flt_t A4 = -1.453152027; - const flt_t A5 = 1.061405429; - const flt_t EWALD_F = 1.12837917; - const flt_t INV_EWALD_P = 1.0 / 0.3275911; - - const flt_t r = (flt_t)1.0 / sqrt(r2inv); - const flt_t grij = g_ewald * r; - const flt_t expm2 = exp(-grij * grij); - const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); - const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - const flt_t prefactor = qqrd2e * qtmp * q[j] / r; - forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); - if (EFLAG) ecoul = prefactor * erfc; - - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; - - #ifdef INTEL_ALLOW_TABLE - } else { - float rsq_lookup = rsq; - const int itable = (__intel_castf32_u32(rsq_lookup) & - ncoulmask) >> ncoulshiftbits; - const flt_t fraction = (rsq_lookup - table[itable].r) * - table[itable].dr; - - const flt_t tablet = table[itable].f + - fraction * table[itable].df; - forcecoul = qtmp * q[j] * tablet; - if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + - fraction * detable[itable]); - if (sbindex) { - const flt_t table2 = ctable[itable] + - fraction * dctable[itable]; - const flt_t prefactor = qtmp * q[j] * table2; - const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * - prefactor; - forcecoul -= adjust; - if (EFLAG) ecoul -= adjust; - } + const flt_t A1 = 0.254829592; + const flt_t A2 = -0.284496736; + const flt_t A3 = 1.421413741; + const flt_t A4 = -1.453152027; + const flt_t A5 = 1.061405429; + const flt_t EWALD_F = 1.12837917; + const flt_t INV_EWALD_P = 1.0 / 0.3275911; + + const flt_t r = (flt_t)1.0 / sqrt(r2inv); + const flt_t grij = g_ewald * r; + const flt_t expm2 = exp(-grij * grij); + const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij); + const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + const flt_t prefactor = qqrd2e * qtmp * q[j] / r; + forcecoul = prefactor * (erfc + EWALD_F * grij * expm2); + if (EFLAG) ecoul = prefactor * 
erfc; + + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])* + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; + + #ifdef INTEL_ALLOW_TABLE + } else { + float rsq_lookup = rsq; + const int itable = (__intel_castf32_u32(rsq_lookup) & + ncoulmask) >> ncoulshiftbits; + const flt_t fraction = (rsq_lookup - table[itable].r) * + table[itable].dr; + + const flt_t tablet = table[itable].f + + fraction * table[itable].df; + forcecoul = qtmp * q[j] * tablet; + if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] + + fraction * detable[itable]); + if (sbindex) { + const flt_t table2 = ctable[itable] + + fraction * dctable[itable]; + const flt_t prefactor = qtmp * q[j] * table2; + const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) * + prefactor; + forcecoul -= adjust; + if (EFLAG) ecoul -= adjust; } - #endif - #ifdef INTEL_VMASK - } - #endif - - #ifdef INTEL_VMASK - if (rsq < c_forcei[jtype].cut_ljsq) { - #endif + } + #endif + + #ifdef INTEL_VMASK + if (rsq < c_forcei[jtype].cut_ljsq) { + #endif flt_t r6inv = r2inv * r2inv * r2inv; forcelj = r6inv * (c_forcei[jtype].lj1 * r6inv - - c_forcei[jtype].lj2); + c_forcei[jtype].lj2); if (EFLAG) evdwl = r6inv*(c_energyi[jtype].lj3 * r6inv - c_energyi[jtype].lj4) - c_energyi[jtype].offset; @@ -354,83 +376,82 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, forcelj *= factor_lj; if (EFLAG) evdwl *= factor_lj; } - #ifdef INTEL_VMASK - } - #else - if (rsq > c_forcei[jtype].cutsq) - { forcecoul = (flt_t)0.0; ecoul = (flt_t)0.0; } - if (rsq > c_forcei[jtype].cut_ljsq) - { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } - #endif - - #ifdef INTEL_VMASK - if (rsq < c_forcei[jtype].cutsq) { - #endif - const flt_t fpair = (forcecoul + forcelj) * r2inv; - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } + #ifdef INTEL_VMASK + } + #else + if (rsq > 
c_forcei[jtype].cut_ljsq) + { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; } + #endif - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i < nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j < nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - sevdwl += ev_pre * evdwl; - secoul += ev_pre * ecoul; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - if (NEWTON_PAIR || j < nlocal) - f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; - } - } - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz); + const flt_t fpair = (forcecoul + forcelj) * r2inv; + const flt_t fpx = fpair * tdelx[jj]; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * tdely[jj]; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * tdelz[jj]; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + sevdwl += evdwl; + secoul += ecoul; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul; } - #ifdef INTEL_VMASK - } - #endif + } + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj], + fpx, fpy, fpz); } // for jj - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - IP_PRE_ev_tally_atomq(EVFLAG, EFLAG, vflag, f, fwtmp); + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } + + IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel 
region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = oecoul; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) { + oevdwl *= (acc_t)0.5; + oecoul *= (acc_t)0.5; } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + ev_global[0] = oevdwl; + ev_global[1] = oecoul; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -442,7 +463,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -453,6 +474,10 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag, void PairLJCutCoulLongIntel::init_style() { PairLJCutCoulLong::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -480,6 +505,13 @@ template <class flt_t, class acc_t> void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, IntelBuffers<flt_t,acc_t> *buffers) { + int off_ccache = 0; + #ifdef _LMP_INTEL_OFFLOAD + if (_cop >= 0) off_ccache = 1; + #endif + buffers->grow_ccache(off_ccache, comm->nthreads, 1); + _ccache_stride = buffers->ccache_stride(); + int tp1 = atom->ntypes + 1; int ntable = 1; if 
(ncoultablebits) @@ -514,6 +546,9 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, for (int i = 0; i < tp1; i++) { for (int j = 0; j < tp1; j++) { + if (cutsq[i][j] < cut_ljsq[i][j]) + error->all(FLERR, + "Intel variant of lj/cut/coul/long expects lj cutoff<=coulombic"); fc.c_force[i][j].cutsq = cutsq[i][j]; fc.c_force[i][j].cut_ljsq = cut_ljsq[i][j]; fc.c_force[i][j].lj1 = lj1[i][j]; @@ -563,9 +598,9 @@ void PairLJCutCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void PairLJCutCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, - const int ntable, - Memory *memory, - const int cop) { + const int ntable, + Memory *memory, + const int cop) { if ( (ntypes != _ntypes || ntable != _ntable) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD @@ -584,9 +619,9 @@ void PairLJCutCoulLongIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, ospecial_coul != NULL && _cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \ - nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ - nocopy(otable: alloc_if(0) free_if(1)) \ - nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) + nocopy(oc_force, oc_energy: alloc_if(0) free_if(1)) \ + nocopy(otable: alloc_if(0) free_if(1)) \ + nocopy(oetable, odetable, octable, odctable: alloc_if(0) free_if(1)) } #endif diff --git a/src/USER-INTEL/pair_lj_cut_coul_long_intel.h b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h index dad73d18bd4c3389cd2009ad36bd9c5cb867d7d6..288a6a7bc4938aa710c3a036f5568e94964bccab 100644 --- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.h +++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h @@ -42,16 +42,16 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong { private: FixIntel *fix; - int _cop, _lrt; + int _cop, _lrt, _ccache_stride; template <class flt_t> class ForceConst; template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> 
*buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, @@ -76,7 +76,7 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong { ~ForceConst() { set_ntypes(0,0,NULL,_cop); } void set_ntypes(const int ntypes, const int ntable, Memory *memory, - const int cop); + const int cop); private: int _ntypes, _ntable, _cop; diff --git a/src/USER-INTEL/pair_lj_cut_intel.cpp b/src/USER-INTEL/pair_lj_cut_intel.cpp index dd08dc023c295d6f4f21e8f65007f69cf4aff002..4871821842dd327e8d2da1e1a0bb02a8e4146ae9 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.cpp +++ b/src/USER-INTEL/pair_lj_cut_intel.cpp @@ -75,85 +75,64 @@ void PairLJCutIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; if (_onetype) { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1,1>(0, ovflag, 
buffers, fc, host_start, inum); - } else { - eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + if (eflag) { + if (force->newton_pair) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<1,0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<1,0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<1,0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<1,0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } else { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - if (force->newton_pair) { - eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum); - } + if (eflag) { + if (force->newton_pair) { + eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum); } else { - if (force->newton_pair) { - eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end); - 
eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum); - } + eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum); } } else { if (force->newton_pair) { - eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum); + eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum); } } } } -template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, - class acc_t> +template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void PairLJCutIntel::eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc, @@ -181,9 +160,9 @@ void PairLJCutIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -197,48 +176,47 @@ void PairLJCutIntel::eval(const int offload, const int vflag, #endif IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, 0); + f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - 
shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; - FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride); - memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); + int foff; + if (NEWTON_PAIR) foff = tid * f_stride - minlocal; + else foff = -minlocal; + FORCE_T * _noalias const f = f_start + foff; + if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); flt_t cutsq, lj1, lj2, lj3, lj4, offset; if (ONETYPE) { - cutsq = ljc12o[3].cutsq; - lj1 = ljc12o[3].lj1; - lj2 = ljc12o[3].lj2; - lj3 = lj34[3].lj3; - lj4 = lj34[3].lj4; - offset = ljc12o[3].offset; + cutsq = ljc12o[3].cutsq; + lj1 = ljc12o[3].lj1; + lj2 = ljc12o[3].lj2; + lj3 = lj34[3].lj3; + lj4 = lj34[3].lj4; + offset = ljc12o[3].offset; } - for (int i = iifrom; i < iito; ++i) { + for (int i = iifrom; i < iito; i += iip) { int itype, ptr_off; const FC_PACKED1_T * _noalias ljc12oi; const FC_PACKED2_T * _noalias lj34i; - if (!ONETYPE) { - itype = x[i].w; + if (!ONETYPE) { + itype = x[i].w; ptr_off = itype * ntypes; ljc12oi = ljc12o + ptr_off; lj34i = lj34 + ptr_off; - } + } const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; @@ -250,134 +228,134 @@ void PairLJCutIntel::eval(const int offload, const int vflag, const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; fxtmp = fytmp = fztmp = (acc_t)0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (EFLAG) fwtmp = sevdwl = (acc_t)0; + if (NEWTON_PAIR == 0) if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } #if defined(LMP_SIMD_COMPILER) #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, 
sv5) + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ + sv0, sv1, sv2, sv3, sv4, sv5) #endif for (int jj = 0; jj < jnum; jj++) { flt_t forcelj, evdwl; forcelj = evdwl = (flt_t)0.0; - int j, jtype, sbindex; - if (!ONETYPE) { - sbindex = jlist[jj] >> SBBITS & 3; - j = jlist[jj] & NEIGHMASK; - } else - j = jlist[jj]; + int j, jtype, sbindex; + if (!ONETYPE) { + sbindex = jlist[jj] >> SBBITS & 3; + j = jlist[jj] & NEIGHMASK; + } else + j = jlist[jj]; const flt_t delx = xtmp - x[j].x; const flt_t dely = ytmp - x[j].y; const flt_t delz = ztmp - x[j].z; if (!ONETYPE) { - jtype = x[j].w; + jtype = x[j].w; cutsq = ljc12oi[jtype].cutsq; - } + } const flt_t rsq = delx * delx + dely * dely + delz * delz; #ifdef INTEL_VMASK if (rsq < cutsq) { - #endif + #endif flt_t factor_lj; - if (!ONETYPE) factor_lj = special_lj[sbindex]; + if (!ONETYPE) factor_lj = special_lj[sbindex]; flt_t r2inv = 1.0 / rsq; flt_t r6inv = r2inv * r2inv * r2inv; #ifndef INTEL_VMASK - if (rsq > cutsq) r6inv = (flt_t)0.0; - #endif - if (!ONETYPE) { - lj1 = ljc12oi[jtype].lj1; - lj2 = ljc12oi[jtype].lj2; - } + if (rsq > cutsq) r6inv = (flt_t)0.0; + #endif + if (!ONETYPE) { + lj1 = ljc12oi[jtype].lj1; + lj2 = ljc12oi[jtype].lj2; + } forcelj = r6inv * (lj1 * r6inv - lj2); flt_t fpair; - if (!ONETYPE) - fpair = factor_lj * forcelj * r2inv; - else - fpair = forcelj * r2inv; - - fxtmp += delx * fpair; - fytmp += dely * fpair; - fztmp += delz * fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx * fpair; - f[j].y -= dely * fpair; - f[j].z -= delz * fpair; - } - - if (EVFLAG) { - flt_t ev_pre = (flt_t)0; - if (NEWTON_PAIR || i<nlocal) - ev_pre += (flt_t)0.5; - if (NEWTON_PAIR || j<nlocal) - ev_pre += (flt_t)0.5; - - if (EFLAG) { - if (!ONETYPE) { - lj3 = lj34i[jtype].lj3; - lj4 = lj34i[jtype].lj4; - offset = ljc12oi[jtype].offset; - } - evdwl = r6inv * (lj3 * r6inv - lj4); - #ifdef INTEL_VMASK - evdwl -= offset; - #else - if (rsq < cutsq) evdwl -= offset; - #endif - if (!ONETYPE) evdwl *= 
factor_lj; - sevdwl += ev_pre*evdwl; - if (eatom) { - if (NEWTON_PAIR || i < nlocal) - fwtmp += 0.5 * evdwl; - if (NEWTON_PAIR || j < nlocal) - f[j].w += 0.5 * evdwl; - } + if (!ONETYPE) + fpair = factor_lj * forcelj * r2inv; + else + fpair = forcelj * r2inv; + + const flt_t fpx = fpair * delx; + fxtmp += fpx; + if (NEWTON_PAIR) f[j].x -= fpx; + const flt_t fpy = fpair * dely; + fytmp += fpy; + if (NEWTON_PAIR) f[j].y -= fpy; + const flt_t fpz = fpair * delz; + fztmp += fpz; + if (NEWTON_PAIR) f[j].z -= fpz; + + if (EFLAG) { + if (!ONETYPE) { + lj3 = lj34i[jtype].lj3; + lj4 = lj34i[jtype].lj4; + offset = ljc12oi[jtype].offset; + } + evdwl = r6inv * (lj3 * r6inv - lj4); + #ifdef INTEL_VMASK + evdwl -= offset; + #else + if (rsq < cutsq) evdwl -= offset; + #endif + if (!ONETYPE) evdwl *= factor_lj; + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + if (NEWTON_PAIR) + f[j].w += (flt_t)0.5 * evdwl; } - - IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, - delx, dely, delz); } + + if (NEWTON_PAIR == 0) + IP_PRE_ev_tally_nborv(vflag, delx, dely, delz, fpx, fpy, fpz); #ifdef INTEL_VMASK } // if rsq #endif } // for jj - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; + if (NEWTON_PAIR) { + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + } else { + f[i].x = fxtmp; + f[i].y = fytmp; + f[i].z = fztmp; + } - IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp); + IP_PRE_ev_tally_atom(NEWTON_PAIR, EFLAG, vflag, f, fwtmp); } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end omp - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0.0; - } - if (vflag) { - ev_global[2] = 
ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; + + IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + if (NEWTON_PAIR == 0) oevdwl *= (acc_t)0.5; + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + if (NEWTON_PAIR == 0) { + ov0 *= (acc_t)0.5; + ov1 *= (acc_t)0.5; + ov2 *= (acc_t)0.5; + ov3 *= (acc_t)0.5; + ov4 *= (acc_t)0.5; + ov5 *= (acc_t)0.5; } + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -389,7 +367,7 @@ void PairLJCutIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -400,6 +378,10 @@ void PairLJCutIntel::eval(const int offload, const int vflag, void PairLJCutIntel::init_style() { PairLJCut::init_style(); + if (force->newton_pair == 0) { + neighbor->requests[neighbor->nrequest-1]->half = 0; + neighbor->requests[neighbor->nrequest-1]->full = 1; + } neighbor->requests[neighbor->nrequest-1]->intel = 1; int ifix = modify->find_fix("package_intel"); @@ -472,7 +454,7 @@ void PairLJCutIntel::pack_force_const(ForceConst<flt_t> &fc, template <class flt_t> void PairLJCutIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, Memory *memory, - const int cop) { + const int cop) { if (ntypes != _ntypes) { if (_ntypes > 0) { fc_packed1 *oljc12o = ljc12o[0]; diff --git a/src/USER-INTEL/pair_lj_cut_intel.h b/src/USER-INTEL/pair_lj_cut_intel.h index a9c77324f3dffc022b2fb89d1621056b03cb6fb1..b577a046580dc0351e2071c4adb16a15739be493 100644 --- a/src/USER-INTEL/pair_lj_cut_intel.h +++ b/src/USER-INTEL/pair_lj_cut_intel.h @@ -45,8 +45,7 @@ class 
PairLJCutIntel : public PairLJCut { template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, - class acc_t> + template <int ONETYPE, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> void eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc, const int astart, const int aend); diff --git a/src/USER-INTEL/pair_lj_long_coul_long_intel.cpp b/src/USER-INTEL/pair_lj_long_coul_long_intel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..86929d41ea3332db08e1a1295762b63884ac561d --- /dev/null +++ b/src/USER-INTEL/pair_lj_long_coul_long_intel.cpp @@ -0,0 +1,50 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: William McDoniel (RWTH Aachen University) +------------------------------------------------------------------------- */ + +#include <math.h> +#include "pair_lj_long_coul_long_intel.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "group.h" +#include "kspace.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "memory.h" +#include "suffix.h" + + +using namespace LAMMPS_NS; + +#define C_FORCE_T typename ForceConst<flt_t>::c_force_t +#define C_ENERGY_T typename ForceConst<flt_t>::c_energy_t +#define TABLE_T typename ForceConst<flt_t>::table_t + +PairLJLongCoulLongIntel::PairLJLongCoulLongIntel(LAMMPS *lmp) : + PairLJLongCoulLong(lmp) +{ + suffix_flag |= Suffix::INTEL; + respa_enable = 0; + cut_respa = NULL; +} + + +PairLJLongCoulLongIntel::~PairLJLongCoulLongIntel() +{ +} diff --git a/src/USER-OMP/pair_lj_sf_omp.h b/src/USER-INTEL/pair_lj_long_coul_long_intel.h similarity index 57% rename from src/USER-OMP/pair_lj_sf_omp.h rename to src/USER-INTEL/pair_lj_long_coul_long_intel.h index 92db973b3de96031a1c17c5c0470fea1f9e0670a..b7d3504ecdf073956fdfef9f066b12a0143b977b 100644 --- a/src/USER-OMP/pair_lj_sf_omp.h +++ b/src/USER-INTEL/pair_lj_long_coul_long_intel.h @@ -1,4 +1,4 @@ -/* -*- c++ -*- ---------------------------------------------------------- +/* *- c++ -*- ----------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -12,37 +12,28 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) + Contributing authors: 
William McDoniel (RWTH Aachen University) ------------------------------------------------------------------------- */ #ifdef PAIR_CLASS -PairStyle(lj/sf/omp,PairLJShiftedForceOMP) +PairStyle(lj/long/coul/long/intel,PairLJLongCoulLongIntel) #else -#ifndef LMP_PAIR_LJ_SF_OMP_H -#define LMP_PAIR_LJ_SF_OMP_H +#ifndef LMP_PAIR_LJ_LONG_COUL_LONG_INTEL_H +#define LMP_PAIR_LJ_LONG_COUL_LONG_INTEL_H -#include "pair_lj_sf.h" -#include "thr_omp.h" +#include "pair_lj_long_coul_long.h" +#include "fix_intel.h" namespace LAMMPS_NS { + class PairLJLongCoulLongIntel : public PairLJLongCoulLong { + public: + PairLJLongCoulLongIntel(class LAMMPS *); + virtual ~PairLJLongCoulLongIntel(); -class PairLJShiftedForceOMP : public PairLJShiftedForce, public ThrOMP { - - public: - PairLJShiftedForceOMP(class LAMMPS *); - - virtual void compute(int, int); - virtual double memory_usage(); - - private: - template <int EVFLAG, int EFLAG, int NEWTON_PAIR> - void eval(int ifrom, int ito, ThrData * const thr); -}; - + }; } - #endif #endif diff --git a/src/USER-INTEL/pair_sw_intel.cpp b/src/USER-INTEL/pair_sw_intel.cpp index 09e00fd867ec6ca59170243b80d2086d0b4d2172..7a6b7afd92197b22cd26125369aaed8dc8dee090 100644 --- a/src/USER-INTEL/pair_sw_intel.cpp +++ b/src/USER-INTEL/pair_sw_intel.cpp @@ -77,7 +77,7 @@ void PairSWIntel::compute(int eflag, int vflag) { if (fix->precision() == FixIntel::PREC_MODE_MIXED) compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), - force_const_single); + force_const_single); else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) compute<double,double>(eflag, vflag, fix->get_double_buffers(), force_const_double); @@ -109,85 +109,59 @@ void PairSWIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp 
parallel if(packthreads > 1) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + packthreads, sizeof(ATOM_T)); buffers->thr_pack(ifrom, ito, ago); } fix->stop_watch(TIME_PACK); } + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; if (_onetype) { if (_spq) { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } else { - eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } + if (eflag) { + eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { - eval<1,1,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); - eval<1,1,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); + eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } else { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } else { - eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } + if (eflag) { + eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { - eval<0,1,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); - eval<0,1,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); + eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end, 
_offload_pad); + eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } } else { if (_spq) { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - eval<1,0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<1,0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } else { - eval<1,0,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<1,0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } + if (eflag) { + eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { - eval<1,0,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); - eval<1,0,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); + eval<1,0,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<1,0,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } else { - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - eval<0,0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<0,0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } else { - eval<0,0,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); - eval<0,0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); - } + if (eflag) { + eval<0,0,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<0,0,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } else { - eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad); - eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad); + eval<0,0,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad); + eval<0,0,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad); } } } @@ -196,11 +170,11 @@ void PairSWIntel::compute(int eflag, int vflag, /* ---------------------------------------------------------------------- */ #ifndef LMP_USE_AVXCD 
-template <int SPQ,int ONETYPE,int EVFLAG,int EFLAG,class flt_t,class acc_t> +template <int SPQ,int ONETYPE,int EFLAG,class flt_t,class acc_t> void PairSWIntel::eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc, const int astart, - const int aend, const int pad_width) + const int aend, const int pad_width) { const int inum = aend - astart; if (inum == 0) return; @@ -235,7 +209,7 @@ void PairSWIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EVFLAG, EFLAG, vflag, + IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); @@ -276,19 +250,15 @@ void PairSWIntel::eval(const int offload, const int vflag, f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads); iifrom += astart; iito += astart; @@ -308,50 +278,49 @@ void PairSWIntel::eval(const int offload, const int vflag, flt_t sigma_gamma, costheta, lambda_epsilon, lambda_epsilon2; if (ONETYPE) { cutsq = p2[3].cutsq; - cut = p2f[3].cut; - sigma = p2f[3].sigma; - c1 = p2f2[3].c1; - c2 = p2f2[3].c2; - c3 = p2f2[3].c3; - c4 = p2f2[3].c4; - sigma_gamma = p2[3].sigma_gamma; - costheta = p3[7].costheta; - lambda_epsilon = p3[7].lambda_epsilon; - lambda_epsilon2 = 
p3[7].lambda_epsilon2; - if (SPQ == 0) { + cut = p2f[3].cut; + sigma = p2f[3].sigma; + c1 = p2f2[3].c1; + c2 = p2f2[3].c2; + c3 = p2f2[3].c3; + c4 = p2f2[3].c4; + sigma_gamma = p2[3].sigma_gamma; + costheta = p3[7].costheta; + lambda_epsilon = p3[7].lambda_epsilon; + lambda_epsilon2 = p3[7].lambda_epsilon2; + if (SPQ == 0) { powerp = p2f[3].powerp; - powerq = p2f[3].powerq; + powerq = p2f[3].powerq; } - if (EFLAG) { + if (EFLAG) { c5 = p2e[3].c5; - c6 = p2e[3].c6; + c6 = p2e[3].c6; } } - for (int i = iifrom; i < iito; ++i) { + for (int i = iifrom; i < iito; i += iip) { int itype, itype_offset; const flt_t xtmp = x[i].x; const flt_t ytmp = x[i].y; const flt_t ztmp = x[i].z; - if (!ONETYPE) { + if (!ONETYPE) { itype = x[i].w; - itype_offset = itype * ntypes; - } + itype_offset = itype * ntypes; + } const int * _noalias const jlist = firstneigh + cnumneigh[i]; const int jnum = numneigh[i]; - const int jnumhalf = numneighhalf[i]; + const int jnumhalf = numneighhalf[i]; acc_t fxtmp, fytmp, fztmp, fwtmp; - acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; + acc_t sevdwl; fxtmp = fytmp = fztmp = (acc_t)0.0; - if (EVFLAG) { - if (EFLAG) fwtmp = sevdwl = (acc_t)0; - if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0; - } + if (EFLAG) fwtmp = sevdwl = (acc_t)0; - int ejnum = 0, ejnumhalf = 0; + int ejnum = 0, ejnumhalf = 0; + #pragma vector aligned + #pragma ivdep for (int jj = 0; jj < jnum; jj++) { int j = jlist[jj]; j &= NEIGHMASK; @@ -360,117 +329,115 @@ void PairSWIntel::eval(const int offload, const int vflag, const flt_t delz = x[j].z - ztmp; int jtype, ijtype; if (!ONETYPE) { - jtype = x[j].w; - ijtype = itype_offset + jtype; - cutsq = p2[ijtype].cutsq; - } + jtype = x[j].w; + ijtype = itype_offset + jtype; + cutsq = p2[ijtype].cutsq; + } const flt_t rsq1 = delx * delx + dely * dely + delz * delz; if (rsq1 < cutsq) { - tdelx[ejnum] = delx; - tdely[ejnum] = dely; - tdelz[ejnum] = delz; - trsq[ejnum] = rsq1; - tj[ejnum] = j; - if (!ONETYPE) tjtype[ejnum] = jtype; - 
ejnum++; - if (jj < jnumhalf) ejnumhalf++; - } - } - int ejnum_pad = ejnum; - - while ( (ejnum_pad % pad_width) != 0) { - tdelx[ejnum_pad] = (flt_t)0.0; - tdely[ejnum_pad] = (flt_t)0.0; - tdelz[ejnum_pad] = (flt_t)0.0; - trsq[ejnum_pad] = p2[3].cutsq + (flt_t)1.0; - tj[ejnum_pad] = nall; - if (!ONETYPE) tjtype[ejnum_pad] = 0; - ejnum_pad++; - } - + tdelx[ejnum] = delx; + tdely[ejnum] = dely; + tdelz[ejnum] = delz; + trsq[ejnum] = rsq1; + tj[ejnum] = j; + if (!ONETYPE) tjtype[ejnum] = jtype; + ejnum++; + if (jj < jnumhalf) ejnumhalf++; + } + } + int ejnum_pad = ejnum; + + while ( (ejnum_pad % pad_width) != 0) { + tdelx[ejnum_pad] = (flt_t)0.0; + tdely[ejnum_pad] = (flt_t)0.0; + tdelz[ejnum_pad] = (flt_t)0.0; + trsq[ejnum_pad] = p2[3].cutsq + (flt_t)1.0; + tj[ejnum_pad] = nall; + if (!ONETYPE) tjtype[ejnum_pad] = 0; + ejnum_pad++; + } + #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \ - sv0, sv1, sv2, sv3, sv4, sv5) - #endif + #pragma vector aligned + #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl) + #endif for (int jj = 0; jj < ejnum_pad; jj++) { acc_t fjxtmp, fjytmp, fjztmp, fjtmp; fjxtmp = fjytmp = fjztmp = (acc_t)0.0; if (EFLAG) fjtmp = (acc_t)0.0; - int ijtype; + int ijtype; - const flt_t delx = tdelx[jj]; - const flt_t dely = tdely[jj]; - const flt_t delz = tdelz[jj]; - if (!ONETYPE) ijtype = tjtype[jj] + itype_offset; + if (!ONETYPE) ijtype = tjtype[jj] + itype_offset; const flt_t rsq1 = trsq[jj]; const flt_t rinvsq1 = (flt_t)1.0 / rsq1; const flt_t r1 = (flt_t)1.0/sqrt(rinvsq1); - if (!ONETYPE) cut = p2f[ijtype].cut; + if (!ONETYPE) cut = p2f[ijtype].cut; const flt_t rainv1 = (flt_t)1.0 / (r1 - cut); - - // two-body interactions, skip half of them - flt_t rp, rq; - if (SPQ == 1) { - rp = r1 * r1; - rp *= rp; - rp = (flt_t)1.0 / rp; - rq = (flt_t)1.0; - } else { + + // two-body interactions, skip half of them + flt_t rp, rq; + if (SPQ == 1) { + rp = r1 * r1; + rp *= rp; + rp = 
(flt_t)1.0 / rp; + rq = (flt_t)1.0; + } else { if (!ONETYPE) { powerp = p2f[ijtype].powerp; - powerq = p2f[ijtype].powerq; + powerq = p2f[ijtype].powerq; } - rp = std::pow(r1, powerp); - rq = std::pow(r1, powerq); - } + rp = std::pow(r1, powerp); + rq = std::pow(r1, powerq); + } - if (!ONETYPE) { + if (!ONETYPE) { sigma = p2f[ijtype].sigma; - c1 = p2f2[ijtype].c1; - c2 = p2f2[ijtype].c2; - c3 = p2f2[ijtype].c3; - c4 = p2f2[ijtype].c4; + c1 = p2f2[ijtype].c1; + c2 = p2f2[ijtype].c2; + c3 = p2f2[ijtype].c3; + c4 = p2f2[ijtype].c4; + } + + const flt_t rainvsq = rainv1 * rainv1 * r1; + flt_t expsrainv = exp(sigma * rainv1); + if (jj >= ejnumhalf) expsrainv = (flt_t)0.0; + const flt_t fpair = (c1 * rp - c2 * rq + (c3 * rp - c4 * rq) * + rainvsq) * expsrainv * rinvsq1; + + const flt_t delx = tdelx[jj]; + const flt_t dely = tdely[jj]; + const flt_t delz = tdelz[jj]; + const flt_t fpx = fpair * delx; + fxtmp -= fpx; + fjxtmp += fpx; + const flt_t fpy = fpair * dely; + fytmp -= fpy; + fjytmp += fpy; + const flt_t fpz = fpair * delz; + fztmp -= fpz; + fjztmp += fpz; + + if (EFLAG) { + flt_t evdwl; + if (!ONETYPE) { + c5 = p2e[ijtype].c5; + c6 = p2e[ijtype].c6; + } + evdwl = (c5 * rp - c6 * rq) * expsrainv; + sevdwl += evdwl; + if (eatom) { + fwtmp += (flt_t)0.5 * evdwl; + fjtmp += (flt_t)0.5 * evdwl; + } } - const flt_t rainvsq = rainv1 * rainv1 * r1; - flt_t expsrainv = exp(sigma * rainv1); - if (jj >= ejnumhalf) expsrainv = (flt_t)0.0; - const flt_t fpair = (c1 * rp - c2 * rq + (c3 * rp - c4 * rq) * - rainvsq) * expsrainv * rinvsq1; - - fxtmp -= delx * fpair; - fytmp -= dely * fpair; - fztmp -= delz * fpair; - fjxtmp += delx * fpair; - fjytmp += dely * fpair; - fjztmp += delz * fpair; - - if (EVFLAG) { - if (EFLAG) { - flt_t evdwl; - if (!ONETYPE) { - c5 = p2e[ijtype].c5; - c6 = p2e[ijtype].c6; - } - evdwl = (c5 * rp - c6 * rq) * expsrainv; - sevdwl += evdwl; - if (eatom) { - fwtmp += (acc_t)0.5 * evdwl; - fjtmp += (acc_t)0.5 * evdwl; - } - } - IP_PRE_ev_tally_nbor(vflag, 
(flt_t)1.0, fpair, - -delx, -dely, -delz); - } - - /*---------------------------------------------*/ - - int ijkoff; - if (!ONETYPE) { + /*---------------------------------------------*/ + + int ijkoff; + if (!ONETYPE) { sigma_gamma = p2[ijtype].sigma_gamma; - ijkoff = ijtype * ntypes; + ijkoff = ijtype * ntypes; } flt_t gsrainv1 = sigma_gamma * rainv1; @@ -479,15 +446,15 @@ void PairSWIntel::eval(const int offload, const int vflag, for (int kk = 0; kk < ejnum; kk++) { int iktype, ijktype; - if (!ONETYPE) { + if (!ONETYPE) { iktype = tjtype[kk]; - ijktype = ijkoff + iktype; - iktype += itype_offset; - cut = p2[iktype].cut; - sigma_gamma = p2[iktype].sigma_gamma; - costheta = p3[ijktype].costheta; - lambda_epsilon = p3[ijktype].lambda_epsilon; - lambda_epsilon2 = p3[ijktype].lambda_epsilon2; + ijktype = ijkoff + iktype; + iktype += itype_offset; + cut = p2[iktype].cut; + sigma_gamma = p2[iktype].sigma_gamma; + costheta = p3[ijktype].costheta; + lambda_epsilon = p3[ijktype].lambda_epsilon; + lambda_epsilon2 = p3[ijktype].lambda_epsilon2; } flt_t delr2[3]; @@ -496,95 +463,88 @@ void PairSWIntel::eval(const int offload, const int vflag, delr2[2] = tdelz[kk]; const flt_t rsq2 = trsq[kk]; - const flt_t rinvsq2 = (flt_t)1.0 / rsq2; - const flt_t r2 = (flt_t)1.0 / sqrt(rinvsq2); - const flt_t rainv2 = (flt_t)1.0 / (r2 - cut); - const flt_t gsrainv2 = sigma_gamma * rainv2; - const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; - const flt_t expgsrainv2 = exp(gsrainv2); + const flt_t rinvsq2 = (flt_t)1.0 / rsq2; + const flt_t r2 = (flt_t)1.0 / sqrt(rinvsq2); + const flt_t rainv2 = (flt_t)1.0 / (r2 - cut); + const flt_t gsrainv2 = sigma_gamma * rainv2; + const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; + const flt_t expgsrainv2 = exp(gsrainv2); - const flt_t rinv12 = (flt_t)1.0 / (r1 * r2); - const flt_t cs = (delx * delr2[0] + dely * delr2[1] + + const flt_t rinv12 = (flt_t)1.0 / (r1 * r2); + const flt_t cs = (delx * delr2[0] + dely * delr2[1] + delz * delr2[2]) * rinv12; - 
const flt_t delcs = cs - costheta; - const flt_t delcssq = delcs*delcs; - - flt_t kfactor; - if (jj == kk || jj >= ejnum) kfactor = (flt_t)0.0; - else kfactor = (flt_t)1.0; - - const flt_t facexp = expgsrainv1*expgsrainv2*kfactor; - const flt_t facrad = lambda_epsilon * facexp * delcssq; - const flt_t frad1 = facrad*gsrainvsq1; - const flt_t frad2 = facrad*gsrainvsq2; - const flt_t facang = lambda_epsilon2 * facexp * delcs; - const flt_t facang12 = rinv12*facang; - const flt_t csfacang = cs*facang; - const flt_t csfac1 = rinvsq1*csfacang; - - const flt_t fjx = delx*(frad1+csfac1)-delr2[0]*facang12; - const flt_t fjy = dely*(frad1+csfac1)-delr2[1]*facang12; - const flt_t fjz = delz*(frad1+csfac1)-delr2[2]*facang12; - - fxtmp -= fjx; - fytmp -= fjy; - fztmp -= fjz; - fjxtmp += fjx; - fjytmp += fjy; - fjztmp += fjz; - - if (EVFLAG) { - if (EFLAG) { - const flt_t evdwl = facrad * (flt_t)0.5; - sevdwl += evdwl; - if (eatom) { - fwtmp += (acc_t)0.33333333 * evdwl; - fjtmp += (acc_t)0.33333333 * facrad; - } - } - IP_PRE_ev_tally_nbor3v(vflag, fjx, fjy, fjz, - delx, dely, delz); - } - } // for kk - const int j = tj[jj]; + const flt_t delcs = cs - costheta; + const flt_t delcssq = delcs*delcs; + + flt_t kfactor; + if (jj == kk || jj >= ejnum) kfactor = (flt_t)0.0; + else kfactor = (flt_t)1.0; + + const flt_t facexp = expgsrainv1*expgsrainv2*kfactor; + const flt_t facrad = lambda_epsilon * facexp * delcssq; + const flt_t frad1 = facrad*gsrainvsq1; + const flt_t frad2 = facrad*gsrainvsq2; + const flt_t facang = lambda_epsilon2 * facexp * delcs; + const flt_t facang12 = rinv12*facang; + const flt_t csfacang = cs*facang; + const flt_t csfac1 = rinvsq1*csfacang; + + const flt_t fjx = delx*(frad1+csfac1)-delr2[0]*facang12; + const flt_t fjy = dely*(frad1+csfac1)-delr2[1]*facang12; + const flt_t fjz = delz*(frad1+csfac1)-delr2[2]*facang12; + + fxtmp -= fjx; + fytmp -= fjy; + fztmp -= fjz; + fjxtmp += fjx; + fjytmp += fjy; + fjztmp += fjz; + + if (EFLAG) { + const flt_t evdwl = 
facrad * (flt_t)0.5; + sevdwl += evdwl; + if (eatom) { + fwtmp += (acc_t)0.33333333 * evdwl; + fjtmp += (acc_t)0.33333333 * facrad; + } + } + } // for kk + const int j = tj[jj]; f[j].x += fjxtmp; f[j].y += fjytmp; f[j].z += fjztmp; - if (EFLAG) - if (eatom) f[j].w += fjtmp; + if (EFLAG) + if (eatom) f[j].w += fjtmp; } // for jj f[i].x += fxtmp; f[i].y += fytmp; f[i].z += fztmp; - IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp); + + if (EFLAG) { + f[i].w += fwtmp; + oevdwl += sevdwl; + } } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(1, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(1, nall, minlocal, nthreads, f_start, f_stride, + x, offload, vflag, ov0, ov1, ov2, ov3, ov4, ov5); } // end omp - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0.0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; - } + + IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -595,13 +555,13 @@ void PairSWIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); } -#else +#else /* ---------------------------------------------------------------------- @@ -614,11 +574,11 @@ authors for more details. 
------------------------------------------------------------------------- */ -template <int SPQ,int ONETYPE,int EVFLAG,int EFLAG,class flt_t,class acc_t> +template <int SPQ,int ONETYPE,int EFLAG,class flt_t,class acc_t> void PairSWIntel::eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, const int astart, - const int aend, const int pad_width) + const ForceConst<flt_t> &fc, const int astart, + const int aend, const int pad_width) { typedef typename SIMD_type<flt_t>::SIMD_vec SIMD_flt_t; typedef typename SIMD_type<acc_t>::SIMD_vec SIMD_acc_t; @@ -659,7 +619,7 @@ void PairSWIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EVFLAG, EFLAG, vflag, + IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EFLAG, vflag, buffers, offload, fix, separate_flag, x_size, q_size, ev_size, f_stride); @@ -686,7 +646,7 @@ void PairSWIntel::eval(const int offload, const int vflag, in(ccachei,ccachej,ccachef:length(0) alloc_if(0) free_if(0)) \ in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \ in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \ - in(ccache_stride3) \ + in(ccache_stride3) \ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \ out(timer_compute:length(1) alloc_if(0) free_if(0)) \ @@ -701,19 +661,17 @@ void PairSWIntel::eval(const int offload, const int vflag, f_stride, x, 0); acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel 
reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5) #endif { - int iifrom, iito, tid; - IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads, swidth); + int iifrom, iip, iito, tid; + IP_PRE_omp_stride_id_vec(iifrom, iip, iito, tid, inum, nthreads, + swidth); + iifrom += astart; iito += astart; @@ -734,22 +692,22 @@ void PairSWIntel::eval(const int offload, const int vflag, SIMD_flt_t cutsq, cut, powerp, powerq, sigma, c1, c2, c3,c4, c5, c6; SIMD_flt_t sigma_gamma, costheta, lambda_epsilon, lambda_epsilon2; if (ONETYPE) { - cutsq = SIMD_set(p2[3].cutsq); - cut = SIMD_set(p2f[3].cut); - sigma = SIMD_set(p2f[3].sigma); - c1 = SIMD_set(p2f2[3].c1); - c2 = SIMD_set(p2f2[3].c2); - c3 = SIMD_set(p2f2[3].c3); - c4 = SIMD_set(p2f2[3].c4); - sigma_gamma = SIMD_set(p2[3].sigma_gamma); - costheta = SIMD_set(p3[7].costheta); - lambda_epsilon = SIMD_set(p3[7].lambda_epsilon); - lambda_epsilon2 = SIMD_set(p3[7].lambda_epsilon2); - if (SPQ == 0) { - powerp = SIMD_set(p2f[3].powerp); - powerq = SIMD_set(p2f[3].powerq); - } - if (EFLAG) { + cutsq = SIMD_set(p2[3].cutsq); + cut = SIMD_set(p2f[3].cut); + sigma = SIMD_set(p2f[3].sigma); + c1 = SIMD_set(p2f2[3].c1); + c2 = SIMD_set(p2f2[3].c2); + c3 = SIMD_set(p2f2[3].c3); + c4 = SIMD_set(p2f2[3].c4); + sigma_gamma = SIMD_set(p2[3].sigma_gamma); + costheta = SIMD_set(p3[7].costheta); + lambda_epsilon = SIMD_set(p3[7].lambda_epsilon); + lambda_epsilon2 = SIMD_set(p3[7].lambda_epsilon2); + if (SPQ == 0) { + powerp = SIMD_set(p2f[3].powerp); + powerq = SIMD_set(p2f[3].powerq); + } + if (EFLAG) { c5 = SIMD_set(p2e[3].c5); c6 = SIMD_set(p2e[3].c6); } @@ -757,130 +715,120 @@ void PairSWIntel::eval(const int offload, const int vflag, SIMD_int ilist = SIMD_set(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); const SIMD_int goffset = SIMD_set(0,16,32,48,64,80,96,112,128, - 144,160,176,192,208,224,240); + 144,160,176,192,208,224,240); ilist = ilist + iifrom; acc_t * const dforce = &(f[0].x); - for (int i = iifrom; i < iito; i += swidth) { - SIMD_mask imask = 
ilist < iito; - SIMD_flt_t xtmp, ytmp, ztmp; - SIMD_int itype, itype_offset; - - if (ONETYPE) - SIMD_atom_gather(imask, &(x[i].x), goffset, xtmp, ytmp, ztmp); - else { - SIMD_atom_gather(imask, &(x[i].x), goffset, xtmp, ytmp, ztmp, itype); - itype_offset = itype * ntypes; - } - - #ifdef OUTER_CHUNK - const int* ng = firstneigh + cnumneigh[i] - swidth; - #else + for (int i = iifrom; i < iito; i += iip) { + SIMD_mask imask = ilist < iito; + SIMD_flt_t xtmp, ytmp, ztmp; + SIMD_int itype, itype_offset; + + if (ONETYPE) + SIMD_atom_gather(imask, &(x[i].x), goffset, xtmp, ytmp, ztmp); + else { + SIMD_atom_gather(imask, &(x[i].x), goffset, xtmp, ytmp, ztmp, itype); + itype_offset = itype * ntypes; + } + + #ifdef OUTER_CHUNK + const int* ng = firstneigh + cnumneigh[i] - swidth; + #else SIMD_int ng = SIMD_load(cnumneigh + i); - ng = ng - 1; - #endif - const SIMD_int jnum = SIMD_loadz(imask, numneigh + i); - const SIMD_int jnumhalf = SIMD_loadz(imask, numneighhalf + i); - const int jnum_max = SIMD_max(jnum); - - SIMD_acc_t fxtmp = SIMD_set((acc_t)0); - SIMD_acc_t fytmp = SIMD_set((acc_t)0); - SIMD_acc_t fztmp = SIMD_set((acc_t)0); - SIMD_acc_t fwtmp, fxtmp2, fytmp2, fztmp2, fwtmp2; - if (is_same<flt_t,acc_t>::value == 0) { - fxtmp2 = SIMD_set((acc_t)0); - fytmp2 = SIMD_set((acc_t)0); - fztmp2 = SIMD_set((acc_t)0); + ng = ng - 1; + #endif + const SIMD_int jnum = SIMD_loadz(imask, numneigh + i); + const SIMD_int jnumhalf = SIMD_loadz(imask, numneighhalf + i); + const int jnum_max = SIMD_max(jnum); + + SIMD_acc_t fxtmp = SIMD_set((acc_t)0); + SIMD_acc_t fytmp = SIMD_set((acc_t)0); + SIMD_acc_t fztmp = SIMD_set((acc_t)0); + SIMD_acc_t fwtmp, fxtmp2, fytmp2, fztmp2, fwtmp2; + if (is_same<flt_t,acc_t>::value == 0) { + fxtmp2 = SIMD_set((acc_t)0); + fytmp2 = SIMD_set((acc_t)0); + fztmp2 = SIMD_set((acc_t)0); if (EFLAG) fwtmp2 = SIMD_set((acc_t)0); - } + } - SIMD_acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; - if (EVFLAG) { - if (EFLAG) { - fwtmp = SIMD_set((acc_t)0); - sevdwl = 
SIMD_set((acc_t)0); - } - if (vflag==1) { - sv0 = SIMD_set((acc_t)0); - sv1 = SIMD_set((acc_t)0); - sv2 = SIMD_set((acc_t)0); - sv3 = SIMD_set((acc_t)0); - sv4 = SIMD_set((acc_t)0); - sv5 = SIMD_set((acc_t)0); - } + SIMD_acc_t sevdwl; + if (EFLAG) { + fwtmp = SIMD_set((acc_t)0); + sevdwl = SIMD_set((acc_t)0); } - SIMD_int ejnum = SIMD_set(0); - SIMD_int ejnumhalf = SIMD_set(0); - SIMD_int coffset = SIMD_set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15); + SIMD_int ejnum = SIMD_set(0); + SIMD_int ejnumhalf = SIMD_set(0); + SIMD_int coffset = SIMD_set(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15); for (int jj = 0; jj < jnum_max; jj++) { SIMD_mask jmask = jj < jnum; - #ifdef OUTER_CHUNK - ng += swidth; - SIMD_int j = SIMD_load(ng); - #else - ng = ng + 1; - SIMD_int j = SIMD_gather(jmask, firstneigh, ng); - #endif + #ifdef OUTER_CHUNK + ng += swidth; + SIMD_int j = SIMD_load(ng); + #else + ng = ng + 1; + SIMD_int j = SIMD_gather(jmask, firstneigh, ng); + #endif j = j & SIMD_set(NEIGHMASK); - const SIMD_int joffset = j << 4; - - SIMD_flt_t delx, dely, delz; - SIMD_int jtype, ijtype; - if (ONETYPE) - SIMD_atom_gather(jmask, &(x[0].x), joffset, delx, dely, delz); - else { - SIMD_atom_gather(jmask, &(x[0].x), joffset, delx, dely, delz, - jtype); - ijtype = (jtype + itype_offset) << 2; - cutsq = SIMD_gather(jmask, &(p2[0].cutsq), ijtype); - } - - delx = delx - xtmp; - dely = dely - ytmp; - delz = delz - ztmp; + const SIMD_int joffset = j << 4; + + SIMD_flt_t delx, dely, delz; + SIMD_int jtype, ijtype; + if (ONETYPE) + SIMD_atom_gather(jmask, &(x[0].x), joffset, delx, dely, delz); + else { + SIMD_atom_gather(jmask, &(x[0].x), joffset, delx, dely, delz, + jtype); + ijtype = (jtype + itype_offset) << 2; + cutsq = SIMD_gather(jmask, &(p2[0].cutsq), ijtype); + } + + delx = delx - xtmp; + dely = dely - ytmp; + delz = delz - ztmp; SIMD_flt_t rsq1 = delx * delx; - rsq1 = SIMD_fma(dely, dely, rsq1); - rsq1 = SIMD_fma(delz, delz, rsq1); - - const SIMD_mask rmask = 
SIMD_lt(jmask, rsq1, cutsq); - SIMD_scatter(rmask, tdelx, coffset, delx); - SIMD_scatter(rmask, tdely, coffset, dely); - SIMD_scatter(rmask, tdelz, coffset, delz); - SIMD_scatter(rmask, trsq, coffset, rsq1); - SIMD_scatter(rmask, tj, coffset, j); - if (!ONETYPE) SIMD_scatter(rmask, tjtype, coffset, jtype); - ejnum = SIMD_add(rmask, ejnum, 1); - coffset = SIMD_add(rmask, coffset, swidth); - const SIMD_mask hmask = SIMD_lt(rmask, SIMD_set(jj), jnumhalf); - ejnumhalf = SIMD_add(hmask, ejnumhalf, 1); - } - - const int ejnum_max = SIMD_max(ejnum); - const int ejnumhalf_max = SIMD_max(ejnumhalf); - memset(tf, 0, ejnum_max * sizeof(acc_t) * swidth * 3); + rsq1 = SIMD_fma(dely, dely, rsq1); + rsq1 = SIMD_fma(delz, delz, rsq1); + + const SIMD_mask rmask = SIMD_lt(jmask, rsq1, cutsq); + SIMD_scatter(rmask, tdelx, coffset, delx); + SIMD_scatter(rmask, tdely, coffset, dely); + SIMD_scatter(rmask, tdelz, coffset, delz); + SIMD_scatter(rmask, trsq, coffset, rsq1); + SIMD_scatter(rmask, tj, coffset, j); + if (!ONETYPE) SIMD_scatter(rmask, tjtype, coffset, jtype); + ejnum = SIMD_add(rmask, ejnum, 1); + coffset = SIMD_add(rmask, coffset, swidth); + const SIMD_mask hmask = SIMD_lt(rmask, SIMD_set(jj), jnumhalf); + ejnumhalf = SIMD_add(hmask, ejnumhalf, 1); + } + + const int ejnum_max = SIMD_max(ejnum); + const int ejnumhalf_max = SIMD_max(ejnumhalf); + memset(tf, 0, ejnum_max * sizeof(acc_t) * swidth * 3); for (int jj = 0; jj < ejnum_max; jj++) { SIMD_int ijtype; - const int coffset = jj * swidth; - if (!ONETYPE) { - ijtype = SIMD_load(tjtype + coffset); - ijtype = (ijtype + itype_offset) << 2; - cut = SIMD_gather(&(p2f[0].cut), ijtype); - } - - SIMD_acc_t fjxtmp = SIMD_set((acc_t)0); - SIMD_acc_t fjytmp = SIMD_set((acc_t)0); - SIMD_acc_t fjztmp = SIMD_set((acc_t)0); - SIMD_acc_t fjtmp, fjxtmp2, fjytmp2, fjztmp2, fjtmp2; + const int coffset = jj * swidth; + if (!ONETYPE) { + ijtype = SIMD_load(tjtype + coffset); + ijtype = (ijtype + itype_offset) << 2; + cut = 
SIMD_gather(&(p2f[0].cut), ijtype); + } + + SIMD_acc_t fjxtmp = SIMD_set((acc_t)0); + SIMD_acc_t fjytmp = SIMD_set((acc_t)0); + SIMD_acc_t fjztmp = SIMD_set((acc_t)0); + SIMD_acc_t fjtmp, fjxtmp2, fjytmp2, fjztmp2, fjtmp2; if (EFLAG) fjtmp = SIMD_set((acc_t)0.0); - if (is_same<flt_t,acc_t>::value == 0) { - fjxtmp2 = SIMD_set((acc_t)0); - fjytmp2 = SIMD_set((acc_t)0); - fjztmp2 = SIMD_set((acc_t)0); - if (EFLAG) fjtmp2 = SIMD_set((acc_t)0.0); - } + if (is_same<flt_t,acc_t>::value == 0) { + fjxtmp2 = SIMD_set((acc_t)0); + fjytmp2 = SIMD_set((acc_t)0); + fjztmp2 = SIMD_set((acc_t)0); + if (EFLAG) fjtmp2 = SIMD_set((acc_t)0.0); + } const SIMD_flt_t delx = SIMD_load(tdelx + coffset); const SIMD_flt_t dely = SIMD_load(tdely + coffset); @@ -888,251 +836,223 @@ void PairSWIntel::eval(const int offload, const int vflag, const SIMD_flt_t rsq1 = SIMD_load(trsq + coffset); const SIMD_flt_t rinvsq1 = SIMD_rcp(rsq1); - const SIMD_flt_t r1 = SIMD_invsqrt(rinvsq1); + const SIMD_flt_t r1 = SIMD_invsqrt(rinvsq1); const SIMD_flt_t rainv1 = SIMD_rcp(r1 - cut); - - // two-body interactions, skip half of them - if (jj < ejnumhalf_max) { + + // two-body interactions, skip half of them + if (jj < ejnumhalf_max) { SIMD_flt_t rp, rq; - if (SPQ == 1) { + if (SPQ == 1) { rp = r1 * r1; - rp = rp * rp; - rp = SIMD_rcp(rp); - rq = SIMD_set((flt_t)1.0); + rp = rp * rp; + rp = SIMD_rcp(rp); + rq = SIMD_set((flt_t)1.0); } else { - if (!ONETYPE) { - powerp = SIMD_gather(&(p2f[0].powerp), ijtype); - powerq = SIMD_gather(&(p2f[0].powerq), ijtype); - } - rp = SIMD_pow(r1, powerp); - rq = SIMD_pow(r1, powerq); - } - - if (!ONETYPE) { - sigma = SIMD_gather(&(p2f[0].sigma), ijtype); - c1 = SIMD_gather(&(p2f2[0].c1), ijtype); - c2 = SIMD_gather(&(p2f2[0].c2), ijtype); - c3 = SIMD_gather(&(p2f2[0].c3), ijtype); - c4 = SIMD_gather(&(p2f2[0].c4), ijtype); - } - - const SIMD_flt_t rainvsq = rainv1 * rainv1 * r1; - const SIMD_flt_t expsrainv = SIMD_exp(sigma * rainv1); - const SIMD_flt_t fpair = (c1 * rp - c2 * 
rq + (c3 * rp - c4 * rq) * - rainvsq) * expsrainv * rinvsq1; - - const SIMD_flt_t fjx = delx * fpair; - const SIMD_flt_t fjy = dely * fpair; - const SIMD_flt_t fjz = delz * fpair; - - const SIMD_mask hmask = jj < ejnumhalf; - SIMD_accumulate3(hmask, fjx, fjy, fjz, fxtmp, fytmp, fztmp, - fjxtmp, fjytmp, fjztmp, fxtmp2, fytmp2, - fztmp2, fjxtmp2, fjytmp2, fjztmp2); - - if (EVFLAG) { - if (EFLAG) { - if (!ONETYPE) { - c5 = SIMD_gather(&(p2e[0].c5), ijtype); - c6 = SIMD_gather(&(p2e[0].c6), ijtype); - } - SIMD_flt_t evdwl; - evdwl = (c5 * rp - c6 * rq) * expsrainv; - SIMD_acc_energy3(hmask, evdwl, eatom, sevdwl, fwtmp, fjtmp, - fwtmp2, fjtmp2); - } - SIMD_ev_tally_nbor(hmask, vflag, (flt_t)1.0, fpair, delx, dely, - delz, sv0, sv1, sv2, sv3, sv4, sv5); - } + if (!ONETYPE) { + powerp = SIMD_gather(&(p2f[0].powerp), ijtype); + powerq = SIMD_gather(&(p2f[0].powerq), ijtype); + } + rp = SIMD_pow(r1, powerp); + rq = SIMD_pow(r1, powerq); + } + + if (!ONETYPE) { + sigma = SIMD_gather(&(p2f[0].sigma), ijtype); + c1 = SIMD_gather(&(p2f2[0].c1), ijtype); + c2 = SIMD_gather(&(p2f2[0].c2), ijtype); + c3 = SIMD_gather(&(p2f2[0].c3), ijtype); + c4 = SIMD_gather(&(p2f2[0].c4), ijtype); + } + + const SIMD_flt_t rainvsq = rainv1 * rainv1 * r1; + const SIMD_flt_t expsrainv = SIMD_exp(sigma * rainv1); + const SIMD_flt_t fpair = (c1 * rp - c2 * rq + (c3 * rp - c4 * rq) * + rainvsq) * expsrainv * rinvsq1; + + const SIMD_flt_t fjx = delx * fpair; + const SIMD_flt_t fjy = dely * fpair; + const SIMD_flt_t fjz = delz * fpair; + + const SIMD_mask hmask = jj < ejnumhalf; + SIMD_accumulate3(hmask, fjx, fjy, fjz, fxtmp, fytmp, fztmp, + fjxtmp, fjytmp, fjztmp, fxtmp2, fytmp2, + fztmp2, fjxtmp2, fjytmp2, fjztmp2); + + if (EFLAG) { + if (!ONETYPE) { + c5 = SIMD_gather(&(p2e[0].c5), ijtype); + c6 = SIMD_gather(&(p2e[0].c6), ijtype); + } + SIMD_flt_t evdwl; + evdwl = (c5 * rp - c6 * rq) * expsrainv; + SIMD_acc_energy3(hmask, evdwl, eatom, sevdwl, fwtmp, fjtmp, + fwtmp2, fjtmp2); + } } - 
/*---------------------------------------------*/ - SIMD_int ijkoff; - if (!ONETYPE) { - sigma_gamma = SIMD_gather(&(p2[0].sigma_gamma), ijtype); - ijkoff = ijtype * ntypes; - } + /*---------------------------------------------*/ + SIMD_int ijkoff; + if (!ONETYPE) { + sigma_gamma = SIMD_gather(&(p2[0].sigma_gamma), ijtype); + ijkoff = ijtype * ntypes; + } const SIMD_flt_t gsrainv1 = sigma_gamma * rainv1; const SIMD_flt_t gsrainvsq1 = gsrainv1 * rainv1 / r1; const SIMD_flt_t expgsrainv1 = SIMD_exp(gsrainv1); - const SIMD_mask jmask = jj < ejnum; + const SIMD_mask jmask = jj < ejnum; for (int kk = jj+1; kk < ejnum_max; kk++) { - SIMD_int iktype, ijktype; - const int kcoffset = kk * swidth; - if (!ONETYPE) { - iktype = SIMD_load(tjtype + kcoffset); - ijktype = ijkoff + (iktype << 2); - iktype = (iktype + itype_offset) << 2; - cut = SIMD_gather(&(p2[0].cut), iktype); - sigma_gamma = SIMD_gather(&(p2[0].sigma_gamma), iktype); - costheta = SIMD_gather(&(p3[0].costheta), ijktype); - lambda_epsilon = SIMD_gather(&(p3[0].lambda_epsilon), ijktype); - lambda_epsilon2 = SIMD_gather(&(p3[0].lambda_epsilon2), ijktype); - } - const SIMD_flt_t delr2x = SIMD_load(tdelx + kcoffset); - const SIMD_flt_t delr2y = SIMD_load(tdely + kcoffset); - const SIMD_flt_t delr2z = SIMD_load(tdelz + kcoffset); - const SIMD_flt_t rsq2 = SIMD_load(trsq + kcoffset); - - const SIMD_flt_t rinvsq2 = SIMD_rcp(rsq2); - const SIMD_flt_t r2 = SIMD_invsqrt(rinvsq2); - const SIMD_flt_t rainv2 = SIMD_rcp(r2 - cut); - const SIMD_flt_t gsrainv2 = sigma_gamma * rainv2; - const SIMD_flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; - const SIMD_flt_t expgsrainv2 = SIMD_exp(gsrainv2); - const SIMD_flt_t rinv12 = SIMD_rcp(r1 * r2); - const SIMD_flt_t cs = (delx * delr2x + dely * delr2y + + SIMD_int iktype, ijktype; + const int kcoffset = kk * swidth; + if (!ONETYPE) { + iktype = SIMD_load(tjtype + kcoffset); + ijktype = ijkoff + (iktype << 2); + iktype = (iktype + itype_offset) << 2; + cut = SIMD_gather(&(p2[0].cut), 
iktype); + sigma_gamma = SIMD_gather(&(p2[0].sigma_gamma), iktype); + costheta = SIMD_gather(&(p3[0].costheta), ijktype); + lambda_epsilon = SIMD_gather(&(p3[0].lambda_epsilon), ijktype); + lambda_epsilon2 = SIMD_gather(&(p3[0].lambda_epsilon2), ijktype); + } + const SIMD_flt_t delr2x = SIMD_load(tdelx + kcoffset); + const SIMD_flt_t delr2y = SIMD_load(tdely + kcoffset); + const SIMD_flt_t delr2z = SIMD_load(tdelz + kcoffset); + const SIMD_flt_t rsq2 = SIMD_load(trsq + kcoffset); + + const SIMD_flt_t rinvsq2 = SIMD_rcp(rsq2); + const SIMD_flt_t r2 = SIMD_invsqrt(rinvsq2); + const SIMD_flt_t rainv2 = SIMD_rcp(r2 - cut); + const SIMD_flt_t gsrainv2 = sigma_gamma * rainv2; + const SIMD_flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2; + const SIMD_flt_t expgsrainv2 = SIMD_exp(gsrainv2); + const SIMD_flt_t rinv12 = SIMD_rcp(r1 * r2); + const SIMD_flt_t cs = (delx * delr2x + dely * delr2y + delz * delr2z) * rinv12; - const SIMD_flt_t delcs = cs - costheta; - const SIMD_flt_t delcssq = delcs*delcs; - - const SIMD_flt_t facexp = expgsrainv1*expgsrainv2; - const SIMD_flt_t facrad = lambda_epsilon * facexp * delcssq; - const SIMD_flt_t frad1 = facrad * gsrainvsq1; - const SIMD_flt_t frad2 = facrad * gsrainvsq2; - const SIMD_flt_t facang = lambda_epsilon2 * facexp * delcs; - const SIMD_flt_t facang12 = rinv12 * facang; - const SIMD_flt_t csfacang = cs * facang; - - const SIMD_flt_t csfac1 = rinvsq1 * csfacang; - const SIMD_flt_t fjx = delx * (frad1 + csfac1)-delr2x*facang12; - const SIMD_flt_t fjy = dely * (frad1 + csfac1)-delr2y*facang12; - const SIMD_flt_t fjz = delz * (frad1 + csfac1)-delr2z*facang12; - - const SIMD_flt_t csfac2 = rinvsq2 * csfacang; - SIMD_flt_t fkx = delx * facang12 - delr2x * (frad2 + csfac2); - SIMD_flt_t fky = dely * facang12 - delr2y * (frad2 + csfac2); - SIMD_flt_t fkz = delz * facang12 - delr2z * (frad2 + csfac2); - - const SIMD_mask kmask = SIMD_lt(jmask, kk, ejnum); - - SIMD_acc_cache3(kmask, fjx, fjy, fjz, fkx, fky, fkz, fxtmp, fytmp, - fztmp, fjxtmp, 
fjytmp, fjztmp, fxtmp2, fytmp2, - fztmp2, fjxtmp2, fjytmp2, fjztmp2, - tf + kcoffset * 3, swidth); - - if (EVFLAG) { - if (EFLAG) { - SIMD_int k; - if (eatom) { - k = SIMD_load(tj + kcoffset); - k = k << 4; - } - SIMD_acc_three(kmask, facrad, eatom, sevdwl, fwtmp, fjtmp, - fwtmp2, fjtmp2, k, dforce); - } - SIMD_ev_tally_nbor3v(kmask, vflag, fjx, fjy, fjz, fkx, fky, fkz, - delx, dely, delz, delr2x, delr2y, delr2z, - sv0, sv1, sv2, sv3, sv4, sv5); - } - - } // for kk - if (is_same<flt_t,acc_t>::value == 1) - SIMD_cache3(tf + coffset * 3, swidth, fjxtmp, fjytmp, fjztmp); - else - SIMD_cache3(tf + coffset * 3, swidth, fjxtmp, fjytmp, fjztmp, - fjxtmp2, fjytmp2, fjztmp2); - - if (EFLAG) { - if (eatom) { - SIMD_int j = SIMD_load(tj + coffset); - j = j << 4; - SIMD_jeng_update(jmask, dforce + 3, j, fjtmp); - if (is_same<flt_t,acc_t>::value == 0) - SIMD_jeng_update_hi(jmask, dforce + 3, j, fjtmp2); - } - } + const SIMD_flt_t delcs = cs - costheta; + const SIMD_flt_t delcssq = delcs*delcs; + + const SIMD_flt_t facexp = expgsrainv1*expgsrainv2; + const SIMD_flt_t facrad = lambda_epsilon * facexp * delcssq; + const SIMD_flt_t frad1 = facrad * gsrainvsq1; + const SIMD_flt_t frad2 = facrad * gsrainvsq2; + const SIMD_flt_t facang = lambda_epsilon2 * facexp * delcs; + const SIMD_flt_t facang12 = rinv12 * facang; + const SIMD_flt_t csfacang = cs * facang; + + const SIMD_flt_t csfac1 = rinvsq1 * csfacang; + const SIMD_flt_t fjx = delx * (frad1 + csfac1)-delr2x*facang12; + const SIMD_flt_t fjy = dely * (frad1 + csfac1)-delr2y*facang12; + const SIMD_flt_t fjz = delz * (frad1 + csfac1)-delr2z*facang12; + + const SIMD_flt_t csfac2 = rinvsq2 * csfacang; + SIMD_flt_t fkx = delx * facang12 - delr2x * (frad2 + csfac2); + SIMD_flt_t fky = dely * facang12 - delr2y * (frad2 + csfac2); + SIMD_flt_t fkz = delz * facang12 - delr2z * (frad2 + csfac2); + + const SIMD_mask kmask = SIMD_lt(jmask, kk, ejnum); + + SIMD_acc_cache3(kmask, fjx, fjy, fjz, fkx, fky, fkz, fxtmp, fytmp, + fztmp, fjxtmp, 
fjytmp, fjztmp, fxtmp2, fytmp2, + fztmp2, fjxtmp2, fjytmp2, fjztmp2, + tf + kcoffset * 3, swidth); + + if (EFLAG) { + SIMD_int k; + if (eatom) { + k = SIMD_load(tj + kcoffset); + k = k << 4; + } + SIMD_acc_three(kmask, facrad, eatom, sevdwl, fwtmp, fjtmp, + fwtmp2, fjtmp2, k, dforce); + } + } // for kk + if (is_same<flt_t,acc_t>::value == 1) + SIMD_cache3(tf + coffset * 3, swidth, fjxtmp, fjytmp, fjztmp); + else + SIMD_cache3(tf + coffset * 3, swidth, fjxtmp, fjytmp, fjztmp, + fjxtmp2, fjytmp2, fjztmp2); + + if (EFLAG) { + if (eatom) { + SIMD_int j = SIMD_load(tj + coffset); + j = j << 4; + SIMD_jeng_update(jmask, dforce + 3, j, fjtmp); + if (is_same<flt_t,acc_t>::value == 0) + SIMD_jeng_update_hi(jmask, dforce + 3, j, fjtmp2); + } + } } // for jj first loop for (int jj = 0; jj < ejnum_max; jj++) { - const int coffset = jj * swidth; - const SIMD_mask jmask = jj < ejnum; + const int coffset = jj * swidth; + const SIMD_mask jmask = jj < ejnum; const SIMD_int j = SIMD_load(tj + coffset); - const SIMD_int joffset = j << 4; - - SIMD_acc_t fjxtmp, fjytmp, fjztmp, fjxtmp2, fjytmp2, fjztmp2; - int foffset = swidth; - if (is_same<flt_t,acc_t>::value == 0) foffset = foffset >> 1; - acc_t *p = tf + coffset * 3; - fjxtmp = SIMD_load(p); - if (is_same<flt_t,acc_t>::value == 0) { - p = p + foffset; - fjxtmp2 = SIMD_load(p); - } - p = p + foffset; - fjytmp = SIMD_load(p); - if (is_same<flt_t,acc_t>::value == 0) { - p = p + foffset; - fjytmp2 = SIMD_load(p); - } - p = p + foffset; - fjztmp = SIMD_load(p); - if (is_same<flt_t,acc_t>::value == 0) { - p = p + foffset; - fjztmp2 = SIMD_load(p); - } - - SIMD_conflict_pi_reduce3(jmask, joffset, fjxtmp, fjytmp, fjztmp); - SIMD_jforce_update(jmask, dforce, joffset, fjxtmp, fjytmp, - fjztmp); + const SIMD_int joffset = j << 4; + + SIMD_acc_t fjxtmp, fjytmp, fjztmp, fjxtmp2, fjytmp2, fjztmp2; + int foffset = swidth; + if (is_same<flt_t,acc_t>::value == 0) foffset = foffset >> 1; + acc_t *p = tf + coffset * 3; + fjxtmp = SIMD_load(p); + if 
(is_same<flt_t,acc_t>::value == 0) { + p = p + foffset; + fjxtmp2 = SIMD_load(p); + } + p = p + foffset; + fjytmp = SIMD_load(p); + if (is_same<flt_t,acc_t>::value == 0) { + p = p + foffset; + fjytmp2 = SIMD_load(p); + } + p = p + foffset; + fjztmp = SIMD_load(p); + if (is_same<flt_t,acc_t>::value == 0) { + p = p + foffset; + fjztmp2 = SIMD_load(p); + } + + SIMD_conflict_pi_reduce3(jmask, joffset, fjxtmp, fjytmp, fjztmp); + SIMD_jforce_update(jmask, dforce, joffset, fjxtmp, fjytmp, + fjztmp); if (is_same<flt_t,acc_t>::value == 0) { - SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238); - SIMD_mask jmask2 = jmask >> 8; - SIMD_conflict_pi_reduce3(jmask2, joffset2, fjxtmp2, fjytmp2, - fjztmp2); - SIMD_jforce_update(jmask2, dforce, joffset2, fjxtmp2, fjytmp2, - fjztmp2); - } - } // for jj second loop - - SIMD_iforce_update(imask, &(f[i].x), goffset, fxtmp, fytmp, fztmp, - EVFLAG, eatom, fwtmp); - if (is_same<flt_t,acc_t>::value == 0) { - imask = imask >> 8; - SIMD_iforce_update(imask, &(f[i+8].x), goffset, fxtmp2, fytmp2, - fztmp2, EVFLAG, eatom, fwtmp2); - } - if (EVFLAG) { - if (EFLAG) oevdwl += SIMD_sum(sevdwl); - if (vflag == 1) { - ov0 += SIMD_sum(sv0); - ov1 += SIMD_sum(sv1); - ov2 += SIMD_sum(sv2); - ov3 += SIMD_sum(sv3); - ov4 += SIMD_sum(sv4); - ov5 += SIMD_sum(sv5); - } - } - ilist = ilist + swidth; + SIMD_int joffset2 = _mm512_shuffle_i32x4(joffset, joffset, 238); + SIMD_mask jmask2 = jmask >> 8; + SIMD_conflict_pi_reduce3(jmask2, joffset2, fjxtmp2, fjytmp2, + fjztmp2); + SIMD_jforce_update(jmask2, dforce, joffset2, fjxtmp2, fjytmp2, + fjztmp2); + } + } // for jj second loop + + SIMD_iforce_update(imask, &(f[i].x), goffset, fxtmp, fytmp, fztmp, + EFLAG, eatom, fwtmp); + if (is_same<flt_t,acc_t>::value == 0) { + imask = imask >> 8; + SIMD_iforce_update(imask, &(f[i+8].x), goffset, fxtmp2, fytmp2, + fztmp2, EFLAG, eatom, fwtmp2); + } + if (EFLAG) oevdwl += SIMD_sum(sevdwl); + ilist = ilist + iip; } // for ii - #ifndef _LMP_INTEL_OFFLOAD - if (vflag 
== 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(1, EVFLAG, EFLAG, vflag, eatom, nall, nlocal, - minlocal, nthreads, f_start, f_stride, x, - offload); - } + IP_PRE_fdotr_reduce_omp(1, nall, minlocal, nthreads, f_start, f_stride, + x, offload, vflag, ov0, ov1, ov2, ov3, ov4, ov5); } // end omp - - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = (acc_t)0.0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; - } + + IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = (acc_t)0.0; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD) *timer_compute = MIC_Wtime() - *timer_compute; @@ -1143,7 +1063,7 @@ void PairSWIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -1199,7 +1119,7 @@ void PairSWIntel::init_style() #if defined(__INTEL_COMPILER) if (__INTEL_COMPILER_BUILD_DATE < 20141023) error->all(FLERR, "Intel compiler versions before " - "15 Update 1 not supported for sw/intel"); + "15 Update 1 not supported for sw/intel"); #endif } @@ -1212,6 +1132,7 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc, #ifdef LMP_USE_AVXCD fix->nbor_pack_width(SIMD_type<flt_t>::width()); #endif + fix->three_body_neighbor(1); int off_ccache = 0; #ifdef _LMP_INTEL_OFFLOAD @@ -1247,7 +1168,7 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc, } } } - + _onetype = 0; if (atom->ntypes == 1) _onetype = 1; @@ -1257,55 +1178,55 @@ void 
PairSWIntel::pack_force_const(ForceConst<flt_t> &fc, for (int jj = 0; jj < tp1; jj++) { int j = map[jj]; if (i < 0 || j < 0 || ii == 0 || jj == 0) { - fc.p2[ii][jj].cutsq = 0; - fc.p2[ii][jj].cut = 0; - fc.p2[ii][jj].sigma_gamma = 0; - fc.p2f[ii][jj].cut = 0; - fc.p2f[ii][jj].powerp = 0; - fc.p2f[ii][jj].powerq = 0; - fc.p2f[ii][jj].sigma = 0; - fc.p2f2[ii][jj].c1 = 0; - fc.p2f2[ii][jj].c2 = 0; - fc.p2f2[ii][jj].c3 = 0; - fc.p2f2[ii][jj].c4 = 0; - fc.p2e[ii][jj].c5 = 0; - fc.p2e[ii][jj].c6 = 0; + fc.p2[ii][jj].cutsq = 0; + fc.p2[ii][jj].cut = 0; + fc.p2[ii][jj].sigma_gamma = 0; + fc.p2f[ii][jj].cut = 0; + fc.p2f[ii][jj].powerp = 0; + fc.p2f[ii][jj].powerq = 0; + fc.p2f[ii][jj].sigma = 0; + fc.p2f2[ii][jj].c1 = 0; + fc.p2f2[ii][jj].c2 = 0; + fc.p2f2[ii][jj].c3 = 0; + fc.p2f2[ii][jj].c4 = 0; + fc.p2e[ii][jj].c5 = 0; + fc.p2e[ii][jj].c6 = 0; } else { - int ijparam = elem2param[i][j][j]; - fc.p2[ii][jj].cutsq = params[ijparam].cutsq; - fc.p2[ii][jj].cut = params[ijparam].cut; - fc.p2[ii][jj].sigma_gamma = params[ijparam].sigma_gamma; - fc.p2f[ii][jj].cut = params[ijparam].cut; - fc.p2f[ii][jj].powerp = -params[ijparam].powerp; - fc.p2f[ii][jj].powerq = -params[ijparam].powerq; - fc.p2f[ii][jj].sigma = params[ijparam].sigma; - fc.p2f2[ii][jj].c1 = params[ijparam].c1; - fc.p2f2[ii][jj].c2 = params[ijparam].c2; - fc.p2f2[ii][jj].c3 = params[ijparam].c3; - fc.p2f2[ii][jj].c4 = params[ijparam].c4; - fc.p2e[ii][jj].c5 = params[ijparam].c5; - fc.p2e[ii][jj].c6 = params[ijparam].c6; - - double cutcut = params[ijparam].cut * params[ijparam].cut; - if (params[ijparam].cutsq >= cutcut) - fc.p2[ii][jj].cutsq *= 0.98; - - if (params[ijparam].powerp != 4.0 || params[ijparam].powerq != 0.0) - _spq = 0; + int ijparam = elem2param[i][j][j]; + fc.p2[ii][jj].cutsq = params[ijparam].cutsq; + fc.p2[ii][jj].cut = params[ijparam].cut; + fc.p2[ii][jj].sigma_gamma = params[ijparam].sigma_gamma; + fc.p2f[ii][jj].cut = params[ijparam].cut; + fc.p2f[ii][jj].powerp = -params[ijparam].powerp; + 
fc.p2f[ii][jj].powerq = -params[ijparam].powerq; + fc.p2f[ii][jj].sigma = params[ijparam].sigma; + fc.p2f2[ii][jj].c1 = params[ijparam].c1; + fc.p2f2[ii][jj].c2 = params[ijparam].c2; + fc.p2f2[ii][jj].c3 = params[ijparam].c3; + fc.p2f2[ii][jj].c4 = params[ijparam].c4; + fc.p2e[ii][jj].c5 = params[ijparam].c5; + fc.p2e[ii][jj].c6 = params[ijparam].c6; + + double cutcut = params[ijparam].cut * params[ijparam].cut; + if (params[ijparam].cutsq >= cutcut) + fc.p2[ii][jj].cutsq *= 0.98; + + if (params[ijparam].powerp != 4.0 || params[ijparam].powerq != 0.0) + _spq = 0; } for (int kk = 0; kk < tp1; kk++) { int k = map[kk]; - if (i < 0 || j < 0 || k < 0 || ii == 0 || jj == 0 || kk == 0) { - fc.p3[ii][jj][kk].costheta = 0; - fc.p3[ii][jj][kk].lambda_epsilon = 0; - fc.p3[ii][jj][kk].lambda_epsilon2 = 0; - } else { - int ijkparam = elem2param[i][j][k]; - fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta; - fc.p3[ii][jj][kk].lambda_epsilon = params[ijkparam].lambda_epsilon; - fc.p3[ii][jj][kk].lambda_epsilon2 = params[ijkparam].lambda_epsilon2; - } + if (i < 0 || j < 0 || k < 0 || ii == 0 || jj == 0 || kk == 0) { + fc.p3[ii][jj][kk].costheta = 0; + fc.p3[ii][jj][kk].lambda_epsilon = 0; + fc.p3[ii][jj][kk].lambda_epsilon2 = 0; + } else { + int ijkparam = elem2param[i][j][k]; + fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta; + fc.p3[ii][jj][kk].lambda_epsilon = params[ijkparam].lambda_epsilon; + fc.p3[ii][jj][kk].lambda_epsilon2 = params[ijkparam].lambda_epsilon2; + } } } } @@ -1326,10 +1247,10 @@ void PairSWIntel::pack_force_const(ForceConst<flt_t> &fc, flt_t * ocutneighsq = cutneighsq[0]; int tp1sq = tp1 * tp1; int tp1cu = tp1sq * tp1; - if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && + if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && op3 != NULL && ocutneighsq != NULL) { #pragma offload_transfer target(mic:_cop) \ - in(op2,op2f,op2f2,op2e: length(tp1sq) alloc_if(0) free_if(0)) \ + in(op2,op2f,op2f2,op2e: length(tp1sq) 
alloc_if(0) free_if(0)) \ in(op3: length(tp1cu) alloc_if(0) free_if(0)) \ in(ocutneighsq: length(tp1sq)) } @@ -1351,8 +1272,8 @@ void PairSWIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, fc_packed3 *op3 = p3[0][0]; #ifdef _LMP_INTEL_OFFLOAD - if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && - op3 != NULL && _cop >= 0) { + if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && + op3 != NULL && _cop >= 0) { #pragma offload_transfer target(mic:_cop) \ nocopy(op2, op2f, op2f2, op2e, op3: alloc_if(0) free_if(1)) } @@ -1380,8 +1301,8 @@ void PairSWIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, fc_packed3 *op3 = p3[0][0]; int tp1sq = ntypes * ntypes; int tp1cu = tp1sq * ntypes; - if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && - op3 != NULL && cop >= 0) { + if (op2 != NULL && op2f != NULL && op2f2 != NULL && op2e != NULL && + op3 != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(op2,op2f,op2f2,op2e: length(tp1sq) alloc_if(1) free_if(0)) \ nocopy(op3: length(tp1cu) alloc_if(1) free_if(0)) diff --git a/src/USER-INTEL/pair_sw_intel.h b/src/USER-INTEL/pair_sw_intel.h index 8723803a358ad5089efe2a1a7b7cdb2bbe7b747a..ffcf9a6fb6b69cbfe9747d4b19752a546006ed7d 100644 --- a/src/USER-INTEL/pair_sw_intel.h +++ b/src/USER-INTEL/pair_sw_intel.h @@ -46,10 +46,10 @@ class PairSWIntel : public PairSW { template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int SPQ,int ONETYPE,int EVFLAG,int EFLAG,class flt_t,class acc_t> + template <int SPQ, int ONETYPE, int EFLAG, class flt_t, class acc_t> void eval(const int offload, const int vflag, IntelBuffers<flt_t,acc_t> * buffers, const ForceConst<flt_t> &fc, - const int astart, const int aend, const int pad_width); + const int astart, const int aend, const int pad_width); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, diff --git 
a/src/USER-INTEL/pair_tersoff_intel.cpp b/src/USER-INTEL/pair_tersoff_intel.cpp index 88354ec4d089cb8966aab47c7913660736fe29e2..9e0a888638b828c03a64648fe9dd0a7f3180387d 100644 --- a/src/USER-INTEL/pair_tersoff_intel.cpp +++ b/src/USER-INTEL/pair_tersoff_intel.cpp @@ -47,7 +47,7 @@ void PairTersoffIntel::init_style() { if (comm->me == 0) { error->warning(FLERR, "Tersoff/intel currently requires intel compiler. " - "Using MANYBODY version."); + "Using MANYBODY version."); } PairTersoff::init_style(); } @@ -87,7 +87,7 @@ PairTersoffIntel::PairTersoffIntel(LAMMPS *lmp) : PairTersoff(lmp) void PairTersoffIntel::compute(int eflag, int vflag) { if (fix->precision()==FixIntel::PREC_MODE_MIXED) { - compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), + compute<float,double>(eflag, vflag, fix->get_mixed_buffers(), force_const_single); } else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE) { compute<double,double>(eflag, vflag, fix->get_double_buffers(), @@ -104,8 +104,8 @@ void PairTersoffIntel::compute(int eflag, int vflag) // do we need to calculate energy/virial template <class flt_t, class acc_t> void PairTersoffIntel::compute(int eflag, int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc) { if (eflag || vflag) { ev_setup(eflag,vflag); @@ -119,32 +119,30 @@ void PairTersoffIntel::compute(int eflag, int vflag, if (ago != 0 && fix->separate_buffers() == 0) { fix->start_watch(TIME_PACK); + int packthreads; + if (nthreads > INTEL_HTHREADS) packthreads = nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; - IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, - nthreads, sizeof(ATOM_T)); + IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost, + packthreads, sizeof(ATOM_T)); 
buffers->thr_pack(ifrom,ito,ago); } fix->stop_watch(TIME_PACK); } - - if (evflag || vflag_fdotr) { - int ovflag = 0; - if (vflag_fdotr) ovflag = 2; - else if (vflag) ovflag = 1; - if (eflag) { - eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum); - } else { - eval<1,0,1>(1, ovflag, buffers, fc, 0, offload_end); - eval<1,0,1>(0, ovflag, buffers, fc, host_start, inum); - } + + int ovflag = 0; + if (vflag_fdotr) ovflag = 2; + else if (vflag) ovflag = 1; + if (eflag) { + eval<1>(1, ovflag, buffers, fc, 0, offload_end); + eval<1>(0, ovflag, buffers, fc, host_start, inum); } else { - eval<0,0,1>(1, 0, buffers, fc, 0, offload_end); - eval<0,0,1>(0, 0, buffers, fc, host_start, inum); + eval<0>(1, ovflag, buffers, fc, 0, offload_end); + eval<0>(0, ovflag, buffers, fc, host_start, inum); } } @@ -172,14 +170,14 @@ struct IntelKernelTersoff : public lmp_intel::vector_routines<flt_t, acc_t, mic> // what's done in here is that they are inlined and vectorized // attractive() also provides an option to compute zeta as well static fvec zeta_vector( - const c_inner_t * param, - ivec xjw, bvec mask, - fvec vrij, fvec rsq2, - fvec vdijx, fvec vdijy, fvec vdijz, + const c_inner_t * param, + ivec xjw, bvec mask, + fvec vrij, fvec rsq2, + fvec vdijx, fvec vdijy, fvec vdijz, fvec dikx, fvec diky, fvec dikz ); static void force_zeta_vector( - const c_outer_t * param, + const c_outer_t * param, ivec xjw, bvec mask, fvec vrijsq, fvec vzeta_ij, @@ -202,49 +200,47 @@ struct IntelKernelTersoff : public lmp_intel::vector_routines<flt_t, acc_t, mic> ); // perform the actual computation - template<bool EVFLAG, bool EFLAG> + template<bool EFLAG> static void kernel( - int iito, int iifrom, int eatom, int vflag, + int iito, int iifrom, int eatom, int vflag, const int * _noalias const numneigh, const int * _noalias const numneighhalf, - const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, int ntypes, + const int * 
_noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x, - const c_inner_t * _noalias const c_inner, - const c_outer_t * _noalias const c_outer, + const c_inner_t * _noalias const c_inner, + const c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - acc_t *evdwl, acc_t *ov0, acc_t * ov1, acc_t *ov2, acc_t* ov3, acc_t *ov4, acc_t *ov5 + acc_t *evdwl ); // perform one step of calculation, pass in i-j pairs of atoms (is, js) - template<int EVFLAG, int EFLAG> + template<int EFLAG> static void kernel_step( - int eatom, int vflag, + int eatom, int vflag, const int * _noalias const numneigh, - const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, + const int * _noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x, - const c_inner_t * _noalias const c_inner, - const c_outer_t * _noalias const c_outer, + const c_inner_t * _noalias const c_inner, + const c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - avec *vsevdwl, avec *vsv0, avec * vsv1, avec *vsv2, avec* vsv3, avec *vsv4, avec *vsv5, - int compress_idx, iarr is, iarr js, bvec vmask_repulsive + avec *vsevdwl, int compress_idx, iarr is, iarr js, bvec vmask_repulsive ); // perform one step of calculation, as opposed to the previous method now // with fixed i and a number of js - template<int EVFLAG, int EFLAG> + template<int EFLAG> static void kernel_step_const_i( - int eatom, int vflag, - const int * _noalias const numneigh, const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, int ntypes, + int eatom, int vflag, + const int * _noalias const numneigh, const int * _noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias 
const x, - const c_inner_t * _noalias const c_inner, - const c_outer_t * _noalias const c_outer, + const c_inner_t * _noalias const c_inner, + const c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - avec *vsevdwl, avec *vsv0, avec *vsv1, avec *vsv2, avec *vsv3, avec *vsv4, avec *vsv5, - int compress_idx, int i, iarr js, bvec vmask_repulsive + avec *vsevdwl, int compress_idx, int i, iarr js, bvec vmask_repulsive ); }; @@ -257,11 +253,11 @@ struct IntelKernelTersoff : public lmp_intel::vector_routines<flt_t, acc_t, mic> // Dispatch to correct kernel instatiation and perform all the work neccesary // for offloading. In this routine we enter the Phi. // This method is nearly identical to what happens in the other /intel styles -template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> +template <int EFLAG, class flt_t, class acc_t> void PairTersoffIntel::eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> *buffers, - const ForceConst<flt_t> &fc, - const int astart, const int aend) + IntelBuffers<flt_t,acc_t> *buffers, + const ForceConst<flt_t> &fc, + const int astart, const int aend) { const int inum = aend - astart; if (inum == 0) return; @@ -292,9 +288,9 @@ void PairTersoffIntel::eval(const int offload, const int vflag, // Determine how much data to transfer int x_size, q_size, f_stride, ev_size, separate_flag; - IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag, - buffers, offload, fix, separate_flag, - x_size, q_size, ev_size, f_stride); + IP_PRE_get_transfern(ago, 1, EFLAG, vflag, + buffers, offload, fix, separate_flag, + x_size, q_size, ev_size, f_stride); int tc; FORCE_T * _noalias f_start; @@ -330,20 +326,16 @@ void PairTersoffIntel::eval(const int offload, const int vflag, #endif #endif - IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall, - f_stride, x, 0); + IP_PRE_repack_for_offload(1, separate_flag, nlocal, nall, + f_stride, x, 0); acc_t 
oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5; - if (EVFLAG) { - oevdwl = oecoul = (acc_t)0; - if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; - } + if (EFLAG) oevdwl = oecoul = (acc_t)0; + if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0; // loop over neighbors of my atoms #if defined(_OPENMP) - #pragma omp parallel default(none) \ - shared(f_start,f_stride,nlocal,nall,minlocal) \ - reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) + #pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5) #endif { int iifrom, iito, tid; @@ -355,61 +347,45 @@ void PairTersoffIntel::eval(const int offload, const int vflag, memset(f + minlocal, 0, f_stride * sizeof(FORCE_T)); { - acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5; - sevdwl = sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = 0.; + acc_t sevdwl; + sevdwl = 0.; #define ARGS iito, iifrom, eatom, vflag, numneigh, numneighhalf, cnumneigh, \ - firstneigh, ntypes, x, c_inner, c_outer, f, &sevdwl, &sv0, &sv1, &sv2, &sv3, &sv4, &sv5 + firstneigh, ntypes, x, c_inner, c_outer, f, &sevdwl // Pick the variable i algorithm under specific conditions // do use scalar algorithm with very short vectors int VL = lmp_intel::vector_routines<flt_t,acc_t,lmp_intel::mode>::VL; - bool pack_i = VL >= 8 && + bool pack_i = VL >= 8 && lmp_intel::vector_traits<lmp_intel::mode>::support_integer_and_gather_ops; bool use_scalar = VL < 4; if (use_scalar) { - IntelKernelTersoff<flt_t,acc_t,lmp_intel::NONE,false>::kernel<EVFLAG,EFLAG>(ARGS); + IntelKernelTersoff<flt_t,acc_t,lmp_intel::NONE,false>::kernel<EFLAG>(ARGS); } else if (pack_i) { - IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,true >::kernel<EVFLAG,EFLAG>(ARGS); + IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,true >::kernel<EFLAG>(ARGS); } else { - IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,false>::kernel<EVFLAG,EFLAG>(ARGS); - } - if (EVFLAG) { - if (EFLAG) oevdwl += sevdwl; - if (vflag == 1) { - ov0 += sv0; - ov1 += sv1; - ov2 += sv2; - ov3 += sv3; - ov4 += sv4; - ov5 += sv5; 
- } + IntelKernelTersoff<flt_t,acc_t,lmp_intel::mode,false>::kernel<EFLAG>(ARGS); } + if (EFLAG) oevdwl += sevdwl; } - #ifndef _LMP_INTEL_OFFLOAD - if (vflag == 2) - #endif - { - #if defined(_OPENMP) - #pragma omp barrier - #endif - IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG, EFLAG, vflag, eatom, nall, - nlocal, minlocal, nthreads, f_start, f_stride, - x, offload); - } + IP_PRE_fdotr_reduce_omp(1, nall, minlocal, nthreads, f_start, + f_stride, x, offload, vflag, ov0, ov1, ov2, ov3, + ov4, ov5); } // end of omp parallel region - if (EVFLAG) { - if (EFLAG) { - ev_global[0] = oevdwl; - ev_global[1] = 0.0; - } - if (vflag) { - ev_global[2] = ov0; - ev_global[3] = ov1; - ev_global[4] = ov2; - ev_global[5] = ov3; - ev_global[6] = ov4; - ev_global[7] = ov5; - } + + IP_PRE_fdotr_reduce(1, nall, nthreads, f_stride, vflag, + ov0, ov1, ov2, ov3, ov4, ov5); + + if (EFLAG) { + ev_global[0] = oevdwl; + ev_global[1] = 0.0; + } + if (vflag) { + ev_global[2] = ov0; + ev_global[3] = ov1; + ev_global[4] = ov2; + ev_global[5] = ov3; + ev_global[6] = ov4; + ev_global[7] = ov5; } #ifdef _LMP_INTEL_OFFLOAD @@ -424,7 +400,7 @@ void PairTersoffIntel::eval(const int offload, const int vflag, else fix->stop_watch(TIME_HOST_PAIR); - if (EVFLAG) + if (EFLAG || vflag) fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag); else fix->add_result_array(f_start, 0, offload); @@ -455,8 +431,9 @@ void PairTersoffIntel::init_style() error->all(FLERR, "The 'package intel' command is required for /intel styles"); fix = static_cast<FixIntel *>(modify->fix[ifix]); - + fix->pair_init_check(); + fix->three_body_neighbor(1); #ifdef _LMP_INTEL_OFFLOAD _cop = fix->coprocessor_number(); #endif @@ -504,25 +481,25 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc, for (int k = 1; k < tp1; k++) { Param * param = ¶ms[elem2param[map[i]][map[j]][map[k]]]; fc.c_cutoff_inner[i][k][j].cutsq = static_cast<flt_t>(param->cutsq); - fc.c_inner_loop[i][j][k].lam3 = static_cast<flt_t>(param->lam3); 
+ fc.c_inner_loop[i][j][k].lam3 = static_cast<flt_t>(param->lam3); fc.c_inner_loop[i][j][k].bigr = static_cast<flt_t>(param->bigr); fc.c_inner_loop[i][j][k].bigd = static_cast<flt_t>(param->bigd); fc.c_inner_loop[i][j][k].c2 = static_cast<flt_t>(param->c * param->c); fc.c_inner_loop[i][j][k].d2 = static_cast<flt_t>(param->d * param->d); fc.c_inner_loop[i][j][k].h = static_cast<flt_t>(param->h); fc.c_inner_loop[i][j][k].gamma = static_cast<flt_t>(param->gamma); - fc.c_inner_loop[i][j][k].powermint = static_cast<flt_t>(param->powermint); + fc.c_inner_loop[i][j][k].powermint = static_cast<flt_t>(param->powermint); fc.c_inner[i][j][k].cutsq = static_cast<flt_t>(param->cutsq); - fc.c_inner[i][j][k].lam3 = static_cast<flt_t>(param->lam3); + fc.c_inner[i][j][k].lam3 = static_cast<flt_t>(param->lam3); fc.c_inner[i][j][k].bigr = static_cast<flt_t>(param->bigr); fc.c_inner[i][j][k].bigd = static_cast<flt_t>(param->bigd); fc.c_inner[i][j][k].c2 = static_cast<flt_t>(param->c * param->c); fc.c_inner[i][j][k].d2 = static_cast<flt_t>(param->d * param->d); fc.c_inner[i][j][k].h = static_cast<flt_t>(param->h); fc.c_inner[i][j][k].gamma = static_cast<flt_t>(param->gamma); - fc.c_inner[i][j][k].powermint = static_cast<flt_t>(param->powermint); - + fc.c_inner[i][j][k].powermint = static_cast<flt_t>(param->powermint); + } Param * param = ¶ms[elem2param[map[i]][map[j]][map[j]]]; fc.c_cutoff_outer[i][j].cutsq = static_cast<flt_t>(param->cutsq); @@ -538,7 +515,7 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc, fc.c_second_loop[i][j].c2 = static_cast<flt_t>(param->c2); fc.c_second_loop[i][j].c3 = static_cast<flt_t>(param->c3); fc.c_second_loop[i][j].c4 = static_cast<flt_t>(param->c4); - + fc.c_outer[i][j].cutsq = static_cast<flt_t>(param->cutsq); fc.c_outer[i][j].bigr = static_cast<flt_t>(param->bigr); fc.c_outer[i][j].bigd = static_cast<flt_t>(param->bigd); @@ -586,8 +563,8 @@ void PairTersoffIntel::pack_force_const(ForceConst<flt_t> &fc, // As in any other /intel pair 
style template <class flt_t> void PairTersoffIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, - Memory *memory, - const int cop) { + Memory *memory, + const int cop) { if ( (ntypes != _ntypes) ) { if (_ntypes > 0) { #ifdef _LMP_INTEL_OFFLOAD @@ -598,12 +575,12 @@ void PairTersoffIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, c_cutoff_t * oc_cutoff_outer = c_cutoff_outer[0]; c_inner_t * oc_inner = c_inner[0][0]; c_outer_t * oc_outer = c_outer[0]; - if (c_first_loop != NULL && c_second_loop != NULL && + if (c_first_loop != NULL && c_second_loop != NULL && c_inner_loop != NULL && _cop >= 0) { #pragma offload_transfer target(mic:cop) \ - nocopy(oc_first_loop, oc_second_loop, oc_inner_loop: alloc_if(0) free_if(1)) \ - nocopy(oc_cutoff_outer, oc_cutoff_inner: alloc_if(0) free_if(1)) \ + nocopy(oc_first_loop, oc_second_loop, oc_inner_loop: alloc_if(0) free_if(1)) \ + nocopy(oc_cutoff_outer, oc_cutoff_inner: alloc_if(0) free_if(1)) \ nocopy(oc_inner, oc_outer: alloc_if(0) free_if(0)) } #endif @@ -637,7 +614,7 @@ void PairTersoffIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, int tp1sq = ntypes * ntypes; int tp1cb = ntypes * ntypes * ntypes; int tp1cb_pad = ntypes * ntypes * ntypes_pad; - if (oc_first_loop != NULL && oc_second_loop != NULL && + if (oc_first_loop != NULL && oc_second_loop != NULL && oc_inner_loop != NULL && cop >= 0) { #pragma offload_transfer target(mic:cop) \ nocopy(oc_first_loop: length(tp1sq) alloc_if(1) free_if(0)) \ @@ -663,23 +640,17 @@ void PairTersoffIntel::ForceConst<flt_t>::set_ntypes(const int ntypes, static const int N_CACHE = 8; template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i> -template<int EVFLAG, int EFLAG> +template<int EFLAG> void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( - int eatom, int vflag, - const int * _noalias const numneigh, const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, int ntypes, + int eatom, int vflag, + const int * _noalias 
const numneigh, const int * _noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x, - const typename PairTersoffIntel::ForceConst<flt_t>::c_inner_t * _noalias const c_inner, - const typename PairTersoffIntel::ForceConst<flt_t>::c_outer_t * _noalias const c_outer, + const typename PairTersoffIntel::ForceConst<flt_t>::c_inner_t * _noalias const c_inner, + const typename PairTersoffIntel::ForceConst<flt_t>::c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - avec *vsevdwl, - avec *vsv0, - avec *vsv1, - avec *vsv2, - avec* vsv3, - avec *vsv4, - avec *vsv5, - int compress_idx, + avec *vsevdwl, + int compress_idx, iarr is, iarr js, bvec vmask_repulsive @@ -691,7 +662,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( ivec v_i0(0); ivec v_i_ntypes(ntypes); ivec v_i_NEIGHMASK(NEIGHMASK); - + farr fx, fy, fz, fw; int cache_idx = 0; fvec vfkx_cache[N_CACHE]; @@ -701,7 +672,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( bvec vmask_cache[N_CACHE]; ivec vkks_final_cache; bvec vmask_final_cache; - iarr ts; + iarr ts; // compute all the stuff we know from i and j // TDO: We could extract this from the driver routine ivec vis = v::int_mullo(v_i4floats, v::int_load_vl(is)); @@ -767,7 +738,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( &vfix,&vfiy,&vfiz, &vfjx,&vfjy,&vfjz, &vfkx,&vfky,&vfkz, - &vzeta_contrib); + &vzeta_contrib); vfxtmp = v::mask_add(vfxtmp, veff_mask, vfxtmp, vfix); vfytmp = v::mask_add(vfytmp, veff_mask, vfytmp, vfiy); vfztmp = v::mask_add(vfztmp, veff_mask, vfztmp, vfiz); @@ -778,9 +749,9 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( vfkx_cache[cache_idx] = vfkx; vfky_cache[cache_idx] = vfky; vfkz_cache[cache_idx] = vfkz; - vks_cache[cache_idx] = vks; - vmask_cache[cache_idx] = veff_mask; - cache_idx += 1; + vks_cache[cache_idx] = vks; + 
vmask_cache[cache_idx] = veff_mask; + cache_idx += 1; vzeta = v::mask_add(vzeta, veff_mask, vzeta, vzeta_contrib); vkks = vkks + v_i1; @@ -828,22 +799,12 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( vfjxtmp = vfjxtmp * vprefactor - vdx_ij * vfpair; vfjytmp = vfjytmp * vprefactor - vdy_ij * vfpair; vfjztmp = vfjztmp * vprefactor - vdz_ij * vfpair; - - if (EVFLAG) { - if (EFLAG) { - *vsevdwl = v::acc_mask_add(*vsevdwl, vmask, *vsevdwl, vevdwl); - if (eatom) { - v::store(fw, (v_0_5 * vevdwl)); - } + + if (EFLAG) { + *vsevdwl = v::acc_mask_add(*vsevdwl, vmask, *vsevdwl, vevdwl); + if (eatom) { + v::store(fw, (v_0_5 * vevdwl)); } - if (vflag == 1) { - *vsv0 = v::acc_mask_add(*vsv0, vmask, *vsv0, vdx_ij * vdx_ij * vfpair); - *vsv1 = v::acc_mask_add(*vsv1, vmask, *vsv1, vdy_ij * vdy_ij * vfpair); - *vsv2 = v::acc_mask_add(*vsv2, vmask, *vsv2, vdz_ij * vdz_ij * vfpair); - *vsv3 = v::acc_mask_add(*vsv3, vmask, *vsv3, vdx_ij * vdy_ij * vfpair); - *vsv4 = v::acc_mask_add(*vsv4, vmask, *vsv4, vdx_ij * vdz_ij * vfpair); - *vsv5 = v::acc_mask_add(*vsv5, vmask, *vsv5, vdy_ij * vdz_ij * vfpair); - } } { while (cache_idx-- > 0) { @@ -872,7 +833,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( fvec vx_k, vy_k, vz_k, vcutsq; while (! 
v::mask_testz(vactive_mask)) { bvec vnew_mask = vactive_mask & ~ veff_old_mask; - vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK & + vks = v::int_mullo(v_i4floats, v_i_NEIGHMASK & v::int_gather<4>(vks, vactive_mask, vkks + vcnumneigh_i, firstneigh)); v::gather_x(vks, vnew_mask, x, &vx_k, &vy_k, &vz_k, &vw_k); fvec vdx_ik = vx_k - vx_i; @@ -894,7 +855,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( &vfix,&vfiy,&vfiz, &vfjx,&vfjy,&vfjz, &vfkx,&vfky,&vfkz, - 0); + 0); vfxtmp = v::mask_add(vfxtmp, veff_mask, vfxtmp, vfix); vfytmp = v::mask_add(vfytmp, veff_mask, vfytmp, vfiy); vfztmp = v::mask_add(vfztmp, veff_mask, vfztmp, vfiz); @@ -933,7 +894,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( f[t_].x += fx[t]; f[t_].y += fy[t]; f[t_].z += fz[t]; - if (EVFLAG && EFLAG && eatom) { + if (EFLAG && eatom) { f[t_].w += fw[t]; } } @@ -945,7 +906,7 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( f[t_].x += fx[t]; f[t_].y += fy[t]; f[t_].z += fz[t]; - if (EVFLAG && EFLAG && eatom) { + if (EFLAG && eatom) { f[t_].w += fw[t]; } } @@ -954,23 +915,17 @@ void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::kernel_step( // Specialized kernel step for fixed i, means that we don't have to use the // convoluted iteration scheme above, as the loop variables are uniform. 
template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i> -template<int EVFLAG, int EFLAG> +template<int EFLAG> void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( - int eatom, int vflag, - const int * _noalias const numneigh, const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, int ntypes, + int eatom, int vflag, + const int * _noalias const numneigh, const int * _noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x, - const typename PairTersoffIntel::ForceConst<flt_t>::c_inner_t * _noalias const c_inner, - const typename PairTersoffIntel::ForceConst<flt_t>::c_outer_t * _noalias const c_outer, + const typename PairTersoffIntel::ForceConst<flt_t>::c_inner_t * _noalias const c_inner, + const typename PairTersoffIntel::ForceConst<flt_t>::c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - avec *vsevdwl, - avec *vsv0, - avec *vsv1, - avec *vsv2, - avec* vsv3, - avec *vsv4, - avec *vsv5, - int compress_idx, + avec *vsevdwl, + int compress_idx, int i, iarr js, bvec vmask_repulsive @@ -996,7 +951,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( int kk_final_cache; aarr fx, fy, fz, fw; - iarr ts; + iarr ts; bvec vmask = v::mask_enable_lower(compress_idx); fvec vx_i(x[i].x), vy_i(x[i].y), vz_i(x[i].z); @@ -1042,7 +997,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( fvec vfix, vfiy, vfiz; fvec vfjx, vfjy, vfjz; fvec vfkx, vfky, vfkz; - + attractive_vector<true>(&c_inner[ntypes * ntypes * w_i + w_k],vc_idx_j_ntypes,veff_mask,fvec(1.), vrij,vrsq,vdx_ij,vdy_ij,vdz_ij,vdx_ik,vdy_ik,vdz_ik, &vfix,&vfiy,&vfiz, @@ -1055,7 +1010,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( vfjxtmp = v::acc_mask_add(vfjxtmp, veff_mask, vfjxtmp, vfjx); vfjytmp = v::acc_mask_add(vfjytmp, veff_mask, vfjytmp, vfjy); 
vfjztmp = v::acc_mask_add(vfjztmp, veff_mask, vfjztmp, vfjz); - + vfkx_cache[cache_idx] = v::mask_add(v::zero(), veff_mask, vfkx, v::zero()); vfky_cache[cache_idx] = v::mask_add(v::zero(), veff_mask, vfky, v::zero()); vfkz_cache[cache_idx] = v::mask_add(v::zero(), veff_mask, vfkz, v::zero()); @@ -1082,7 +1037,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( bvec vsame_mask = v::int_cmpneq(vjs, ivec(static_cast<int>(4 * sizeof(typename v::fscal) * k))); bvec veff_mask = vcutoff_mask & vsame_mask & vmask; if (! v::mask_testz(veff_mask)) { - fvec vzeta_contrib = zeta_vector(&c_inner[ntypes * ntypes * w_i + w_k], vc_idx_j_ntypes, veff_mask, vrij, vrsq, + fvec vzeta_contrib = zeta_vector(&c_inner[ntypes * ntypes * w_i + w_k], vc_idx_j_ntypes, veff_mask, vrij, vrsq, vdx_ij,vdy_ij,vdz_ij,vdx_ik,vdy_ik,vdz_ik); vzeta = v::acc_mask_add(vzeta, veff_mask, vzeta, vzeta_contrib); } @@ -1096,23 +1051,13 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( vfjxtmp = vfjxtmp * vaprefactor - avec(vdx_ij * vfpair); vfjytmp = vfjytmp * vaprefactor - avec(vdy_ij * vfpair); vfjztmp = vfjztmp * vaprefactor - avec(vdz_ij * vfpair); - - if (EVFLAG) { - if (EFLAG) { - *vsevdwl = v::acc_mask_add(*vsevdwl, vmask, *vsevdwl, vevdwl); - if (eatom) { - vfwtmp = v_0_5 * vevdwl; - v::store(fw, vfwtmp); - } + + if (EFLAG) { + *vsevdwl = v::acc_mask_add(*vsevdwl, vmask, *vsevdwl, vevdwl); + if (eatom) { + vfwtmp = v_0_5 * vevdwl; + v::store(fw, vfwtmp); } - if (vflag == 1) { - *vsv0 = v::acc_mask_add(*vsv0, vmask, *vsv0, vdx_ij * vdx_ij * vfpair); - *vsv1 = v::acc_mask_add(*vsv1, vmask, *vsv1, vdy_ij * vdy_ij * vfpair); - *vsv2 = v::acc_mask_add(*vsv2, vmask, *vsv2, vdz_ij * vdz_ij * vfpair); - *vsv3 = v::acc_mask_add(*vsv3, vmask, *vsv3, vdx_ij * vdy_ij * vfpair); - *vsv4 = v::acc_mask_add(*vsv4, vmask, *vsv4, vdx_ij * vdz_ij * vfpair); - *vsv5 = v::acc_mask_add(*vsv5, vmask, *vsv5, vdy_ij * vdz_ij * vfpair); - } } while (cache_idx-- > 0) { fvec vfkx 
= vprefactor * vfkx_cache[cache_idx]; @@ -1148,7 +1093,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( &vfix,&vfiy,&vfiz, &vfjx,&vfjy,&vfjz, &vfkx,&vfky,&vfkz, - 0); + 0); vfxtmp = v::acc_mask_add(vfxtmp, veff_mask, vfxtmp, vfix); vfytmp = v::acc_mask_add(vfytmp, veff_mask, vfytmp, vfiy); vfztmp = v::acc_mask_add(vfztmp, veff_mask, vfztmp, vfiz); @@ -1169,38 +1114,36 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel_step_const_i( f[t_].x += fx[t]; f[t_].y += fy[t]; f[t_].z += fz[t]; - if (EVFLAG && EFLAG && eatom) { + if (EFLAG && eatom) { f[t_].w += fw[t]; } } f[i].x += v::acc_reduce_add(v::acc_mask_add(v::acc_zero(), vmask, vfxtmp, v::zero())); f[i].y += v::acc_reduce_add(v::acc_mask_add(v::acc_zero(), vmask, vfytmp, v::zero())); f[i].z += v::acc_reduce_add(v::acc_mask_add(v::acc_zero(), vmask, vfztmp, v::zero())); - if (EVFLAG && EFLAG && eatom) { + if (EFLAG && eatom) { f[i].z += v::acc_reduce_add(v::acc_mask_add(v::acc_zero(), vmask, vfwtmp, v::zero())); } } template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i> -template<bool EVFLAG, bool EFLAG> +template<bool EFLAG> void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel( - int iito, int iifrom, int eatom, int vflag, - const int * _noalias const numneigh, - const int * _noalias const numneighhalf, - const int * _noalias const cnumneigh, - const int * _noalias const firstneigh, int ntypes, + int iito, int iifrom, int eatom, int vflag, + const int * _noalias const numneigh, + const int * _noalias const numneighhalf, + const int * _noalias const cnumneigh, + const int * _noalias const firstneigh, int ntypes, typename IntelBuffers<flt_t,acc_t>::atom_t * _noalias const x, - const c_inner_t * _noalias const c_inner, - const c_outer_t * _noalias const c_outer, + const c_inner_t * _noalias const c_inner, + const c_outer_t * _noalias const c_outer, typename IntelBuffers<flt_t,acc_t>::vec3_acc_t * _noalias const f, - acc_t *evdwl, acc_t *ov0, acc_t * ov1, 
acc_t *ov2, acc_t* ov3, acc_t *ov4, acc_t *ov5 + acc_t *evdwl ) { int compress_idx = 0; int ii, jj; iarr is, js; avec vsevdwl = v::acc_zero(); - avec vsv0 = v::acc_zero(), vsv1 = v::acc_zero(), vsv2 = v::acc_zero(); - avec vsv3 = v::acc_zero(), vsv4 = v::acc_zero(), vsv5 = v::acc_zero(); ivec v_i4floats(static_cast<int>(sizeof(typename v::fscal) * 4)); ivec vj, v_NEIGHMASK(NEIGHMASK); bvec vmask_repulsive(0); @@ -1237,11 +1180,11 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel( if (pack_i) { if (compress_idx == v::VL) { vmask_repulsive = v::int_cmpneq(v::int_load_vl(repulsive_flag), ivec(0)); - kernel_step<EVFLAG,EFLAG>( - eatom, vflag, + kernel_step<EFLAG>( + eatom, vflag, numneigh, cnumneigh, firstneigh, ntypes, x, c_inner, c_outer, f, - &vsevdwl, &vsv0, &vsv1, &vsv2, &vsv3, &vsv4, &vsv5, compress_idx, + &vsevdwl, compress_idx, is, js, vmask_repulsive ); compress_idx = 0; @@ -1250,11 +1193,11 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel( } else { if (compress_idx == v::VL || (compress_idx > 0 && jj == jnum-1)) { vmask_repulsive = v::int_cmpneq(v::int_load_vl(repulsive_flag), ivec(0)); - kernel_step_const_i<EVFLAG,EFLAG>( - eatom, vflag, + kernel_step_const_i<EFLAG>( + eatom, vflag, numneigh, cnumneigh, firstneigh, ntypes, x, c_inner, c_outer, f, - &vsevdwl, &vsv0, &vsv1, &vsv2, &vsv3, &vsv4, &vsv5, compress_idx, + &vsevdwl, compress_idx, i, js, vmask_repulsive ); compress_idx = 0; @@ -1265,36 +1208,26 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::kernel( } if (compress_idx > 0) { vmask_repulsive = v::int_cmpneq(v::int_load_vl(repulsive_flag), ivec(0)); - IntelKernelTersoff::kernel_step<EVFLAG,EFLAG>( - eatom, vflag, + IntelKernelTersoff::kernel_step<EFLAG>( + eatom, vflag, numneigh, cnumneigh, firstneigh, ntypes, x, c_inner, c_outer, f, - &vsevdwl, &vsv0, &vsv1, &vsv2, &vsv3, &vsv4, &vsv5, compress_idx, + &vsevdwl, compress_idx, is, js, vmask_repulsive ); } - if (EVFLAG) { - if (EFLAG) { - *evdwl += v::acc_reduce_add(vsevdwl); - 
} - if (vflag == 1) { - *ov0 += v::acc_reduce_add(vsv0); - *ov1 += v::acc_reduce_add(vsv1); - *ov2 += v::acc_reduce_add(vsv2); - *ov3 += v::acc_reduce_add(vsv3); - *ov4 += v::acc_reduce_add(vsv4); - *ov5 += v::acc_reduce_add(vsv5); - } + if (EFLAG) { + *evdwl += v::acc_reduce_add(vsevdwl); } } template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i> IntelKernelTersoff<flt_t,acc_t,mic,pack_i>::fvec IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::zeta_vector( - const c_inner_t * param, - ivec xjw, bvec mask, - fvec vrij, fvec rsq2, - fvec vdijx, fvec vdijy, fvec vdijz, + const c_inner_t * param, + ivec xjw, bvec mask, + fvec vrij, fvec rsq2, + fvec vdijx, fvec vdijy, fvec vdijz, fvec dikx, fvec diky, fvec dikz ) { fvec v_1_0(1.0); @@ -1317,7 +1250,7 @@ IntelKernelTersoff<flt_t,acc_t,mic,pack_i>::fvec IntelKernelTersoff<flt_t, acc_t // Its kind of important to check the mask. // Some simulations never/rarely invoke this branch. if (! v::mask_testz(vmask_need_sine)) { - vfc = v::blend(vmask_need_sine, vfc, + vfc = v::blend(vmask_need_sine, vfc, v_0_5 * (v_1_0 - sin(fvec(MY_PI2) * (vrik - vpbigr) * v::recip(vpbigd)))); } return vgijk * vex_delr * vfc; @@ -1325,7 +1258,7 @@ IntelKernelTersoff<flt_t,acc_t,mic,pack_i>::fvec IntelKernelTersoff<flt_t, acc_t template<class flt_t, class acc_t, lmp_intel::CalculationMode mic, bool pack_i> void IntelKernelTersoff<flt_t, acc_t, mic, pack_i>::force_zeta_vector( - const c_outer_t * param, + const c_outer_t * param, ivec xjw, bvec mask, fvec vrij, fvec vzeta_ij, @@ -1469,9 +1402,9 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::attractive_vector( vfc_d = v::blend(vmask_need_sine, vfc_d, fvec(-0.5) * vtmp * vfccos); } - fvec vzeta_d_fc = vfc_d * vgijk * vex_delr; - fvec vzeta_d_gijk = vfc * vgijk_d * vex_delr; - fvec vzeta_d_ex_delr = vfc * vgijk * vex_delr_d; + fvec vzeta_d_fc = vfc_d * vgijk * vex_delr; + fvec vzeta_d_gijk = vfc * vgijk_d * vex_delr; + fvec vzeta_d_ex_delr = vfc * vgijk * vex_delr_d; if 
(ZETA) *zeta = vfc * vgijk * vex_delr; fvec vminus_costheta = - vcostheta; @@ -1484,7 +1417,7 @@ void IntelKernelTersoff<flt_t,acc_t,mic, pack_i>::attractive_vector( fvec vdcosdrix = -(vdcosdrjx + vdcosdrkx); fvec vdcosdriy = -(vdcosdrjy + vdcosdrky); fvec vdcosdriz = -(vdcosdrjz + vdcosdrkz); - + *fix = vprefactor * (vzeta_d_gijk * vdcosdrix + vzeta_d_ex_delr * (rik_hatx - vrij_hatx) - vzeta_d_fc * rik_hatx); *fiy = vprefactor * (vzeta_d_gijk * vdcosdriy + vzeta_d_ex_delr * (rik_haty - vrij_haty) - vzeta_d_fc * rik_haty); *fiz = vprefactor * (vzeta_d_gijk * vdcosdriz + vzeta_d_ex_delr * (rik_hatz - vrij_hatz) - vzeta_d_fc * rik_hatz); diff --git a/src/USER-INTEL/pair_tersoff_intel.h b/src/USER-INTEL/pair_tersoff_intel.h index c9604f2797a77f7523d2472c7f00c514cb760eb7..6da478c10faa59786daeba33b4261d2cb2dc8591 100644 --- a/src/USER-INTEL/pair_tersoff_intel.h +++ b/src/USER-INTEL/pair_tersoff_intel.h @@ -75,14 +75,14 @@ class PairTersoffIntel : public PairTersoff { }; ForceConst<float> force_const_single; ForceConst<double> force_const_double; - + template <class flt_t, class acc_t> void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers, const ForceConst<flt_t> &fc); - template <int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t> + template <int EFLAG, class flt_t, class acc_t> void eval(const int offload, const int vflag, - IntelBuffers<flt_t,acc_t> * buffers, - const ForceConst<flt_t> &fc, const int astart, const int aend); + IntelBuffers<flt_t,acc_t> * buffers, + const ForceConst<flt_t> &fc, const int astart, const int aend); template <class flt_t, class acc_t> void pack_force_const(ForceConst<flt_t> &fc, diff --git a/src/USER-INTEL/pppm_disp_intel.cpp b/src/USER-INTEL/pppm_disp_intel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ec5f5150c2546c1fbdc1bfb74096c27d9e0321e5 --- /dev/null +++ b/src/USER-INTEL/pppm_disp_intel.cpp @@ -0,0 +1,3034 @@ +/* 
---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: William McDoniel (RWTH Aachen University) +------------------------------------------------------------------------- */ + +#include <mpi.h> +#include <stdlib.h> +#include <math.h> +#include "pppm_disp_intel.h" +#include "atom.h" +#include "error.h" +#include "fft3d_wrap.h" +#include "gridcomm.h" +#include "math_const.h" +#include "math_special.h" +#include "memory.h" +#include "suffix.h" + +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#define MAXORDER 7 +#define OFFSET 16384 +#define SMALL 0.00001 +#define LARGE 10000.0 +#define EPS_HOC 1.0e-7 + +enum{GEOMETRIC,ARITHMETIC,SIXTHPOWER}; +enum{REVERSE_RHO, REVERSE_RHO_G, REVERSE_RHO_A, REVERSE_RHO_NONE}; +enum{FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM, + FORWARD_IK_G, FORWARD_AD_G, FORWARD_IK_PERATOM_G, FORWARD_AD_PERATOM_G, + FORWARD_IK_A, FORWARD_AD_A, FORWARD_IK_PERATOM_A, FORWARD_AD_PERATOM_A, + FORWARD_IK_NONE, FORWARD_AD_NONE, FORWARD_IK_PERATOM_NONE, + FORWARD_AD_PERATOM_NONE}; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDispIntel::PPPMDispIntel(LAMMPS *lmp, int narg, char **arg) : + 
PPPMDisp(lmp, narg, arg) +{ + suffix_flag |= Suffix::INTEL; + + order = 7; + order_6 = 7; //sets default stencil sizes to 7 + + perthread_density = NULL; + particle_ekx = particle_eky = particle_ekz = NULL; + particle_ekx0 = particle_eky0 = particle_ekz0 = NULL; + particle_ekx1 = particle_eky1 = particle_ekz1 = NULL; + particle_ekx2 = particle_eky2 = particle_ekz2 = NULL; + particle_ekx3 = particle_eky3 = particle_ekz3 = NULL; + particle_ekx4 = particle_eky4 = particle_ekz4 = NULL; + particle_ekx5 = particle_eky5 = particle_ekz5 = NULL; + particle_ekx6 = particle_eky6 = particle_ekz6 = NULL; + + rho_lookup = drho_lookup = NULL; + rho6_lookup = drho6_lookup = NULL; + rho_points = 0; + + _use_table = _use_packing = _use_lrt = 0; +} + +PPPMDispIntel::~PPPMDispIntel() +{ + memory->destroy(perthread_density); + memory->destroy(particle_ekx); + memory->destroy(particle_eky); + memory->destroy(particle_ekz); + + memory->destroy(rho_lookup); + memory->destroy(drho_lookup); + memory->destroy(rho6_lookup); + memory->destroy(drho6_lookup); +} + + + +/* ---------------------------------------------------------------------- + called once before run +------------------------------------------------------------------------- */ + + +void PPPMDispIntel::init() +{ + + PPPMDisp::init(); + int ifix = modify->find_fix("package_intel"); + if (ifix < 0) + error->all(FLERR, + "The 'package intel' command is required for /intel styles"); + fix = static_cast<FixIntel *>(modify->fix[ifix]); + + #ifdef _LMP_INTEL_OFFLOAD + _use_base = 0; + if (fix->offload_balance() != 0.0) { + _use_base = 1; + return; + } + #endif + + fix->kspace_init_check(); + + _use_lrt = fix->lrt(); + if (_use_lrt) + error->all(FLERR, + "LRT mode is currently not supported for pppm/disp/intel"); + + + // For vectorization, we need some padding in the end + // The first thread computes on the global density + if ((comm->nthreads > 1) && !_use_lrt) { + memory->destroy(perthread_density); + memory->create(perthread_density, 
comm->nthreads-1, + ngrid + INTEL_P3M_ALIGNED_MAXORDER, + "pppmdispintel:perthread_density"); + } + + _use_table = fix->pppm_table(); + if (_use_table) { + rho_points = 5000; + memory->destroy(rho_lookup); + memory->create(rho_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmdispintel:rho_lookup"); + memory->destroy(rho6_lookup); + memory->create(rho6_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmdispintel:rho6_lookup"); + + if(differentiation_flag == 1) { + memory->destroy(drho_lookup); + memory->create(drho_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmdispintel:drho_lookup"); + memory->destroy(drho6_lookup); + memory->create(drho6_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmdispintel:drho6_lookup"); + } + precompute_rho(); + } + if (order > INTEL_P3M_MAXORDER) + error->all(FLERR,"PPPM order greater than supported by USER-INTEL\n"); +} + +/* ---------------------------------------------------------------------- + compute the PPPMDispIntel long-range force, energy, virial +------------------------------------------------------------------------- */ + +void PPPMDispIntel::compute(int eflag, int vflag) +{ + #ifdef _LMP_INTEL_OFFLOAD + if (_use_base) { + PPPMDisp::compute(eflag, vflag); + return; + } + #endif + int i; + // convert atoms from box to lamda coords + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = evflag_atom = eflag_global = vflag_global = + eflag_atom = vflag_atom = 0; + + if (evflag_atom && !peratom_allocate_flag) { + allocate_peratom(); + if (function[0]) { + cg_peratom->ghost_notify(); + cg_peratom->setup(); + } + if (function[1] + function[2] + function[3]) { + cg_peratom_6->ghost_notify(); + cg_peratom_6->setup(); + } + peratom_allocate_flag = 1; + } + if (triclinic == 0) boxlo = domain->boxlo; + else { + boxlo = domain->boxlo_lamda; + domain->x2lamda(atom->nlocal); + } + // extend size of per-atom arrays if necessary + + if (atom->nmax > nmax) { + + if (function[0]) memory->destroy(part2grid); + if 
(function[1] + function[2] + function[3]) memory->destroy(part2grid_6); + if (differentiation_flag == 1) { + memory->destroy(particle_ekx); + memory->destroy(particle_eky); + memory->destroy(particle_ekz); + if (function[2] == 1){ + memory->destroy(particle_ekx0); + memory->destroy(particle_eky0); + memory->destroy(particle_ekz0); + memory->destroy(particle_ekx1); + memory->destroy(particle_eky1); + memory->destroy(particle_ekz1); + memory->destroy(particle_ekx2); + memory->destroy(particle_eky2); + memory->destroy(particle_ekz2); + memory->destroy(particle_ekx3); + memory->destroy(particle_eky3); + memory->destroy(particle_ekz3); + memory->destroy(particle_ekx4); + memory->destroy(particle_eky4); + memory->destroy(particle_ekz4); + memory->destroy(particle_ekx5); + memory->destroy(particle_eky5); + memory->destroy(particle_ekz5); + memory->destroy(particle_ekx6); + memory->destroy(particle_eky6); + memory->destroy(particle_ekz6); + } + + } + nmax = atom->nmax; + if (function[0]) memory->create(part2grid,nmax,3,"pppm/disp:part2grid"); + if (function[1] + function[2] + function[3]) + memory->create(part2grid_6,nmax,3,"pppm/disp:part2grid_6"); + if (differentiation_flag == 1) { + memory->create(particle_ekx, nmax, "pppmdispintel:pekx"); + memory->create(particle_eky, nmax, "pppmdispintel:peky"); + memory->create(particle_ekz, nmax, "pppmdispintel:pekz"); + if (function[2] == 1){ + memory->create(particle_ekx0, nmax, "pppmdispintel:pekx0"); + memory->create(particle_eky0, nmax, "pppmdispintel:peky0"); + memory->create(particle_ekz0, nmax, "pppmdispintel:pekz0"); + memory->create(particle_ekx1, nmax, "pppmdispintel:pekx1"); + memory->create(particle_eky1, nmax, "pppmdispintel:peky1"); + memory->create(particle_ekz1, nmax, "pppmdispintel:pekz1"); + memory->create(particle_ekx2, nmax, "pppmdispintel:pekx2"); + memory->create(particle_eky2, nmax, "pppmdispintel:peky2"); + memory->create(particle_ekz2, nmax, "pppmdispintel:pekz2"); + memory->create(particle_ekx3, nmax, 
"pppmdispintel:pekx3"); + memory->create(particle_eky3, nmax, "pppmdispintel:peky3"); + memory->create(particle_ekz3, nmax, "pppmdispintel:pekz3"); + memory->create(particle_ekx4, nmax, "pppmdispintel:pekx4"); + memory->create(particle_eky4, nmax, "pppmdispintel:peky4"); + memory->create(particle_ekz4, nmax, "pppmdispintel:pekz4"); + memory->create(particle_ekx5, nmax, "pppmdispintel:pekx5"); + memory->create(particle_eky5, nmax, "pppmdispintel:peky5"); + memory->create(particle_ekz5, nmax, "pppmdispintel:pekz5"); + memory->create(particle_ekx6, nmax, "pppmdispintel:pekx6"); + memory->create(particle_eky6, nmax, "pppmdispintel:peky6"); + memory->create(particle_ekz6, nmax, "pppmdispintel:pekz6"); + } + } + } + energy = 0.0; + energy_1 = 0.0; + energy_6 = 0.0; + if (vflag) for (i = 0; i < 6; i++) virial_6[i] = virial_1[i] = 0.0; + + // find grid points for all my particles + // distribute partcles' charges/dispersion coefficients on the grid + // communication between processors and remapping two fft + // Solution of poissons equation in k-space and backtransformation + // communication between processors + // calculation of forces + + if (function[0]) { + + //perform calculations for coulomb interactions only + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + particle_map<float,double>(delxinv, delyinv, delzinv, shift, part2grid, + nupper, nlower, nxlo_out, nylo_out, nzlo_out, + nxhi_out, nyhi_out, nzhi_out, + fix->get_mixed_buffers()); + make_rho_c<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + particle_map<double,double>(delxinv, delyinv, delzinv, shift, part2grid, + nupper, nlower, nxlo_out, nylo_out, + nzlo_out, nxhi_out, nyhi_out, nzhi_out, + fix->get_double_buffers()); + make_rho_c<double,double>(fix->get_double_buffers()); + } else { + particle_map<float,float>(delxinv, delyinv, delzinv, shift, part2grid, + nupper, nlower, nxlo_out, nylo_out, nzlo_out, + nxhi_out, nyhi_out, nzhi_out, + 
fix->get_single_buffers()); + make_rho_c<float,float>(fix->get_single_buffers()); + } + + cg->reverse_comm(this,REVERSE_RHO); + + brick2fft(nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + density_brick, density_fft, work1,remap); + + if (differentiation_flag == 1) { + poisson_ad(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, virial_1, vg,vg2, u_brick, v0_brick, + v1_brick, v2_brick, v3_brick, v4_brick, v5_brick); + + cg->forward_comm(this,FORWARD_AD); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_c_ad<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_c_ad<double,double>(fix->get_double_buffers()); + } else { + fieldforce_c_ad<float,float>(fix->get_single_buffers()); + } + + if (vflag_atom) cg_peratom->forward_comm(this, FORWARD_AD_PERATOM); + + } else { + poisson_ik(work1, work2, density_fft, fft1, fft2, + nx_pppm, ny_pppm, nz_pppm, nfft, + nxlo_fft, nylo_fft, nzlo_fft, nxhi_fft, nyhi_fft, nzhi_fft, + nxlo_in, nylo_in, nzlo_in, nxhi_in, nyhi_in, nzhi_in, + energy_1, greensfn, fkx, fky, fkz,fkx2, fky2, fkz2, + vdx_brick, vdy_brick, vdz_brick, virial_1, vg,vg2, + u_brick, v0_brick, v1_brick, v2_brick, v3_brick, v4_brick, + v5_brick); + + cg->forward_comm(this, FORWARD_IK); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_c_ik<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_c_ik<double,double>(fix->get_double_buffers()); + } else { + fieldforce_c_ik<float,float>(fix->get_single_buffers()); + } + + if (evflag_atom) cg_peratom->forward_comm(this, FORWARD_IK_PERATOM); + } + if (evflag_atom) fieldforce_c_peratom(); + } + + if (function[1]) { + //perfrom calculations for geometric mixing + + if (fix->precision() == 
FixIntel::PREC_MODE_MIXED) { + particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_mixed_buffers()); + make_rho_g<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_double_buffers()); + make_rho_g<double,double>(fix->get_double_buffers()); + } else { + particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_single_buffers()); + make_rho_g<float,float>(fix->get_single_buffers()); + } + + + cg_6->reverse_comm(this, REVERSE_RHO_G); + + brick2fft(nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, nzhi_in_6, + density_brick_g, density_fft_g, work1_6,remap_6); + + if (differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_g, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, + nxlo_fft_6, nylo_fft_6, nzlo_fft_6, nxhi_fft_6, + nyhi_fft_6, nzhi_fft_6, nxlo_in_6, nylo_in_6, nzlo_in_6, + nxhi_in_6, nyhi_in_6, nzhi_in_6, energy_6, greensfn_6, + virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g, v1_brick_g, + v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_AD_G); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_g_ad<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_g_ad<double,double>(fix->get_double_buffers()); + } else { + fieldforce_g_ad<float,float>(fix->get_single_buffers()); + } + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_G); + + } else { + poisson_ik(work1_6, work2_6, density_fft_g, 
fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6, + nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, + nzhi_in_6, energy_6, greensfn_6, fkx_6, fky_6, fkz_6, + fkx2_6, fky2_6, fkz2_6, vdx_brick_g, vdy_brick_g, + vdz_brick_g, virial_6, vg_6, vg2_6, u_brick_g, v0_brick_g, + v1_brick_g, v2_brick_g, v3_brick_g, v4_brick_g, v5_brick_g); + + cg_6->forward_comm(this,FORWARD_IK_G); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_g_ik<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_g_ik<double,double>(fix->get_double_buffers()); + } else { + fieldforce_g_ik<float,float>(fix->get_single_buffers()); + } + + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_G); + } + if (evflag_atom) fieldforce_g_peratom(); + } + + if (function[2]) { + //perform calculations for arithmetic mixing + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, + nxlo_out_6, nylo_out_6, nzlo_out_6, + nxhi_out_6, nyhi_out_6, nzhi_out_6, + fix->get_mixed_buffers()); + make_rho_a<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_double_buffers()); + make_rho_a<double,double>(fix->get_double_buffers()); + } else { + particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_single_buffers()); + make_rho_a<float,float>(fix->get_single_buffers()); + } + + cg_6->reverse_comm(this, REVERSE_RHO_A); + + brick2fft_a(); + + if ( 
differentiation_flag == 1) { + + poisson_ad(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6, + nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, + nzhi_in_6, energy_6, greensfn_6, virial_6, vg_6, vg2_6, + u_brick_a3, v0_brick_a3, v1_brick_a3, v2_brick_a3, + v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ad(density_fft_a0, density_fft_a6, u_brick_a0, v0_brick_a0, + v1_brick_a0, v2_brick_a0, v3_brick_a0, v4_brick_a0, + v5_brick_a0, u_brick_a6, v0_brick_a6, v1_brick_a6, + v2_brick_a6, v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ad(density_fft_a1, density_fft_a5, u_brick_a1, v0_brick_a1, + v1_brick_a1, v2_brick_a1, v3_brick_a1, v4_brick_a1, + v5_brick_a1, u_brick_a5, v0_brick_a5, v1_brick_a5, + v2_brick_a5, v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ad(density_fft_a2, density_fft_a4, u_brick_a2, v0_brick_a2, + v1_brick_a2, v2_brick_a2, v3_brick_a2, v4_brick_a2, + v5_brick_a2, u_brick_a4, v0_brick_a4, v1_brick_a4, + v2_brick_a4, v3_brick_a4, v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_AD_A); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_a_ad<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_a_ad<double,double>(fix->get_double_buffers()); + } else { + fieldforce_a_ad<float,float>(fix->get_single_buffers()); + } + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_AD_PERATOM_A); + + } else { + + poisson_ik(work1_6, work2_6, density_fft_a3, fft1_6, fft2_6, + nx_pppm_6, ny_pppm_6, nz_pppm_6, nfft_6, nxlo_fft_6, + nylo_fft_6, nzlo_fft_6, nxhi_fft_6, nyhi_fft_6, nzhi_fft_6, + nxlo_in_6, nylo_in_6, nzlo_in_6, nxhi_in_6, nyhi_in_6, + nzhi_in_6, energy_6, greensfn_6, fkx_6, fky_6, fkz_6,fkx2_6, + fky2_6, fkz2_6, vdx_brick_a3, vdy_brick_a3, vdz_brick_a3, + virial_6, vg_6, vg2_6, u_brick_a3, v0_brick_a3, v1_brick_a3, 
+ v2_brick_a3, v3_brick_a3, v4_brick_a3, v5_brick_a3); + poisson_2s_ik(density_fft_a0, density_fft_a6, vdx_brick_a0, + vdy_brick_a0, vdz_brick_a0, vdx_brick_a6, vdy_brick_a6, + vdz_brick_a6, u_brick_a0, v0_brick_a0, v1_brick_a0, + v2_brick_a0, v3_brick_a0, v4_brick_a0, v5_brick_a0, + u_brick_a6, v0_brick_a6, v1_brick_a6, v2_brick_a6, + v3_brick_a6, v4_brick_a6, v5_brick_a6); + poisson_2s_ik(density_fft_a1, density_fft_a5, vdx_brick_a1, + vdy_brick_a1, vdz_brick_a1, vdx_brick_a5, vdy_brick_a5, + vdz_brick_a5, u_brick_a1, v0_brick_a1, v1_brick_a1, + v2_brick_a1, v3_brick_a1, v4_brick_a1, v5_brick_a1, + u_brick_a5, v0_brick_a5, v1_brick_a5, v2_brick_a5, + v3_brick_a5, v4_brick_a5, v5_brick_a5); + poisson_2s_ik(density_fft_a2, density_fft_a4, vdx_brick_a2, + vdy_brick_a2, vdz_brick_a2, vdx_brick_a4, vdy_brick_a4, + vdz_brick_a4, u_brick_a2, v0_brick_a2, v1_brick_a2, + v2_brick_a2, v3_brick_a2, v4_brick_a2, v5_brick_a2, + u_brick_a4, v0_brick_a4, v1_brick_a4, v2_brick_a4, + v3_brick_a4, v4_brick_a4, v5_brick_a4); + + cg_6->forward_comm(this, FORWARD_IK_A); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_a_ik<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_a_ik<double,double>(fix->get_double_buffers()); + } else { + fieldforce_a_ik<float,float>(fix->get_single_buffers()); + } + + if (evflag_atom) cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_A); + } + if (evflag_atom) fieldforce_a_peratom(); + } + + if (function[3]) { + //perform calculations if no mixing rule applies + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + particle_map<float,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_mixed_buffers()); + make_rho_none<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + 
particle_map<double,double>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_double_buffers()); + make_rho_none<double,double>(fix->get_double_buffers()); + } else { + particle_map<float,float>(delxinv_6, delyinv_6, delzinv_6, shift_6, + part2grid_6, nupper_6, nlower_6, nxlo_out_6, + nylo_out_6, nzlo_out_6, nxhi_out_6, + nyhi_out_6, nzhi_out_6, + fix->get_single_buffers()); + make_rho_none<float,float>(fix->get_single_buffers()); + } + + cg_6->reverse_comm(this, REVERSE_RHO_NONE); + + brick2fft_none(); + + if (differentiation_flag == 1) { + + int n = 0; + for (int k = 0; k<nsplit_alloc/2; k++) { + poisson_none_ad(n,n+1,density_fft_none[n],density_fft_none[n+1], + u_brick_none[n],u_brick_none[n+1], + v0_brick_none, v1_brick_none, v2_brick_none, + v3_brick_none, v4_brick_none, v5_brick_none); + n += 2; + } + + cg_6->forward_comm(this,FORWARD_AD_NONE); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_none_ad<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + fieldforce_none_ad<double,double>(fix->get_double_buffers()); + } else { + fieldforce_none_ad<float,float>(fix->get_single_buffers()); + } + + if (vflag_atom) cg_peratom_6->forward_comm(this,FORWARD_AD_PERATOM_NONE); + + } else { + int n = 0; + for (int k = 0; k<nsplit_alloc/2; k++) { + + poisson_none_ik(n,n+1,density_fft_none[n], density_fft_none[n+1], + vdx_brick_none[n], vdy_brick_none[n], + vdz_brick_none[n], vdx_brick_none[n+1], + vdy_brick_none[n+1], vdz_brick_none[n+1], + u_brick_none, v0_brick_none, v1_brick_none, + v2_brick_none, v3_brick_none, v4_brick_none, + v5_brick_none); + n += 2; + } + + cg_6->forward_comm(this,FORWARD_IK_NONE); + + if (fix->precision() == FixIntel::PREC_MODE_MIXED) { + fieldforce_none_ik<float,double>(fix->get_mixed_buffers()); + } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) { + 
fieldforce_none_ik<double,double>(fix->get_double_buffers()); + } else { + fieldforce_none_ik<float,float>(fix->get_single_buffers()); + } + + if (evflag_atom) + cg_peratom_6->forward_comm(this, FORWARD_IK_PERATOM_NONE); + } + if (evflag_atom) fieldforce_none_peratom(); + } + + // update qsum and qsqsum, if atom count has changed and energy needed + + if ((eflag_global || eflag_atom) && atom->natoms != natoms_original) { + qsum_qsq(); + natoms_original = atom->natoms; + } + + // sum energy across procs and add in volume-dependent term + + const double qscale = force->qqrd2e * scale; + if (eflag_global) { + double energy_all; + MPI_Allreduce(&energy_1,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_1 = energy_all; + MPI_Allreduce(&energy_6,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); + energy_6 = energy_all; + + energy_1 *= 0.5*volume; + energy_6 *= 0.5*volume; + + energy_1 -= g_ewald*qsqsum/MY_PIS + + MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume); + energy_6 += - MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij + + 1.0/12.0*pow(g_ewald_6,6)*csum; + energy_1 *= qscale; + } + + // sum virial across procs + + if (vflag_global) { + double virial_all[6]; + MPI_Allreduce(virial_1,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; + MPI_Allreduce(virial_6,virial_all,6,MPI_DOUBLE,MPI_SUM,world); + for (i = 0; i < 6; i++) virial[i] += 0.5*volume*virial_all[i]; + if (function[1]+function[2]+function[3]){ + double a = MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumij; + virial[0] -= a; + virial[1] -= a; + virial[2] -= a; + } + } + + if (eflag_atom) { + if (function[0]) { + double *q = atom->q; + for (i = 0; i < atom->nlocal; i++) { + eatom[i] -= qscale*g_ewald*q[i]*q[i]/MY_PIS + qscale*MY_PI2*q[i]* + qsum / (g_ewald*g_ewald*volume); //coulomb self energy correction + } + } + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + eatom[i] += - 
MY_PI*MY_PIS/(6*volume)*pow(g_ewald_6,3)*csumi[tmp] + + 1.0/12.0*pow(g_ewald_6,6)*cii[tmp]; + } + } + } + + if (vflag_atom) { + if (function[1] + function[2] + function[3]) { + int tmp; + for (i = 0; i < atom->nlocal; i++) { + tmp = atom->type[i]; + //dispersion self virial correction + for (int n = 0; n < 3; n++) vatom[i][n] -= MY_PI*MY_PIS/(6*volume)* + pow(g_ewald_6,3)*csumi[tmp]; + } + } + } + + + // 2d slab correction + + if (slabflag) slabcorr(eflag); + if (function[0]) energy += energy_1; + if (function[1] + function[2] + function[3]) energy += energy_6; + + // convert atoms back from lamda to box coords + + if (triclinic) domain->lamda2x(atom->nlocal); +} + + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t> +void PPPMDispIntel::particle_map(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi, + IntelBuffers<flt_t,acc_t> *buffers) +{ + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2])) + error->one(FLERR,"Non-numeric box dimensions - simulation unstable"); + + int flag = 0; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr, delx, dely, delz, sft, p2g, nup, nlow, nxlo,\ + nylo, nzlo, nxhi, nyhi, nzhi) reduction(+:flag) if(!_use_lrt) + #endif + { + double **x = atom->x; + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delx; + const flt_t yi = dely; + const flt_t zi = delz; + const flt_t fshift = sft; + + + int 
iifrom, iito, tid; + IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T)); + + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:flag) + #endif + for (int i = iifrom; i < iito; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + int nx = static_cast<int> ((x[i][0]-lo0)*xi+fshift) - OFFSET; + int ny = static_cast<int> ((x[i][1]-lo1)*yi+fshift) - OFFSET; + int nz = static_cast<int> ((x[i][2]-lo2)*zi+fshift) - OFFSET; + + p2g[i][0] = nx; + p2g[i][1] = ny; + p2g[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlow < nxlo || nx+nup > nxhi || + ny+nlow < nylo || ny+nup > nyhi || + nz+nlow < nzlo || nz+nup > nzhi) + flag = 1; + } + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPMDisp"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::make_rho_c(IntelBuffers<flt_t,acc_t> *buffers) +{ + // clear 3d density array + + FFT_SCALAR * _noalias global_density = + &(density_brick[nzlo_out][nylo_out][nxlo_out]); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + //double *q = atom->q; + //double **x = atom->x; + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + #if 
defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, nlocal, global_density) if(!_use_lrt) + #endif + { + double *q = atom->q; + double **x = atom->x; + + const int nix = nxhi_out - nxlo_out + 1; + const int niy = nyhi_out - nylo_out + 1; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshift = shift; + const flt_t fshiftone = shiftone; + const flt_t fdelvolinv = delvolinv; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + FFT_SCALAR * _noalias my_density = tid == 0 ? global_density : + perthread_density[tid - 1]; + // clear 3d density array + memset(my_density, 0, ngrid * sizeof(FFT_SCALAR)); + + for (int i = ifrom; i < ito; i++) { + + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + + int nysum = nlower + ny - nylo_out; + int nxsum = nlower + nx - nxlo_out; + int nzsum = (nlower + nz - nzlo_out)*nix*niy + nysum*nix + nxsum; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho_lookup[idx][k]; + rho[1][k] = rho_lookup[idy][k]; + rho[2][k] = rho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1,r2,r3; + r1 = r2 = r3 = ZEROF; + + for (int l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = 
rho_coeff[l][k] + r3*dz; + } + rho[0][k-nlower] = r1; + rho[1][k-nlower] = r2; + rho[2][k-nlower] = r3; + } + } + + FFT_SCALAR z0 = fdelvolinv * q[i]; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n*nix*niy + nzsum; + FFT_SCALAR y0 = z0*rho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int mzy = m*nix + mz; + FFT_SCALAR x0 = y0*rho[1][m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mzyx = l + mzy; + my_density[mzyx] += x0*rho[0][l]; + } + } + } + } + } + + // reduce all the perthread_densities into global_density + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, global_density) if(!_use_lrt) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr); + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + for(int j = 1; j < nthr; j++) { + global_density[i] += perthread_density[j-1][i]; + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- geometric mixing +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::make_rho_g(IntelBuffers<flt_t,acc_t> *buffers) +{ + // clear 3d density array + + FFT_SCALAR * _noalias global_density = + &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = 
global coords of moving stencil pt + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, nlocal, global_density) if(!_use_lrt) + #endif + { + int type; + double **x = atom->x; + + const int nix = nxhi_out_6 - nxlo_out_6 + 1; + const int niy = nyhi_out_6 - nylo_out_6 + 1; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshift = shift_6; + const flt_t fshiftone = shiftone_6; + const flt_t fdelvolinv = delvolinv_6; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + FFT_SCALAR * _noalias my_density = tid == 0 ? global_density : + perthread_density[tid - 1]; + + // clear 3d density array + memset(my_density, 0, ngrid_6 * sizeof(FFT_SCALAR)); + + for (int i = ifrom; i < ito; i++) { + + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nysum = nlower_6 + ny - nylo_out_6; + int nxsum = nlower_6 + nx - nxlo_out_6; + int nzsum = (nlower_6 + nz - nzlo_out_6)*nix*niy + nysum*nix + nxsum; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3; + r1 = 
r2 = r3 = ZEROF; + + for (int l = order_6-1; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + } + } + + type = atom->type[i]; + FFT_SCALAR z0 = fdelvolinv * B[type]; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n*nix*niy + nzsum; + FFT_SCALAR y0 = z0*rho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int mzy = m*nix + mz; + FFT_SCALAR x0 = y0*rho[1][m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mzyx = l + mzy; + my_density[mzyx] += x0*rho[0][l]; + } + } + } + } + } + + // reduce all the perthread_densities into global_density + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, global_density) if(!_use_lrt) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6, nthr); + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + for(int j = 1; j < nthr; j++) { + global_density[i] += perthread_density[j-1][i]; + } + } + } + +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- arithmetic mixing +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::make_rho_a(IntelBuffers<flt_t,acc_t> *buffers) +{ + // clear 3d density array + + memset(&(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + 
memset(&(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + memset(&(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]),0, + ngrid_6*sizeof(FFT_SCALAR)); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + int nlocal = atom->nlocal; + + double **x = atom->x; + + const int nix = nxhi_out_6 - nxlo_out_6 + 1; + const int niy = nyhi_out_6 - nylo_out_6 + 1; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshift = shift_6; + const flt_t fshiftone = shiftone_6; + const flt_t fdelvolinv = delvolinv_6; + + for (int i = 0; i < nlocal; i++) { + + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + 
for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3; + r1 = r2 = r3 = ZEROF; + + for (int l = order_6-1; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + } + } + + const int type = atom->type[i]; + FFT_SCALAR z0 = fdelvolinv; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n + nzsum; + FFT_SCALAR y0 = z0*rho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m + nysum; + FFT_SCALAR x0 = y0*rho[1][m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + FFT_SCALAR w = x0*rho[0][l]; + density_brick_a0[mz][my][mx] += w*B[7*type]; + density_brick_a1[mz][my][mx] += w*B[7*type+1]; + density_brick_a2[mz][my][mx] += w*B[7*type+2]; + density_brick_a3[mz][my][mx] += w*B[7*type+3]; + density_brick_a4[mz][my][mx] += w*B[7*type+4]; + density_brick_a5[mz][my][mx] += w*B[7*type+5]; + density_brick_a6[mz][my][mx] += w*B[7*type+6]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = dispersion "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid --- case when mixing rules don't apply +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void 
PPPMDispIntel::make_rho_none(IntelBuffers<flt_t,acc_t> *buffers) +{ + + FFT_SCALAR * _noalias global_density = &(density_brick_none[0][nzlo_out_6][nylo_out_6][nxlo_out_6]); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, nlocal, global_density) if(!_use_lrt) + #endif + { + int type; + double **x = atom->x; + + const int nix = nxhi_out_6 - nxlo_out_6 + 1; + const int niy = nyhi_out_6 - nylo_out_6 + 1; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshift = shift_6; + const flt_t fshiftone = shiftone_6; + const flt_t fdelvolinv = delvolinv_6; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + FFT_SCALAR * _noalias my_density = tid == 0 ? 
global_density : + perthread_density[tid - 1]; + // clear 3d density array + memset(my_density, 0, ngrid_6 * sizeof(FFT_SCALAR)); + + for (int i = ifrom; i < ito; i++) { + + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nysum = nlower_6 + ny - nylo_out_6; + int nxsum = nlower_6 + nx - nxlo_out_6; + int nzsum = (nlower_6 + nz - nzlo_out_6)*nix*niy + nysum*nix + nxsum; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3; + r1 = r2 = r3 = ZEROF; + + for (int l = order_6-1; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + } + } + + type = atom->type[i]; + FFT_SCALAR z0 = fdelvolinv; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n*nix*niy + nzsum; + FFT_SCALAR y0 = z0*rho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int mzy = m*nix + mz; + FFT_SCALAR x0 = y0*rho[1][m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mzyx = l + mzy; + 
FFT_SCALAR w0 = x0*rho[0][l]; + for(int k = 0; k < nsplit; k++) + my_density[mzyx + k*ngrid_6] += x0*rho[0][l]; + } + } + } + } + } + + // reduce all the perthread_densities into global_density + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, global_density) if(!_use_lrt) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, ngrid_6*nsplit, nthr); + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + for(int j = 1; j < nthr; j++) { + global_density[i] += perthread_density[j-1][i]; + } + } + } + +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_c_ik(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + //double *q = atom->q; + //double **x = atom->x; + //double **f = atom->f; + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshiftone = shiftone; + const flt_t fqqrd2es = qqrd2e * scale; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + 
_alignvar(flt_t rho1[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + _alignvar(flt_t rho2[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + + int nxsum = nx + nlower; + int nysum = ny + nlower; + int nzsum = nz + nlower;; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho0[k] = rho_lookup[idx][k]; + rho1[k] = rho_lookup[idy][k]; + rho2[k] = rho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1 = rho_coeff[order-1][k]; + FFT_SCALAR r2 = rho_coeff[order-1][k]; + FFT_SCALAR r3 = rho_coeff[order-1][k]; + for (int l = order-2; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + + rho0[k-nlower] = r1; + rho1[k-nlower] = r2; + rho2[k-nlower] = r3; + } + } + + _alignvar(FFT_SCALAR ekx_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n+nzsum; + FFT_SCALAR z0 = rho2[n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int my = m+nysum; + FFT_SCALAR y0 = z0*rho1[m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + 
int mx = l+nxsum; + FFT_SCALAR x0 = y0*rho0[l]; + ekx_arr[l] -= x0*vdx_brick[mz][my][mx]; + eky_arr[l] -= x0*vdy_brick[mz][my][mx]; + ekz_arr[l] -= x0*vdz_brick[mz][my][mx]; + + } + } + } + + FFT_SCALAR ekx, eky, ekz; + ekx = eky = ekz = ZEROF; + + + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + ekx += ekx_arr[l]; + eky += eky_arr[l]; + ekz += ekz_arr[l]; + } + + // convert E-field to force + + const flt_t qfactor = fqqrd2es * q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_c_ad(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + //double *q = atom->q; + //double **x = atom->x; + //double **f = atom->f; + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + FFT_SCALAR * _noalias const particle_ekx = this->particle_ekx; + FFT_SCALAR * _noalias const particle_eky = this->particle_eky; + FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + const flt_t ftwo_pi = MY_PI * 2.0; + const flt_t ffour_pi = MY_PI * 4.0; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = 
boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshiftone = shiftone; + const flt_t fqqrd2es = qqrd2e * scale; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]*slab_volfactor; + + const flt_t hx_inv = nx_pppm/xprd; + const flt_t hy_inv = ny_pppm/yprd; + const flt_t hz_inv = nz_pppm/zprd; + + const flt_t fsf_coeff0 = sf_coeff[0]; + const flt_t fsf_coeff1 = sf_coeff[1]; + const flt_t fsf_coeff2 = sf_coeff[2]; + const flt_t fsf_coeff3 = sf_coeff[3]; + const flt_t fsf_coeff4 = sf_coeff[4]; + const flt_t fsf_coeff5 = sf_coeff[5]; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t drho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + int nxsum = nx + nlower; + int nysum = ny + nlower; + int nzsum = nz + nlower; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho_lookup[idx][k]; + rho[1][k] = rho_lookup[idy][k]; + rho[2][k] = rho_lookup[idz][k]; + drho[0][k] = drho_lookup[idx][k]; + drho[1][k] = drho_lookup[idy][k]; + drho[2][k] = drho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; + dr1 = dr2 = dr3 = ZEROF; + + r1 = rho_coeff[order-1][k]; + r2 = rho_coeff[order-1][k]; + r3 = 
rho_coeff[order-1][k]; + for (int l = order-2; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1 * dx; + r2 = rho_coeff[l][k] + r2 * dy; + r3 = rho_coeff[l][k] + r3 * dz; + dr1 = drho_coeff[l][k] + dr1 * dx; + dr2 = drho_coeff[l][k] + dr2 * dy; + dr3 = drho_coeff[l][k] + dr3 * dz; + } + rho[0][k-nlower] = r1; + rho[1][k-nlower] = r2; + rho[2][k-nlower] = r3; + drho[0][k-nlower] = dr1; + drho[1][k-nlower] = dr2; + drho[2][k-nlower] = dr3; + } + } + _alignvar(FFT_SCALAR ekx[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + particle_ekx[i] = particle_eky[i] = particle_ekz[i] = ZEROF; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n + nzsum; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int my = m + nysum; + FFT_SCALAR ekx_p = rho[1][m] * rho[2][n]; + FFT_SCALAR eky_p = drho[1][m] * rho[2][n]; + FFT_SCALAR ekz_p = rho[1][m] * drho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + ekx[l] += drho[0][l] * ekx_p * u_brick[mz][my][mx]; + eky[l] += rho[0][l] * eky_p * u_brick[mz][my][mx]; + ekz[l] += rho[0][l] * ekz_p * u_brick[mz][my][mx]; + } + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++){ + particle_ekx[i] += ekx[l]; + particle_eky[i] += eky[l]; + particle_ekz[i] += ekz[l]; + } + } + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + particle_ekx[i] *= hx_inv; + particle_eky[i] *= hy_inv; + particle_ekz[i] *= hz_inv; + + // convert E-field to force + + const flt_t qfactor = fqqrd2es * q[i]; + const flt_t twoqsq = (flt_t)2.0 * q[i] * q[i]; + + const flt_t s1 = x[i][0] * hx_inv; + const flt_t s2 = x[i][1] * hy_inv; + const 
flt_t s3 = x[i][2] * hz_inv; + flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); + sf += fsf_coeff1 * sin(ffour_pi * s1); + sf *= twoqsq; + f[i][0] += qfactor * particle_ekx[i] - fqqrd2es * sf; + + sf = fsf_coeff2 * sin(ftwo_pi * s2); + sf += fsf_coeff3 * sin(ffour_pi * s2); + sf *= twoqsq; + f[i][1] += qfactor * particle_eky[i] - fqqrd2es * sf; + + sf = fsf_coeff4 * sin(ftwo_pi * s3); + sf += fsf_coeff5 * sin(ffour_pi * s3); + sf *= twoqsq; + + if (slabflag != 2) f[i][2] += qfactor * particle_ekz[i] - fqqrd2es * sf; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_g_ik(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double lj; + int type; + double **x = atom->x; + double **f = atom->f; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t rho1[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + _alignvar(flt_t rho2[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + + for (int i = ifrom; i < ito; i++) { 
+ int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho0[k] = rho6_lookup[idx][k]; + rho1[k] = rho6_lookup[idy][k]; + rho2[k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r2 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + + rho0[k-nlower_6] = r1; + rho1[k-nlower_6] = r2; + rho2[k-nlower_6] = r3; + } + } + + _alignvar(FFT_SCALAR ekx_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n+nzsum; + FFT_SCALAR z0 = rho2[n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m+nysum; + FFT_SCALAR y0 = z0*rho1[m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l+nxsum; + FFT_SCALAR x0 = y0*rho0[l]; + ekx_arr[l] -= x0*vdx_brick_g[mz][my][mx]; + eky_arr[l] -= 
x0*vdy_brick_g[mz][my][mx]; + ekz_arr[l] -= x0*vdz_brick_g[mz][my][mx]; + + } + } + } + + FFT_SCALAR ekx, eky, ekz; + ekx = eky = ekz = ZEROF; + + + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + ekx += ekx_arr[l]; + eky += eky_arr[l]; + ekz += ekz_arr[l]; + } + + // convert E-field to force + + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + if (slabflag != 2) f[i][2] += lj*ekz; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for geometric mixing rule for ad scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_g_ad(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + FFT_SCALAR * _noalias const particle_ekx = this->particle_ekx; + FFT_SCALAR * _noalias const particle_eky = this->particle_eky; + FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double **x = atom->x; + double **f = atom->f; + const flt_t ftwo_pi = MY_PI * 2.0; + const flt_t ffour_pi = MY_PI * 4.0; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + const double xprd = prd[0]; + const double 
yprd = prd[1]; + const double zprd = prd[2]*slab_volfactor; + + const flt_t hx_inv = nx_pppm_6/xprd; + const flt_t hy_inv = ny_pppm_6/yprd; + const flt_t hz_inv = nz_pppm_6/zprd; + + const flt_t fsf_coeff0 = sf_coeff_6[0]; + const flt_t fsf_coeff1 = sf_coeff_6[1]; + const flt_t fsf_coeff2 = sf_coeff_6[2]; + const flt_t fsf_coeff3 = sf_coeff_6[3]; + const flt_t fsf_coeff4 = sf_coeff_6[4]; + const flt_t fsf_coeff5 = sf_coeff_6[5]; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t drho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + drho[0][k] = drho6_lookup[idx][k]; + drho[1][k] = drho6_lookup[idy][k]; + drho[2][k] = drho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; + dr1 = dr2 = dr3 = ZEROF; + + r1 = rho_coeff_6[order_6-1][k]; + r2 = rho_coeff_6[order_6-1][k]; + r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1 * dx; + r2 = rho_coeff_6[l][k] + r2 * dy; + r3 = rho_coeff_6[l][k] + r3 * dz; + 
dr1 = drho_coeff_6[l][k] + dr1 * dx; + dr2 = drho_coeff_6[l][k] + dr2 * dy; + dr3 = drho_coeff_6[l][k] + dr3 * dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + drho[0][k-nlower_6] = dr1; + drho[1][k-nlower_6] = dr2; + drho[2][k-nlower_6] = dr3; + } + } + _alignvar(FFT_SCALAR ekx[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + particle_ekx[i] = particle_eky[i] = particle_ekz[i] = ZEROF; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n + nzsum; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m + nysum; + FFT_SCALAR ekx_p = rho[1][m] * rho[2][n]; + FFT_SCALAR eky_p = drho[1][m] * rho[2][n]; + FFT_SCALAR ekz_p = rho[1][m] * drho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + ekx[l] += drho[0][l] * ekx_p * u_brick_g[mz][my][mx]; + eky[l] += rho[0][l] * eky_p * u_brick_g[mz][my][mx]; + ekz[l] += rho[0][l] * ekz_p * u_brick_g[mz][my][mx]; + } + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++){ + particle_ekx[i] += ekx[l]; + particle_eky[i] += eky[l]; + particle_ekz[i] += ekz[l]; + } + } + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + particle_ekx[i] *= hx_inv; + particle_eky[i] *= hy_inv; + particle_ekz[i] *= hz_inv; + + // convert E-field to force + + const int type = atom->type[i]; + const flt_t lj = B[type]; + const flt_t twoljsq = 2.*lj*lj; + + const flt_t s1 = x[i][0] * hx_inv; + const flt_t s2 = x[i][1] * hy_inv; + const flt_t s3 = x[i][2] * hz_inv; + flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); + sf += fsf_coeff1 * sin(ffour_pi * s1); + sf *= twoljsq; + 
f[i][0] += lj * particle_ekx[i] - sf; + + sf = fsf_coeff2 * sin(ftwo_pi * s2); + sf += fsf_coeff3 * sin(ffour_pi * s2); + sf *= twoljsq; + f[i][1] += lj * particle_eky[i] - sf; + + sf = fsf_coeff4 * sin(ftwo_pi * s3); + sf += fsf_coeff5 * sin(ffour_pi * s3); + sf *= twoljsq; + + if (slabflag != 2) f[i][2] += lj * particle_ekz[i] - sf; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule and ik scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_a_ik(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + double **x = atom->x; + double **f = atom->f; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t rho1[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + _alignvar(flt_t rho2[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + 
FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho0[k] = rho6_lookup[idx][k]; + rho1[k] = rho6_lookup[idy][k]; + rho2[k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r2 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + + rho0[k-nlower_6] = r1; + rho1[k-nlower_6] = r2; + rho2[k-nlower_6] = r3; + } + } + + _alignvar(FFT_SCALAR ekx0_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky0_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz0_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx1_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky1_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz1_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx2_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky2_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz2_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx3_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky3_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz3_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx4_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = 
{0}; + _alignvar(FFT_SCALAR eky4_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz4_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx5_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky5_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz5_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx6_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky6_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz6_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n+nzsum; + FFT_SCALAR z0 = rho2[n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m+nysum; + FFT_SCALAR y0 = z0*rho1[m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l+nxsum; + FFT_SCALAR x0 = y0*rho0[l]; + ekx0_arr[l] -= x0*vdx_brick_a0[mz][my][mx]; + eky0_arr[l] -= x0*vdy_brick_a0[mz][my][mx]; + ekz0_arr[l] -= x0*vdz_brick_a0[mz][my][mx]; + ekx1_arr[l] -= x0*vdx_brick_a1[mz][my][mx]; + eky1_arr[l] -= x0*vdy_brick_a1[mz][my][mx]; + ekz1_arr[l] -= x0*vdz_brick_a1[mz][my][mx]; + ekx2_arr[l] -= x0*vdx_brick_a2[mz][my][mx]; + eky2_arr[l] -= x0*vdy_brick_a2[mz][my][mx]; + ekz2_arr[l] -= x0*vdz_brick_a2[mz][my][mx]; + ekx3_arr[l] -= x0*vdx_brick_a3[mz][my][mx]; + eky3_arr[l] -= x0*vdy_brick_a3[mz][my][mx]; + ekz3_arr[l] -= x0*vdz_brick_a3[mz][my][mx]; + ekx4_arr[l] -= x0*vdx_brick_a4[mz][my][mx]; + eky4_arr[l] -= x0*vdy_brick_a4[mz][my][mx]; + ekz4_arr[l] -= x0*vdz_brick_a4[mz][my][mx]; + ekx5_arr[l] -= x0*vdx_brick_a5[mz][my][mx]; + eky5_arr[l] -= x0*vdy_brick_a5[mz][my][mx]; + ekz5_arr[l] -= x0*vdz_brick_a5[mz][my][mx]; + ekx6_arr[l] -= x0*vdx_brick_a6[mz][my][mx]; + eky6_arr[l] -= x0*vdy_brick_a6[mz][my][mx]; + ekz6_arr[l] -= 
x0*vdz_brick_a6[mz][my][mx]; + } + } + } + + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + ekx0 += ekx0_arr[l]; + eky0 += eky0_arr[l]; + ekz0 += ekz0_arr[l]; + ekx1 += ekx1_arr[l]; + eky1 += eky1_arr[l]; + ekz1 += ekz1_arr[l]; + ekx2 += ekx2_arr[l]; + eky2 += eky2_arr[l]; + ekz2 += ekz2_arr[l]; + ekx3 += ekx3_arr[l]; + eky3 += eky3_arr[l]; + ekz3 += ekz3_arr[l]; + ekx4 += ekx4_arr[l]; + eky4 += eky4_arr[l]; + ekz4 += ekz4_arr[l]; + ekx5 += ekx5_arr[l]; + eky5 += eky5_arr[l]; + ekz5 += ekz5_arr[l]; + ekx6 += ekx6_arr[l]; + eky6 += eky6_arr[l]; + ekz6 += ekz6_arr[l]; + } + + // convert D-field to force + + const int type = atom->type[i]; + const FFT_SCALAR lj0 = B[7*type+6]; + const FFT_SCALAR lj1 = B[7*type+5]; + const FFT_SCALAR lj2 = B[7*type+4]; + const FFT_SCALAR lj3 = B[7*type+3]; + const FFT_SCALAR lj4 = B[7*type+2]; + const FFT_SCALAR lj5 = B[7*type+1]; + const FFT_SCALAR lj6 = B[7*type]; + + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + + lj4*eky4 + lj5*eky5 + lj6*eky6; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for arithmetic mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_a_ad(IntelBuffers<flt_t,acc_t> 
*buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + FFT_SCALAR * _noalias const particle_ekx0 = this->particle_ekx0; + FFT_SCALAR * _noalias const particle_eky0 = this->particle_eky0; + FFT_SCALAR * _noalias const particle_ekz0 = this->particle_ekz0; + FFT_SCALAR * _noalias const particle_ekx1 = this->particle_ekx1; + FFT_SCALAR * _noalias const particle_eky1 = this->particle_eky1; + FFT_SCALAR * _noalias const particle_ekz1 = this->particle_ekz1; + FFT_SCALAR * _noalias const particle_ekx2 = this->particle_ekx2; + FFT_SCALAR * _noalias const particle_eky2 = this->particle_eky2; + FFT_SCALAR * _noalias const particle_ekz2 = this->particle_ekz2; + FFT_SCALAR * _noalias const particle_ekx3 = this->particle_ekx3; + FFT_SCALAR * _noalias const particle_eky3 = this->particle_eky3; + FFT_SCALAR * _noalias const particle_ekz3 = this->particle_ekz3; + FFT_SCALAR * _noalias const particle_ekx4 = this->particle_ekx4; + FFT_SCALAR * _noalias const particle_eky4 = this->particle_eky4; + FFT_SCALAR * _noalias const particle_ekz4 = this->particle_ekz4; + FFT_SCALAR * _noalias const particle_ekx5 = this->particle_ekx5; + FFT_SCALAR * _noalias const particle_eky5 = this->particle_eky5; + FFT_SCALAR * _noalias const particle_ekz5 = this->particle_ekz5; + FFT_SCALAR * _noalias const particle_ekx6 = this->particle_ekx6; + FFT_SCALAR * _noalias const particle_eky6 = this->particle_eky6; + FFT_SCALAR * _noalias const particle_ekz6 = this->particle_ekz6; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + 
double **x = atom->x; + double **f = atom->f; + const flt_t ftwo_pi = MY_PI * 2.0; + const flt_t ffour_pi = MY_PI * 4.0; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]*slab_volfactor; + + const flt_t hx_inv = nx_pppm_6/xprd; + const flt_t hy_inv = ny_pppm_6/yprd; + const flt_t hz_inv = nz_pppm_6/zprd; + + const flt_t fsf_coeff0 = sf_coeff_6[0]; + const flt_t fsf_coeff1 = sf_coeff_6[1]; + const flt_t fsf_coeff2 = sf_coeff_6[2]; + const flt_t fsf_coeff3 = sf_coeff_6[3]; + const flt_t fsf_coeff4 = sf_coeff_6[4]; + const flt_t fsf_coeff5 = sf_coeff_6[5]; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t drho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + drho[0][k] = drho6_lookup[idx][k]; + drho[1][k] = drho6_lookup[idy][k]; + drho[2][k] = drho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + 
#pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; + dr1 = dr2 = dr3 = ZEROF; + + r1 = rho_coeff_6[order_6-1][k]; + r2 = rho_coeff_6[order_6-1][k]; + r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1 * dx; + r2 = rho_coeff_6[l][k] + r2 * dy; + r3 = rho_coeff_6[l][k] + r3 * dz; + dr1 = drho_coeff_6[l][k] + dr1 * dx; + dr2 = drho_coeff_6[l][k] + dr2 * dy; + dr3 = drho_coeff_6[l][k] + dr3 * dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + drho[0][k-nlower_6] = dr1; + drho[1][k-nlower_6] = dr2; + drho[2][k-nlower_6] = dr3; + } + } + _alignvar(FFT_SCALAR ekx0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx1[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky1[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz1[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx2[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky2[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz2[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx3[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky3[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz3[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx4[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky4[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz4[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx5[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky5[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz5[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekx6[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR 
eky6[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz6[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + particle_ekx0[i] = particle_eky0[i] = particle_ekz0[i] = ZEROF; + particle_ekx1[i] = particle_eky1[i] = particle_ekz1[i] = ZEROF; + particle_ekx2[i] = particle_eky2[i] = particle_ekz2[i] = ZEROF; + particle_ekx3[i] = particle_eky3[i] = particle_ekz3[i] = ZEROF; + particle_ekx4[i] = particle_eky4[i] = particle_ekz4[i] = ZEROF; + particle_ekx5[i] = particle_eky5[i] = particle_ekz5[i] = ZEROF; + particle_ekx6[i] = particle_eky6[i] = particle_ekz6[i] = ZEROF; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n + nzsum; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m + nysum; + FFT_SCALAR ekx_p = rho[1][m] * rho[2][n]; + FFT_SCALAR eky_p = drho[1][m] * rho[2][n]; + FFT_SCALAR ekz_p = rho[1][m] * drho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + FFT_SCALAR x0 = drho[0][l] * ekx_p; + FFT_SCALAR y0 = rho[0][l] * eky_p; + FFT_SCALAR z0 = rho[0][l] * ekz_p; + + ekx0[l] += x0 * u_brick_a0[mz][my][mx]; + eky0[l] += y0 * u_brick_a0[mz][my][mx]; + ekz0[l] += z0 * u_brick_a0[mz][my][mx]; + ekx1[l] += x0 * u_brick_a1[mz][my][mx]; + eky1[l] += y0 * u_brick_a1[mz][my][mx]; + ekz1[l] += z0 * u_brick_a1[mz][my][mx]; + ekx2[l] += x0 * u_brick_a2[mz][my][mx]; + eky2[l] += y0 * u_brick_a2[mz][my][mx]; + ekz2[l] += z0 * u_brick_a2[mz][my][mx]; + ekx3[l] += x0 * u_brick_a3[mz][my][mx]; + eky3[l] += y0 * u_brick_a3[mz][my][mx]; + ekz3[l] += z0 * u_brick_a3[mz][my][mx]; + ekx4[l] += x0 * u_brick_a4[mz][my][mx]; + eky4[l] += y0 * u_brick_a4[mz][my][mx]; + ekz4[l] += z0 * u_brick_a4[mz][my][mx]; + ekx5[l] += x0 * u_brick_a5[mz][my][mx]; + eky5[l] += y0 * u_brick_a5[mz][my][mx]; + ekz5[l] += z0 * u_brick_a5[mz][my][mx]; + ekx6[l] += x0 * 
u_brick_a6[mz][my][mx]; + eky6[l] += y0 * u_brick_a6[mz][my][mx]; + ekz6[l] += z0 * u_brick_a6[mz][my][mx]; + } + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++){ + particle_ekx0[i] += ekx0[l]; + particle_eky0[i] += eky0[l]; + particle_ekz0[i] += ekz0[l]; + particle_ekx1[i] += ekx1[l]; + particle_eky1[i] += eky1[l]; + particle_ekz1[i] += ekz1[l]; + particle_ekx2[i] += ekx2[l]; + particle_eky2[i] += eky2[l]; + particle_ekz2[i] += ekz2[l]; + particle_ekx3[i] += ekx3[l]; + particle_eky3[i] += eky3[l]; + particle_ekz3[i] += ekz3[l]; + particle_ekx4[i] += ekx4[l]; + particle_eky4[i] += eky4[l]; + particle_ekz4[i] += ekz4[l]; + particle_ekx5[i] += ekx5[l]; + particle_eky5[i] += eky5[l]; + particle_ekz5[i] += ekz5[l]; + particle_ekx6[i] += ekx6[l]; + particle_eky6[i] += eky6[l]; + particle_ekz6[i] += ekz6[l]; + } + } + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + particle_ekx0[i] *= hx_inv; + particle_eky0[i] *= hy_inv; + particle_ekz0[i] *= hz_inv; + particle_ekx1[i] *= hx_inv; + particle_eky1[i] *= hy_inv; + particle_ekz1[i] *= hz_inv; + particle_ekx2[i] *= hx_inv; + particle_eky2[i] *= hy_inv; + particle_ekz2[i] *= hz_inv; + particle_ekx3[i] *= hx_inv; + particle_eky3[i] *= hy_inv; + particle_ekz3[i] *= hz_inv; + particle_ekx4[i] *= hx_inv; + particle_eky4[i] *= hy_inv; + particle_ekz4[i] *= hz_inv; + particle_ekx5[i] *= hx_inv; + particle_eky5[i] *= hy_inv; + particle_ekz5[i] *= hz_inv; + particle_ekx6[i] *= hx_inv; + particle_eky6[i] *= hy_inv; + particle_ekz6[i] *= hz_inv; + + // convert D-field to force + + const int type = atom->type[i]; + const FFT_SCALAR lj0 = B[7*type+6]; + const FFT_SCALAR lj1 = B[7*type+5]; + const FFT_SCALAR lj2 = B[7*type+4]; + const FFT_SCALAR lj3 = B[7*type+3]; + const FFT_SCALAR lj4 = B[7*type+2]; + const FFT_SCALAR lj5 = B[7*type+1]; + const FFT_SCALAR lj6 = B[7*type]; + + const flt_t s1 = x[i][0] * hx_inv; + const 
flt_t s2 = x[i][1] * hy_inv; + const flt_t s3 = x[i][2] * hz_inv; + flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); + sf += fsf_coeff1 * sin(ffour_pi * s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*particle_ekx0[i] + lj1*particle_ekx1[i] + + lj2*particle_ekx2[i] + lj3*particle_ekx3[i] + lj4*particle_ekx4[i] + + lj5*particle_ekx5[i] + lj6*particle_ekx6[i] - sf; + + sf = fsf_coeff2 * sin(ftwo_pi * s2); + sf += fsf_coeff3 * sin(ffour_pi * s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*particle_eky0[i] + lj1*particle_eky1[i] + + lj2*particle_eky2[i] + lj3*particle_eky3[i] + lj4*particle_eky4[i] + + lj5*particle_eky5[i] + lj6*particle_eky6[i] - sf; + + sf = fsf_coeff4 * sin(ftwo_pi * s3); + sf += fsf_coeff5 * sin(ffour_pi * s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) + f[i][2] += lj0*particle_ekz0[i] + lj1*particle_ekz1[i] + + lj2*particle_ekz2[i] + lj3*particle_ekz3[i] + lj4*particle_ekz4[i] + + lj5*particle_ekz5[i] + lj6*particle_ekz6[i] - sf; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for no mixing rule and ik scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void PPPMDispIntel::fieldforce_none_ik(IntelBuffers<flt_t,acc_t> *buffers) +{ + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double lj; + int type; + double **x = atom->x; + double 
**f = atom->f; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho0[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t rho1[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + _alignvar(flt_t rho2[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + + int nxsum = nx + nlower_6; + int nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho0[k] = rho6_lookup[idx][k]; + rho1[k] = rho6_lookup[idy][k]; + rho2[k] = rho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r2 = rho_coeff_6[order_6-1][k]; + FFT_SCALAR r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1*dx; + r2 = rho_coeff_6[l][k] + r2*dy; + r3 = rho_coeff_6[l][k] + r3*dz; + } + + rho0[k-nlower_6] = r1; + rho1[k-nlower_6] = r2; + rho2[k-nlower_6] = r3; + } + } + + + _alignvar(FFT_SCALAR ekx_arr[nsplit*INTEL_P3M_ALIGNED_MAXORDER],64); + _alignvar(FFT_SCALAR eky_arr[nsplit*INTEL_P3M_ALIGNED_MAXORDER],64); + _alignvar(FFT_SCALAR 
ekz_arr[nsplit*INTEL_P3M_ALIGNED_MAXORDER],64); + + for (int k = 0; k < nsplit*INTEL_P3M_ALIGNED_MAXORDER; k++) { + ekx_arr[k] = eky_arr[k] = ekz_arr[k] = ZEROF; + } + + for (int k = 0; k < nsplit; k++) { + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n+nzsum; + FFT_SCALAR z0 = rho2[n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m+nysum; + FFT_SCALAR y0 = z0*rho1[m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l+nxsum; + FFT_SCALAR x0 = y0*rho0[l]; + ekx_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l] -= + x0*vdx_brick_none[k][mz][my][mx]; + eky_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l] -= + x0*vdy_brick_none[k][mz][my][mx]; + ekz_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l] -= + x0*vdz_brick_none[k][mz][my][mx]; + } + } + } + } + + _alignvar(FFT_SCALAR ekx[nsplit], 64); + _alignvar(FFT_SCALAR eky[nsplit], 64); + _alignvar(FFT_SCALAR ekz[nsplit], 64); + for (int k = 0; k < nsplit; k++) { + ekx[k] = eky[k] = ekz[k] = ZEROF; + } + + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + for (int k = 0; k < nsplit; k++) { + ekx[k] += ekx_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l]; + eky[k] += eky_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l]; + ekz[k] += ekz_arr[k*INTEL_P3M_ALIGNED_MAXORDER + l]; + } + } + + // convert E-field to force + + type = atom->type[i]; + for (int k = 0; k < nsplit; k++) { + lj = B[nsplit*type + k]; + f[i][0] += lj*ekx[k]; + f[i][1] += lj*eky[k]; + if (slabflag != 2) f[i][2] += lj*ekz[k]; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for no mixing rule for the ad scheme +------------------------------------------------------------------------- */ + +template<class flt_t, class acc_t, int use_table> +void 
PPPMDispIntel::fieldforce_none_ad(IntelBuffers<flt_t,acc_t> *buffers) +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of dispersion field on particle + + int nlocal = atom->nlocal; + int nthr = comm->nthreads; + + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) + #endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double **x = atom->x; + double **f = atom->f; + const flt_t ftwo_pi = MY_PI * 2.0; + const flt_t ffour_pi = MY_PI * 4.0; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv_6; + const flt_t yi = delyinv_6; + const flt_t zi = delzinv_6; + const flt_t fshiftone = shiftone_6; + + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]*slab_volfactor; + + const flt_t hx_inv = nx_pppm_6/xprd; + const flt_t hy_inv = ny_pppm_6/yprd; + const flt_t hz_inv = nz_pppm_6/zprd; + + const flt_t fsf_coeff0 = sf_coeff_6[0]; + const flt_t fsf_coeff1 = sf_coeff_6[1]; + const flt_t fsf_coeff2 = sf_coeff_6[2]; + const flt_t fsf_coeff3 = sf_coeff_6[3]; + const flt_t fsf_coeff4 = sf_coeff_6[4]; + const flt_t fsf_coeff5 = sf_coeff_6[5]; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t drho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid_6[i][0]; + int ny = part2grid_6[i][1]; + int nz = part2grid_6[i][2]; + FFT_SCALAR dx = nx+fshiftone - (x[i][0]-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i][1]-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i][2]-lo2)*zi; + + int nxsum = nx + nlower_6; + int 
nysum = ny + nlower_6; + int nzsum = nz + nlower_6; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho6_lookup[idx][k]; + rho[1][k] = rho6_lookup[idy][k]; + rho[2][k] = rho6_lookup[idz][k]; + drho[0][k] = drho6_lookup[idx][k]; + drho[1][k] = drho6_lookup[idy][k]; + drho[2][k] = drho6_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower_6; k <= nupper_6; k++) { + FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; + dr1 = dr2 = dr3 = ZEROF; + + r1 = rho_coeff_6[order_6-1][k]; + r2 = rho_coeff_6[order_6-1][k]; + r3 = rho_coeff_6[order_6-1][k]; + for (int l = order_6-2; l >= 0; l--) { + r1 = rho_coeff_6[l][k] + r1 * dx; + r2 = rho_coeff_6[l][k] + r2 * dy; + r3 = rho_coeff_6[l][k] + r3 * dz; + dr1 = drho_coeff_6[l][k] + dr1 * dx; + dr2 = drho_coeff_6[l][k] + dr2 * dy; + dr3 = drho_coeff_6[l][k] + dr3 * dz; + } + rho[0][k-nlower_6] = r1; + rho[1][k-nlower_6] = r2; + rho[2][k-nlower_6] = r3; + drho[0][k-nlower_6] = dr1; + drho[1][k-nlower_6] = dr2; + drho[2][k-nlower_6] = dr3; + } + } + _alignvar(FFT_SCALAR ekx[nsplit*INTEL_P3M_ALIGNED_MAXORDER], 64); + _alignvar(FFT_SCALAR eky[nsplit*INTEL_P3M_ALIGNED_MAXORDER], 64); + _alignvar(FFT_SCALAR ekz[nsplit*INTEL_P3M_ALIGNED_MAXORDER], 64); + + for (int k = 0; k < nsplit*INTEL_P3M_ALIGNED_MAXORDER; k++) { + ekx[k]=eky[k]=ekz[k]=ZEROF; + } + + for (int k = 0; k < nsplit; k++) { + particle_ekx[i] = particle_eky[i] = particle_ekz[i] = ZEROF; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order_6; n++) { + int mz = n + nzsum; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order_6; m++) { + int my = m + nysum; + FFT_SCALAR 
ekx_p = rho[1][m] * rho[2][n]; + FFT_SCALAR eky_p = drho[1][m] * rho[2][n]; + FFT_SCALAR ekz_p = rho[1][m] * drho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + ekx[k*INTEL_P3M_ALIGNED_MAXORDER+l] += drho[0][l] * ekx_p * + u_brick_none[k][mz][my][mx]; + eky[k*INTEL_P3M_ALIGNED_MAXORDER+l] += rho[0][l] * eky_p * + u_brick_none[k][mz][my][mx]; + ekz[k*INTEL_P3M_ALIGNED_MAXORDER+l] += rho[0][l] * ekz_p * + u_brick_none[k][mz][my][mx]; + } + } + } + } + + _alignvar(FFT_SCALAR ekx_tot[nsplit], 64); + _alignvar(FFT_SCALAR eky_tot[nsplit], 64); + _alignvar(FFT_SCALAR ekz_tot[nsplit], 64); + for (int k = 0; k < nsplit; k++) { + ekx_tot[k] = eky_tot[k] = ekz_tot[k] = ZEROF; + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++){ + for (int k = 0; k < nsplit; k++) { + ekx_tot[k] += ekx[k*INTEL_P3M_ALIGNED_MAXORDER+l]; + eky_tot[k] += eky[k*INTEL_P3M_ALIGNED_MAXORDER+l]; + ekz_tot[k] += ekz[k*INTEL_P3M_ALIGNED_MAXORDER+l]; + } + } + + for (int k = 0; k < nsplit; k++) { + ekx_tot[k] *= hx_inv; + eky_tot[k] *= hy_inv; + ekz_tot[k] *= hz_inv; + } + // convert D-field to force + + const int type = atom->type[i]; + + const flt_t s1 = x[i][0] * hx_inv; + const flt_t s2 = x[i][1] * hy_inv; + const flt_t s3 = x[i][2] * hz_inv; + flt_t sf1 = fsf_coeff0 * sin(ftwo_pi * s1); + sf1 += fsf_coeff1 * sin(ffour_pi * s1); + + flt_t sf2 = fsf_coeff2 * sin(ftwo_pi * s2); + sf2 += fsf_coeff3 * sin(ffour_pi * s2); + + flt_t sf3 = fsf_coeff4 * sin(ftwo_pi * s3); + sf3 += fsf_coeff5 * sin(ffour_pi * s3); + for (int k = 0; k < nsplit; k++) { + const flt_t lj = B[nsplit*type + k]; + const flt_t twoljsq = lj*lj * B[k] * 2; + flt_t sf = sf1*twoljsq; + f[i][0] += lj * ekx_tot[k] - sf; + sf = sf2*twoljsq; + f[i][1] += lj * eky_tot[k] - sf; + sf = sf3*twoljsq; + if (slabflag != 2) f[i][2] += lj * ekz_tot[k] - sf; + } + } + } +} + +/* 
---------------------------------------------------------------------- + precompute rho coefficients as a lookup table to save time in make_rho + and fieldforce. Instead of doing this polynomial for every atom 6 times + per time step, precompute it for some number of points. +------------------------------------------------------------------------- */ + +void PPPMDispIntel::precompute_rho() +{ + + half_rho_scale = (rho_points - 1.)/2.; + half_rho_scale_plus = half_rho_scale + 0.5; + + for (int i = 0; i < rho_points; i++) { + FFT_SCALAR dx = -1. + 1./half_rho_scale * (FFT_SCALAR)i; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k=nlower; k<=nupper;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order-1; l>=0; l--){ + r1 = rho_coeff[l][k] + r1*dx; + } + rho_lookup[i][k-nlower] = r1; + } + for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho_lookup[i][k] = 0; + } + if (differentiation_flag == 1) { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k=nlower; k<=nupper;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order-2; l>=0; l--){ + r1 = drho_coeff[l][k] + r1*dx; + } + drho_lookup[i][k-nlower] = r1; + } + for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + drho_lookup[i][k] = 0; + } + } + } + for (int i = 0; i < rho_points; i++) { + FFT_SCALAR dx = -1. 
+ 1./half_rho_scale * (FFT_SCALAR)i; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k=nlower_6; k<=nupper_6;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order_6-1; l>=0; l--){ + r1 = rho_coeff_6[l][k] + r1*dx; + } + rho6_lookup[i][k-nlower_6] = r1; + } + for (int k = nupper_6-nlower_6+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho6_lookup[i][k] = 0; + } + if (differentiation_flag == 1) { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k=nlower_6; k<=nupper_6;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order_6-2; l>=0; l--){ + r1 = drho_coeff_6[l][k] + r1*dx; + } + drho6_lookup[i][k-nlower_6] = r1; + } + for (int k = nupper_6-nlower_6+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + drho6_lookup[i][k] = 0; + } + } + } +} + +/* ---------------------------------------------------------------------- + Returns 0 if Intel optimizations for PPPM ignored due to offload +------------------------------------------------------------------------- */ + +#ifdef _LMP_INTEL_OFFLOAD +int PPPMDispIntel::use_base() { + return _use_base; +} +#endif diff --git a/src/USER-INTEL/pppm_disp_intel.h b/src/USER-INTEL/pppm_disp_intel.h new file mode 100644 index 0000000000000000000000000000000000000000..65c43dd486ba3219c29a17fe74f82802c1784fa1 --- /dev/null +++ b/src/USER-INTEL/pppm_disp_intel.h @@ -0,0 +1,238 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: William McDoniel (RWTH Aachen University) +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/disp/intel,PPPMDispIntel) + +#else + +#ifndef LMP_PPPMINTEL_DISP_H +#define LMP_PPPMINTEL_DISP_H + +#include "pppm_disp.h" +#include "fix_intel.h" + +namespace LAMMPS_NS { + + class PPPMDispIntel : public PPPMDisp { + public: + PPPMDispIntel(class LAMMPS *, int, char **); + virtual ~PPPMDispIntel(); + virtual void init(); + virtual void compute(int, int); + + #ifdef _LMP_INTEL_OFFLOAD + int use_base(); + #endif + + protected: + FixIntel *fix; + + int _use_lrt; + FFT_SCALAR **perthread_density; + FFT_SCALAR *particle_ekx; + FFT_SCALAR *particle_eky; + FFT_SCALAR *particle_ekz; + FFT_SCALAR *particle_ekx0; + FFT_SCALAR *particle_eky0; + FFT_SCALAR *particle_ekz0; + FFT_SCALAR *particle_ekx1; + FFT_SCALAR *particle_eky1; + FFT_SCALAR *particle_ekz1; + FFT_SCALAR *particle_ekx2; + FFT_SCALAR *particle_eky2; + FFT_SCALAR *particle_ekz2; + FFT_SCALAR *particle_ekx3; + FFT_SCALAR *particle_eky3; + FFT_SCALAR *particle_ekz3; + FFT_SCALAR *particle_ekx4; + FFT_SCALAR *particle_eky4; + FFT_SCALAR *particle_ekz4; + FFT_SCALAR *particle_ekx5; + FFT_SCALAR *particle_eky5; + FFT_SCALAR *particle_ekz5; + FFT_SCALAR *particle_ekx6; + FFT_SCALAR *particle_eky6; + FFT_SCALAR *particle_ekz6; + + + + int _use_table; + int rho_points; + FFT_SCALAR **rho_lookup; + FFT_SCALAR **rho6_lookup; + FFT_SCALAR **drho_lookup; + FFT_SCALAR **drho6_lookup; + FFT_SCALAR half_rho_scale, half_rho_scale_plus; + + int _use_packing; + + + #ifdef _LMP_INTEL_OFFLOAD + int _use_base; + #endif + + template<class flt_t, class acc_t> + void particle_map(double, double, double, + double, int **, int, int, + int, int, int, + int, int, int, + IntelBuffers<flt_t,acc_t> 
*buffers); + + template<class flt_t, class acc_t, int use_table> + void make_rho_c(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void make_rho_c(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + make_rho_c<flt_t,acc_t,1>(buffers); + } else { + make_rho_c<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void make_rho_g(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void make_rho_g(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + make_rho_g<flt_t,acc_t,1>(buffers); + } else { + make_rho_g<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void make_rho_a(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void make_rho_a(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + make_rho_a<flt_t,acc_t,1>(buffers); + } else { + make_rho_a<flt_t,acc_t,0>(buffers); + } + } + + + template<class flt_t, class acc_t, int use_table> + void make_rho_none(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void make_rho_none(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + make_rho_none<flt_t,acc_t,1>(buffers); + } else { + make_rho_none<flt_t,acc_t,0>(buffers); + } + } + + + template<class flt_t, class acc_t, int use_table> + void fieldforce_c_ik(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_c_ik(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_c_ik<flt_t,acc_t,1>(buffers); + } else { + fieldforce_c_ik<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void fieldforce_c_ad(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_c_ad(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_c_ad<flt_t,acc_t,1>(buffers); + } else { + fieldforce_c_ad<flt_t,acc_t,0>(buffers); + } + } 
+ + template<class flt_t, class acc_t, int use_table> + void fieldforce_g_ik(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_g_ik(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_g_ik<flt_t,acc_t,1>(buffers); + } else { + fieldforce_g_ik<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void fieldforce_g_ad(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_g_ad(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_g_ad<flt_t,acc_t,1>(buffers); + } else { + fieldforce_g_ad<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void fieldforce_a_ik(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_a_ik(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_a_ik<flt_t,acc_t,1>(buffers); + } else { + fieldforce_a_ik<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void fieldforce_a_ad(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_a_ad(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_a_ad<flt_t,acc_t,1>(buffers); + } else { + fieldforce_a_ad<flt_t,acc_t,0>(buffers); + } + } + template<class flt_t, class acc_t, int use_table> + void fieldforce_none_ik(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_none_ik(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_none_ik<flt_t,acc_t,1>(buffers); + } else { + fieldforce_none_ik<flt_t,acc_t,0>(buffers); + } + } + + template<class flt_t, class acc_t, int use_table> + void fieldforce_none_ad(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_none_ad(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + 
fieldforce_none_ad<flt_t,acc_t,1>(buffers); + } else { + fieldforce_none_ad<flt_t,acc_t,0>(buffers); + } + } + + void precompute_rho(); + + }; + +} +#endif +#endif + + diff --git a/src/USER-INTEL/pppm_intel.cpp b/src/USER-INTEL/pppm_intel.cpp index c420a23bf4234dd0e89dc4fd99df5209dcd0624a..8416b6f3a35cb74159af8131e92c5b0b04c09355 100644 --- a/src/USER-INTEL/pppm_intel.cpp +++ b/src/USER-INTEL/pppm_intel.cpp @@ -12,7 +12,9 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Rodrigo Canales (RWTH Aachen University) + Contributing authors: William McDoniel (RWTH Aachen University) + Rodrigo Canales (RWTH Aachen University) + Markus Hoehnerbach (RWTH Aachen University) W. Michael Brown (Intel) ------------------------------------------------------------------------- */ @@ -22,6 +24,7 @@ #include "pppm_intel.h" #include "atom.h" #include "error.h" +#include "fft3d_wrap.h" #include "gridcomm.h" #include "math_const.h" #include "math_special.h" @@ -54,10 +57,37 @@ enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM}; PPPMIntel::PPPMIntel(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg) { suffix_flag |= Suffix::INTEL; + + order = 7; //sets default stencil size to 7 + + perthread_density = NULL; + particle_ekx = particle_eky = particle_ekz = NULL; + + rho_lookup = drho_lookup = NULL; + rho_points = 0; + + vdxy_brick = vdz0_brick = NULL; + work3 = NULL; + cg_pack = NULL; + + _use_table = _use_packing = _use_lrt = 0; } PPPMIntel::~PPPMIntel() { + memory->destroy(perthread_density); + memory->destroy(particle_ekx); + memory->destroy(particle_eky); + memory->destroy(particle_ekz); + + memory->destroy(rho_lookup); + memory->destroy(drho_lookup); + + memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out); + memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out); + memory->destroy(work3); + + delete cg_pack; 
} /* ---------------------------------------------------------------------- @@ -83,17 +113,64 @@ void PPPMIntel::init() fix->kspace_init_check(); + _use_lrt = fix->lrt(); + + // For vectorization, we need some padding in the end + // The first thread computes on the global density + if ((comm->nthreads > 1) && !_use_lrt) { + memory->destroy(perthread_density); + memory->create(perthread_density, comm->nthreads-1, + ngrid + INTEL_P3M_ALIGNED_MAXORDER, + "pppmintel:perthread_density"); + } + + _use_table = fix->pppm_table(); + if (_use_table) { + rho_points = 5000; + memory->destroy(rho_lookup); + memory->create(rho_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmintel:rho_lookup"); + if(differentiation_flag == 1) { + memory->destroy(drho_lookup); + memory->create(drho_lookup, rho_points, INTEL_P3M_ALIGNED_MAXORDER, + "pppmintel:drho_lookup"); + } + precompute_rho(); + } + if (order > INTEL_P3M_MAXORDER) error->all(FLERR,"PPPM order greater than supported by USER-INTEL\n"); - /* - if (fix->precision() == FixIntel::PREC_MODE_MIXED) - pack_force_const(force_const_single, fix->get_mixed_buffers()); - else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) - pack_force_const(force_const_double, fix->get_double_buffers()); - else - pack_force_const(force_const_single, fix->get_single_buffers()); - */ + _use_packing = (order == 7) && (INTEL_VECTOR_WIDTH == 16) + && (sizeof(FFT_SCALAR) == sizeof(float)) + && (differentiation_flag == 0); + if (_use_packing) { + memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out); + memory->destroy3d_offset(vdxy_brick, nzlo_out, nylo_out, 2*nxlo_out); + memory->create3d_offset(vdxy_brick, nzlo_out, nzhi_out+2, + nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1, + "pppmintel:vdxy_brick"); + memory->destroy3d_offset(vdz0_brick, nzlo_out, nylo_out, 2*nxlo_out); + memory->create3d_offset(vdz0_brick, 
nzlo_out, nzhi_out+2, + nylo_out, nyhi_out, 2*nxlo_out, 2*nxhi_out+1, + "pppmintel:vdz0_brick"); + memory->destroy(work3); + memory->create(work3, 2*nfft_both, "pppmintel:work3"); + + // new communicator for the double-size bricks + delete cg_pack; + int (*procneigh)[2] = comm->procneigh; + cg_pack = new GridComm(lmp,world,2,0, 2*nxlo_in,2*nxhi_in+1,nylo_in, + nyhi_in,nzlo_in,nzhi_in, 2*nxlo_out,2*nxhi_out+1, + nylo_out,nyhi_out,nzlo_out,nzhi_out, + procneigh[0][0],procneigh[0][1],procneigh[1][0], + procneigh[1][1],procneigh[2][0],procneigh[2][1]); + + cg_pack->ghost_notify(); + cg_pack->setup(); + } } /* ---------------------------------------------------------------------- @@ -154,8 +231,18 @@ void PPPMIntel::compute_first(int eflag, int vflag) if (atom->nmax > nmax) { memory->destroy(part2grid); + if (differentiation_flag == 1) { + memory->destroy(particle_ekx); + memory->destroy(particle_eky); + memory->destroy(particle_ekz); + } nmax = atom->nmax; memory->create(part2grid,nmax,3,"pppm:part2grid"); + if (differentiation_flag == 1) { + memory->create(particle_ekx, nmax, "pppmintel:pekx"); + memory->create(particle_eky, nmax, "pppmintel:peky"); + memory->create(particle_ekz, nmax, "pppmintel:pekz"); + } } // find grid points for all my particles @@ -184,13 +271,19 @@ void PPPMIntel::compute_first(int eflag, int vflag) // return gradients (electric fields) in 3d brick decomposition // also performs per-atom calculations via poisson_peratom() - poisson(); + if (differentiation_flag == 1) poisson_ad(); + else poisson_ik_intel(); // all procs communicate E-field values // to fill ghost cells surrounding their 3d bricks if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD); - else cg->forward_comm(this,FORWARD_IK); + else { + if (_use_packing) + cg_pack->forward_comm(this,FORWARD_IK); + else + cg->forward_comm(this,FORWARD_IK); + } // extra per-atom energy/virial communication @@ -297,48 +390,60 @@ void PPPMIntel::compute_second(int eflag, int vflag) 
template<class flt_t, class acc_t> void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers) { - int nx,ny,nz; - ATOM_T * _noalias const x = buffers->get_x(0); int nlocal = atom->nlocal; + int nthr; + if (_use_lrt) + nthr = 1; + else + nthr = comm->nthreads; int flag = 0; if (!ISFINITE(boxlo[0]) || !ISFINITE(boxlo[1]) || !ISFINITE(boxlo[2])) error->one(FLERR,"Non-numeric box dimensions - simulation unstable"); - const flt_t lo0 = boxlo[0]; - const flt_t lo1 = boxlo[1]; - const flt_t lo2 = boxlo[2]; - const flt_t xi = delxinv; - const flt_t yi = delyinv; - const flt_t zi = delzinv; - const flt_t fshift = shift; - - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned - #pragma simd + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) reduction(+:flag) if(!_use_lrt) #endif - for (int i = 0; i < nlocal; i++) { - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast<int> ((x[i].x-lo0)*xi+fshift) - OFFSET; - ny = static_cast<int> ((x[i].y-lo1)*yi+fshift) - OFFSET; - nz = static_cast<int> ((x[i].z-lo2)*zi+fshift) - OFFSET; - - part2grid[i][0] = nx; - part2grid[i][1] = ny; - part2grid[i][2] = nz; - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || - ny+nlower < nylo_out || ny+nupper > nyhi_out || - nz+nlower < nzlo_out || nz+nupper > nzhi_out) - flag = 1; + { + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshift = shift; + + int iifrom, iito, tid; + IP_PRE_omp_range_id_align(iifrom, iito, tid, nlocal, nthr, sizeof(ATOM_T)); + + #if defined(LMP_SIMD_COMPILER) + #pragma vector aligned + #pragma simd reduction(+:flag) + #endif + for (int i = iifrom; i 
< iito; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + int nx = static_cast<int> ((x[i].x-lo0)*xi+fshift) - OFFSET; + int ny = static_cast<int> ((x[i].y-lo1)*yi+fshift) - OFFSET; + int nz = static_cast<int> ((x[i].z-lo2)*zi+fshift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag = 1; + } } if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); @@ -352,13 +457,11 @@ void PPPMIntel::particle_map(IntelBuffers<flt_t,acc_t> *buffers) in global grid ------------------------------------------------------------------------- */ -template<class flt_t, class acc_t> +template<class flt_t, class acc_t, int use_table> void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers) { - // clear 3d density array - - memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0, - ngrid*sizeof(FFT_SCALAR)); + FFT_SCALAR * _noalias global_density = + &(density_brick[nzlo_out][nylo_out][nxlo_out]); // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -368,52 +471,129 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers) ATOM_T * _noalias const x = buffers->get_x(0); flt_t * _noalias const q = buffers->get_q(0); int nlocal = atom->nlocal; + int nthr; + if (_use_lrt) + nthr = 1; + else + nthr = comm->nthreads; - const flt_t lo0 = boxlo[0]; - const flt_t lo1 = boxlo[1]; - const flt_t lo2 = boxlo[2]; - const flt_t xi = delxinv; - const flt_t yi = delyinv; - const flt_t zi = delzinv; - const flt_t fshift = shift; - const flt_t fshiftone = shiftone; - 
const flt_t fdelvolinv = delvolinv; - - for (int i = 0; i < nlocal; i++) { - - int nx = part2grid[i][0]; - int ny = part2grid[i][1]; - int nz = part2grid[i][2]; - FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; - FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; - FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; - - flt_t rho[3][INTEL_P3M_MAXORDER]; + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, nlocal, global_density) if(!_use_lrt) + #endif + { + const int nix = nxhi_out - nxlo_out + 1; + const int niy = nyhi_out - nylo_out + 1; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshift = shift; + const flt_t fshiftone = shiftone; + const flt_t fdelvolinv = delvolinv; - for (int k = nlower; k <= nupper; k++) { - FFT_SCALAR r1,r2,r3; - r1 = r2 = r3 = ZEROF; + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + FFT_SCALAR * _noalias my_density = tid == 0 ? 
+ global_density : perthread_density[tid - 1]; + // clear 3d density array + memset(my_density, 0, ngrid * sizeof(FFT_SCALAR)); + + for (int i = ifrom; i < ito; i++) { + + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + + int nysum = nlower + ny - nylo_out; + int nxsum = nlower + nx - nxlo_out; + int nzsum = (nlower + nz - nzlo_out)*nix*niy + nysum*nix + nxsum; + + FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho_lookup[idx][k]; + rho[1][k] = rho_lookup[idy][k]; + rho[2][k] = rho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1,r2,r3; + r1 = r2 = r3 = ZEROF; + + for (int l = order-1; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + rho[0][k-nlower] = r1; + rho[1][k-nlower] = r2; + rho[2][k-nlower] = r3; + } + } - for (int l = order-1; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; + FFT_SCALAR z0 = fdelvolinv * q[i]; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n*nix*niy + nzsum; + FFT_SCALAR y0 = z0*rho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int mzy = m*nix + mz; + FFT_SCALAR x0 = y0*rho[1][m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 
0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mzyx = l + mzy; + my_density[mzyx] += x0*rho[0][l]; + } + } } - rho[0][k-nlower] = r1; - rho[1][k-nlower] = r2; - rho[2][k-nlower] = r3; } + } - FFT_SCALAR z0 = fdelvolinv * q[i]; - for (int n = nlower; n <= nupper; n++) { - int mz = n+nz; - FFT_SCALAR y0 = z0*rho[2][n-nlower]; - for (int m = nlower; m <= nupper; m++) { - int my = m+ny; - FFT_SCALAR x0 = y0*rho[1][m-nlower]; - for (int l = nlower; l <= nupper; l++) { - int mx = l+nx; - density_brick[mz][my][mx] += x0*rho[0][l-nlower]; + // reduce all the perthread_densities into global_density + if (nthr > 1) { + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nthr, global_density) if(!_use_lrt) + #endif + { + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, ngrid, nthr); + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + for(int j = 1; j < nthr; j++) { + global_density[i] += perthread_density[j-1][i]; } } } @@ -424,7 +604,7 @@ void PPPMIntel::make_rho(IntelBuffers<flt_t,acc_t> *buffers) interpolate from grid to get electric field & force on my particles for ik ------------------------------------------------------------------------- */ -template<class flt_t, class acc_t> +template<class flt_t, class acc_t, int use_table, int use_packing> void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) { // loop over my charges, interpolate electric field from nearby grid points @@ -437,68 +617,151 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) flt_t * _noalias const q = buffers->get_q(0); FORCE_T * _noalias const f = buffers->get_f(); int nlocal = atom->nlocal; + int nthr; + if (_use_lrt) + nthr = 1; + else + nthr = comm->nthreads; - const flt_t lo0 = boxlo[0]; - const flt_t lo1 = boxlo[1]; - const flt_t lo2 = boxlo[2]; - const flt_t xi = delxinv; - const flt_t yi = delyinv; - const flt_t zi = delzinv; - const flt_t fshiftone = shiftone; - const flt_t fqqrd2es = 
qqrd2e * scale; - - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nontemporal - #pragma simd + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) #endif - for (int i = 0; i < nlocal; i++) { - int nx = part2grid[i][0]; - int ny = part2grid[i][1]; - int nz = part2grid[i][2]; - FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; - FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; - FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; - - flt_t rho[3][INTEL_P3M_MAXORDER]; - - for (int k = nlower; k <= nupper; k++) { - FFT_SCALAR r1 = rho_coeff[order-1][k]; - FFT_SCALAR r2 = rho_coeff[order-1][k]; - FFT_SCALAR r3 = rho_coeff[order-1][k]; - for (int l = order-2; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1*dx; - r2 = rho_coeff[l][k] + r2*dy; - r3 = rho_coeff[l][k] + r3*dz; + { + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshiftone = shiftone; + const flt_t fqqrd2es = qqrd2e * scale; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho0[2 * INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t rho1[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + _alignvar(flt_t rho2[INTEL_P3M_ALIGNED_MAXORDER] , 64)= {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + + int nxsum = (use_packing ? 
2 : 1) * (nx + nlower); + int nysum = ny + nlower; + int nzsum = nz + nlower;; + + FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + if (use_packing) { + rho0[2 * k] = rho_lookup[idx][k]; + rho0[2 * k + 1] = rho_lookup[idx][k]; + } else { + rho0[k] = rho_lookup[idx][k]; + } + rho1[k] = rho_lookup[idy][k]; + rho2[k] = rho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1 = rho_coeff[order-1][k]; + FFT_SCALAR r2 = rho_coeff[order-1][k]; + FFT_SCALAR r3 = rho_coeff[order-1][k]; + for (int l = order-2; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1*dx; + r2 = rho_coeff[l][k] + r2*dy; + r3 = rho_coeff[l][k] + r3*dz; + } + if (use_packing) { + rho0[2 * (k-nlower)] = r1; + rho0[2 * (k-nlower) + 1] = r1; + } else { + rho0[k-nlower] = r1; + } + rho1[k-nlower] = r2; + rho2[k-nlower] = r3; + } } - rho[0][k-nlower] = r1; - rho[1][k-nlower] = r2; - rho[2][k-nlower] = r3; - } - FFT_SCALAR ekx, eky, ekz; - ekx = eky = ekz = ZEROF; - for (int n = nlower; n <= nupper; n++) { - int mz = n+nz; - FFT_SCALAR z0 = rho[2][n-nlower]; - for (int m = nlower; m <= nupper; m++) { - int my = m+ny; - FFT_SCALAR y0 = z0*rho[1][m-nlower]; - for (int l = nlower; l <= nupper; l++) { - int mx = l+nx; - FFT_SCALAR x0 = y0*rho[0][l-nlower]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; + _alignvar(FFT_SCALAR ekx_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + 
_alignvar(FFT_SCALAR ekz_arr[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekxy_arr[2 * INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz0_arr[2 * INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n+nzsum; + FFT_SCALAR z0 = rho2[n]; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int my = m+nysum; + FFT_SCALAR y0 = z0*rho1[m]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < (use_packing ? 2 : 1) * + INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l+nxsum; + FFT_SCALAR x0 = y0*rho0[l]; + if (use_packing) { + ekxy_arr[l] -= x0*vdxy_brick[mz][my][mx]; + ekz0_arr[l] -= x0*vdz0_brick[mz][my][mx]; + } else { + ekx_arr[l] -= x0*vdx_brick[mz][my][mx]; + eky_arr[l] -= x0*vdy_brick[mz][my][mx]; + ekz_arr[l] -= x0*vdz_brick[mz][my][mx]; + } + } + } + } + + FFT_SCALAR ekx, eky, ekz; + ekx = eky = ekz = ZEROF; + + if (use_packing) { + for (int l = 0; l < 2*INTEL_P3M_ALIGNED_MAXORDER; l += 2) { + ekx += ekxy_arr[l]; + eky += ekxy_arr[l+1]; + ekz += ekz0_arr[l]; + } + } else { + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + ekx += ekx_arr[l]; + eky += eky_arr[l]; + ekz += ekz_arr[l]; } } - } - // convert E-field to force + // convert E-field to force - const flt_t qfactor = fqqrd2es * q[i]; - f[i].x += qfactor*ekx; - f[i].y += qfactor*eky; - if (slabflag != 2) f[i].z += qfactor*ekz; + const flt_t qfactor = fqqrd2es * q[i]; + f[i].x += qfactor*ekx; + f[i].y += qfactor*eky; + if (slabflag != 2) f[i].z += qfactor*ekz; + } } } @@ -506,7 +769,7 @@ void PPPMIntel::fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) interpolate from grid to get electric field & force on my particles for ad ------------------------------------------------------------------------- */ -template<class flt_t, class acc_t> +template<class flt_t, class acc_t, int use_table> void 
PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers) { // loop over my charges, interpolate electric field from nearby grid points @@ -519,118 +782,434 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers) const flt_t * _noalias const q = buffers->get_q(0); FORCE_T * _noalias const f = buffers->get_f(); int nlocal = atom->nlocal; + int nthr; + if (_use_lrt) + nthr = 1; + else + nthr = comm->nthreads; + + FFT_SCALAR * _noalias const particle_ekx = this->particle_ekx; + FFT_SCALAR * _noalias const particle_eky = this->particle_eky; + FFT_SCALAR * _noalias const particle_ekz = this->particle_ekz; - const flt_t ftwo_pi = MY_PI * 2.0; - const flt_t ffour_pi = MY_PI * 4.0; - - const flt_t lo0 = boxlo[0]; - const flt_t lo1 = boxlo[1]; - const flt_t lo2 = boxlo[2]; - const flt_t xi = delxinv; - const flt_t yi = delyinv; - const flt_t zi = delzinv; - const flt_t fshiftone = shiftone; - const flt_t fqqrd2es = qqrd2e * scale; - - const double *prd = domain->prd; - const double xprd = prd[0]; - const double yprd = prd[1]; - const double zprd = prd[2]; - - const flt_t hx_inv = nx_pppm/xprd; - const flt_t hy_inv = ny_pppm/yprd; - const flt_t hz_inv = nz_pppm/zprd; - - const flt_t fsf_coeff0 = sf_coeff[0]; - const flt_t fsf_coeff1 = sf_coeff[1]; - const flt_t fsf_coeff2 = sf_coeff[2]; - const flt_t fsf_coeff3 = sf_coeff[3]; - const flt_t fsf_coeff4 = sf_coeff[4]; - const flt_t fsf_coeff5 = sf_coeff[5]; - - #if defined(LMP_SIMD_COMPILER) - #pragma vector aligned nontemporal - #pragma simd + #if defined(_OPENMP) + #pragma omp parallel default(none) \ + shared(nlocal, nthr) if(!_use_lrt) #endif - for (int i = 0; i < nlocal; i++) { - int nx = part2grid[i][0]; - int ny = part2grid[i][1]; - int nz = part2grid[i][2]; - FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; - FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; - FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; - - flt_t rho[3][INTEL_P3M_MAXORDER]; - flt_t drho[3][INTEL_P3M_MAXORDER]; - - for (int k = nlower; k 
<= nupper; k++) { - FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; - dr1 = dr2 = dr3 = ZEROF; - - r1 = rho_coeff[order-1][k]; - r2 = rho_coeff[order-1][k]; - r3 = rho_coeff[order-1][k]; - for (int l = order-2; l >= 0; l--) { - r1 = rho_coeff[l][k] + r1 * dx; - r2 = rho_coeff[l][k] + r2 * dy; - r3 = rho_coeff[l][k] + r3 * dz; - dr1 = drho_coeff[l][k] + dr1 * dx; - dr2 = drho_coeff[l][k] + dr2 * dy; - dr3 = drho_coeff[l][k] + dr3 * dz; + { + const flt_t ftwo_pi = MY_PI * 2.0; + const flt_t ffour_pi = MY_PI * 4.0; + + const flt_t lo0 = boxlo[0]; + const flt_t lo1 = boxlo[1]; + const flt_t lo2 = boxlo[2]; + const flt_t xi = delxinv; + const flt_t yi = delyinv; + const flt_t zi = delzinv; + const flt_t fshiftone = shiftone; + const flt_t fqqrd2es = qqrd2e * scale; + + const double *prd = domain->prd; + const double xprd = prd[0]; + const double yprd = prd[1]; + const double zprd = prd[2]; + + const flt_t hx_inv = nx_pppm/xprd; + const flt_t hy_inv = ny_pppm/yprd; + const flt_t hz_inv = nz_pppm/zprd; + + const flt_t fsf_coeff0 = sf_coeff[0]; + const flt_t fsf_coeff1 = sf_coeff[1]; + const flt_t fsf_coeff2 = sf_coeff[2]; + const flt_t fsf_coeff3 = sf_coeff[3]; + const flt_t fsf_coeff4 = sf_coeff[4]; + const flt_t fsf_coeff5 = sf_coeff[5]; + + int ifrom, ito, tid; + IP_PRE_omp_range_id(ifrom, ito, tid, nlocal, nthr); + + _alignvar(flt_t rho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(flt_t drho[3][INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + for (int i = ifrom; i < ito; i++) { + int nx = part2grid[i][0]; + int ny = part2grid[i][1]; + int nz = part2grid[i][2]; + FFT_SCALAR dx = nx+fshiftone - (x[i].x-lo0)*xi; + FFT_SCALAR dy = ny+fshiftone - (x[i].y-lo1)*yi; + FFT_SCALAR dz = nz+fshiftone - (x[i].z-lo2)*zi; + + int nxsum = nx + nlower; + int nysum = ny + nlower; + int nzsum = nz + nlower; + + if (use_table) { + dx = dx*half_rho_scale + half_rho_scale_plus; + int idx = dx; + dy = dy*half_rho_scale + half_rho_scale_plus; + int idy = dy; + dz = dz*half_rho_scale + 
half_rho_scale_plus; + int idz = dz; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k = 0; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho[0][k] = rho_lookup[idx][k]; + rho[1][k] = rho_lookup[idy][k]; + rho[2][k] = rho_lookup[idz][k]; + drho[0][k] = drho_lookup[idx][k]; + drho[1][k] = drho_lookup[idy][k]; + drho[2][k] = drho_lookup[idz][k]; + } + } else { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k = nlower; k <= nupper; k++) { + FFT_SCALAR r1,r2,r3,dr1,dr2,dr3; + dr1 = dr2 = dr3 = ZEROF; + + r1 = rho_coeff[order-1][k]; + r2 = rho_coeff[order-1][k]; + r3 = rho_coeff[order-1][k]; + for (int l = order-2; l >= 0; l--) { + r1 = rho_coeff[l][k] + r1 * dx; + r2 = rho_coeff[l][k] + r2 * dy; + r3 = rho_coeff[l][k] + r3 * dz; + dr1 = drho_coeff[l][k] + dr1 * dx; + dr2 = drho_coeff[l][k] + dr2 * dy; + dr3 = drho_coeff[l][k] + dr3 * dz; + } + rho[0][k-nlower] = r1; + rho[1][k-nlower] = r2; + rho[2][k-nlower] = r3; + drho[0][k-nlower] = dr1; + drho[1][k-nlower] = dr2; + drho[2][k-nlower] = dr3; + } + } + + _alignvar(FFT_SCALAR ekx[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR eky[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + _alignvar(FFT_SCALAR ekz[INTEL_P3M_ALIGNED_MAXORDER], 64) = {0}; + + particle_ekx[i] = particle_eky[i] = particle_ekz[i] = ZEROF; + + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int n = 0; n < order; n++) { + int mz = n + nzsum; + #if defined(LMP_SIMD_COMPILER) + #pragma loop_count=7 + #endif + for (int m = 0; m < order; m++) { + int my = m + nysum; + FFT_SCALAR ekx_p = rho[1][m] * rho[2][n]; + FFT_SCALAR eky_p = drho[1][m] * rho[2][n]; + FFT_SCALAR ekz_p = rho[1][m] * drho[2][n]; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++) { + int mx = l + nxsum; + ekx[l] += drho[0][l] * ekx_p * u_brick[mz][my][mx]; + eky[l] += rho[0][l] * eky_p * u_brick[mz][my][mx]; + ekz[l] += rho[0][l] * ekz_p * u_brick[mz][my][mx]; + } 
+ } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int l = 0; l < INTEL_P3M_ALIGNED_MAXORDER; l++){ + particle_ekx[i] += ekx[l]; + particle_eky[i] += eky[l]; + particle_ekz[i] += ekz[l]; + } + } + + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int i = ifrom; i < ito; i++) { + particle_ekx[i] *= hx_inv; + particle_eky[i] *= hy_inv; + particle_ekz[i] *= hz_inv; + + // convert E-field to force + + const flt_t qfactor = fqqrd2es * q[i]; + const flt_t twoqsq = (flt_t)2.0 * q[i] * q[i]; + + const flt_t s1 = x[i].x * hx_inv; + const flt_t s2 = x[i].y * hy_inv; + const flt_t s3 = x[i].z * hz_inv; + flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); + sf += fsf_coeff1 * sin(ffour_pi * s1); + sf *= twoqsq; + f[i].x += qfactor * particle_ekx[i] - fqqrd2es * sf; + + sf = fsf_coeff2 * sin(ftwo_pi * s2); + sf += fsf_coeff3 * sin(ffour_pi * s2); + sf *= twoqsq; + f[i].y += qfactor * particle_eky[i] - fqqrd2es * sf; + + sf = fsf_coeff4 * sin(ftwo_pi * s3); + sf += fsf_coeff5 * sin(ffour_pi * s3); + sf *= twoqsq; + + if (slabflag != 2) f[i].z += qfactor * particle_ekz[i] - fqqrd2es * sf; + } + } +} + +/* ---------------------------------------------------------------------- + FFT-based Poisson solver for ik + Does special things for packing mode to avoid repeated copies +------------------------------------------------------------------------- */ + +void PPPMIntel::poisson_ik_intel() +{ + if (_use_packing == 0) { + poisson_ik(); + return; + } + + int i,j,k,n; + double eng; + + // transform charge density (r -> k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] = density_fft[i]; + work1[n++] = ZEROF; + } + + fft1->compute(work1,work1,1); + + // global energy and virial contribution + + double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); + double s2 = scaleinv*scaleinv; + + if (eflag_global || vflag_global) { + if (vflag_global) { + n = 0; + for (i = 0; i < nfft; i++) { + eng = s2 * greensfn[i] * (work1[n]*work1[n] + + work1[n+1]*work1[n+1]); + for 
(j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; + if (eflag_global) energy += eng; + n += 2; + } + } else { + n = 0; + for (i = 0; i < nfft; i++) { + energy += + s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); + n += 2; } - rho[0][k-nlower] = r1; - rho[1][k-nlower] = r2; - rho[2][k-nlower] = r3; - drho[0][k-nlower] = dr1; - drho[1][k-nlower] = dr2; - drho[2][k-nlower] = dr3; } + } + + // scale by 1/total-grid-pts to get rho(k) + // multiply by Green's function to get V(k) + + n = 0; + for (i = 0; i < nfft; i++) { + work1[n++] *= scaleinv * greensfn[i]; + work1[n++] *= scaleinv * greensfn[i]; + } + + // extra FFTs for per-atom energy/virial + + if (evflag_atom) poisson_peratom(); + + // triclinic system + + if (triclinic) { + poisson_ik_triclinic(); + return; + } + + // compute gradients of V(r) in each of 3 dims by transforming -ik*V(k) + // FFT leaves data in 3d brick decomposition + // copy it into inner portion of the packed vdxy_brick,vdz0_brick arrays + + // x direction gradient + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkx[i]*work1[n+1]; + work2[n+1] = -fkx[i]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + // y direction gradient + + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work3[n] = fky[j]*work1[n+1]; + work3[n+1] = -fky[j]*work1[n]; + n += 2; + } + + fft2->compute(work3,work3,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdxy_brick[k][j][2*i] = work2[n]; + vdxy_brick[k][j][2*i+1] = work3[n]; + n += 2; + } + + // z direction gradient - FFT_SCALAR ekx, eky, ekz; - ekx = eky = ekz = ZEROF; - for (int n = nlower; n <= nupper; n++) { - int mz = n+nz; - for (int m = nlower; m <= nupper; m++) { - int my = m+ny; - FFT_SCALAR ekx_p = rho[1][m-nlower] * rho[2][n-nlower]; -
FFT_SCALAR eky_p = drho[1][m-nlower] * rho[2][n-nlower]; - FFT_SCALAR ekz_p = rho[1][m-nlower] * drho[2][n-nlower]; - for (int l = nlower; l <= nupper; l++) { - int mx = l+nx; - ekx += drho[0][l-nlower] * ekx_p * u_brick[mz][my][mx]; - eky += rho[0][l-nlower] * eky_p * u_brick[mz][my][mx]; - ekz += rho[0][l-nlower] * ekz_p * u_brick[mz][my][mx]; + n = 0; + for (k = nzlo_fft; k <= nzhi_fft; k++) + for (j = nylo_fft; j <= nyhi_fft; j++) + for (i = nxlo_fft; i <= nxhi_fft; i++) { + work2[n] = fkz[k]*work1[n+1]; + work2[n+1] = -fkz[k]*work1[n]; + n += 2; + } + + fft2->compute(work2,work2,-1); + + n = 0; + for (k = nzlo_in; k <= nzhi_in; k++) + for (j = nylo_in; j <= nyhi_in; j++) + for (i = nxlo_in; i <= nxhi_in; i++) { + vdz0_brick[k][j][2*i] = work2[n]; + vdz0_brick[k][j][2*i+1] = 0.; + n += 2; + } +} + +/* ---------------------------------------------------------------------- + precompute rho coefficients as a lookup table to save time in make_rho + and fieldforce. Instead of doing this polynomial for every atom 6 times + per time step, precompute it for some number of points. +------------------------------------------------------------------------- */ + +void PPPMIntel::precompute_rho() +{ + + half_rho_scale = (rho_points - 1.)/2.; + half_rho_scale_plus = half_rho_scale + 0.5; + + for (int i = 0; i < rho_points; i++) { + FFT_SCALAR dx = -1. 
+ 1./half_rho_scale * (FFT_SCALAR)i; + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for (int k=nlower; k<=nupper;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order-1; l>=0; l--){ + r1 = rho_coeff[l][k] + r1*dx; + } + rho_lookup[i][k-nlower] = r1; + } + for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + rho_lookup[i][k] = 0; + } + if (differentiation_flag == 1) { + #if defined(LMP_SIMD_COMPILER) + #pragma simd + #endif + for(int k=nlower; k<=nupper;k++){ + FFT_SCALAR r1 = ZEROF; + for(int l=order-2; l>=0; l--){ + r1 = drho_coeff[l][k] + r1*dx; } + drho_lookup[i][k-nlower] = r1; + } + for (int k = nupper-nlower+1; k < INTEL_P3M_ALIGNED_MAXORDER; k++) { + drho_lookup[i][k] = 0; } } - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; + } +} - // convert E-field to force +/* ---------------------------------------------------------------------- + pack own values to buf to send to another proc +------------------------------------------------------------------------- */ - const flt_t qfactor = fqqrd2es * q[i]; - const flt_t twoqsq = (flt_t)2.0 * q[i] * q[i]; +void PPPMIntel::pack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if ((flag == FORWARD_IK) && _use_packing) { + FFT_SCALAR *xsrc = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out]; + FFT_SCALAR *zsrc = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out]; + for (int i = 0; i < nlist; i++) { + buf[n++] = xsrc[list[i]]; + buf[n++] = zsrc[list[i]]; + } + } else { + PPPM::pack_forward(flag, buf, nlist, list); + } +} - const flt_t s1 = x[i].x * hx_inv; - const flt_t s2 = x[i].y * hy_inv; - const flt_t s3 = x[i].z * hz_inv; - flt_t sf = fsf_coeff0 * sin(ftwo_pi * s1); - sf += fsf_coeff1 * sin(ffour_pi * s1); - sf *= twoqsq; - f[i].x += qfactor * ekx - fqqrd2es * sf; +/* ---------------------------------------------------------------------- + unpack another proc's own values from buf and set own ghost values 
+------------------------------------------------------------------------- */ - sf = fsf_coeff2 * sin(ftwo_pi * s2); - sf += fsf_coeff3 * sin(ffour_pi * s2); - sf *= twoqsq; - f[i].y += qfactor * eky - fqqrd2es * sf; +void PPPMIntel::unpack_forward(int flag, FFT_SCALAR *buf, int nlist, int *list) +{ + int n = 0; + + if ((flag == FORWARD_IK) && _use_packing) { + FFT_SCALAR *xdest = &vdxy_brick[nzlo_out][nylo_out][2*nxlo_out]; + FFT_SCALAR *zdest = &vdz0_brick[nzlo_out][nylo_out][2*nxlo_out]; + for (int i = 0; i < nlist; i++) { + xdest[list[i]] = buf[n++]; + zdest[list[i]] = buf[n++]; + } + } else { + PPPM::unpack_forward(flag, buf, nlist, list); + } +} - sf = fsf_coeff4 * sin(ftwo_pi * s3); - sf += fsf_coeff5 * sin(ffour_pi * s3); - sf *= twoqsq; +/* ---------------------------------------------------------------------- + memory usage of local arrays +------------------------------------------------------------------------- */ - if (slabflag != 2) f[i].z += qfactor * ekz - fqqrd2es * sf; +double PPPMIntel::memory_usage() +{ + double bytes = PPPM::memory_usage(); + if ((comm->nthreads > 1) && !_use_lrt) { + bytes += (comm->nthreads - 1) * (ngrid + INTEL_P3M_ALIGNED_MAXORDER) * + sizeof(FFT_SCALAR); + } + if (differentiation_flag == 1) { + bytes += 3 * nmax * sizeof(FFT_SCALAR); + } + if (_use_table) { + bytes += rho_points * INTEL_P3M_ALIGNED_MAXORDER * sizeof(FFT_SCALAR); + if (differentiation_flag == 1) { + bytes += rho_points * INTEL_P3M_ALIGNED_MAXORDER * sizeof(FFT_SCALAR); + } + } + if (_use_packing) { + bytes += 2 * (nzhi_out + 2 - nzlo_out + 1) * (nyhi_out - nylo_out + 1) + * (2 * nxhi_out + 1 - 2 * nxlo_out + 1) * sizeof(FFT_SCALAR); + bytes -= 3 * (nxhi_out - nxlo_out + 1) * (nyhi_out - nylo_out + 1) + * (nzhi_out - nzlo_out + 1) * sizeof(FFT_SCALAR); + bytes += 2 * nfft_both * sizeof(FFT_SCALAR); + bytes += cg_pack->memory_usage(); } + return bytes; } /* ---------------------------------------------------------------------- @@ -640,13 +1219,16 @@ void 
PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers) void PPPMIntel::pack_buffers() { fix->start_watch(TIME_PACK); + int packthreads; + if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads; + else packthreads = 1; #if defined(_OPENMP) - #pragma omp parallel default(none) + #pragma omp parallel if(packthreads > 1) #endif { int ifrom, ito, tid; IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost, - comm->nthreads, + packthreads, sizeof(IntelBuffers<float,double>::atom_t)); if (fix->precision() == FixIntel::PREC_MODE_MIXED) fix->get_mixed_buffers()->thr_pack(ifrom,ito,1); diff --git a/src/USER-INTEL/pppm_intel.h b/src/USER-INTEL/pppm_intel.h index 40669a55616a2d2437dfddf47bdbed8de934fde1..e152486b29b38e2a9cfd279089dd54d06b8465e3 100644 --- a/src/USER-INTEL/pppm_intel.h +++ b/src/USER-INTEL/pppm_intel.h @@ -1,4 +1,4 @@ -/* -*- c++ -*- ---------------------------------------------------------- +/* *- c++ -*- ----------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -12,7 +12,9 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing authors: Rodrigo Canales (RWTH Aachen University) + Contributing authors: William McDoniel (RWTH Aachen University) + Rodrigo Canales (RWTH Aachen University) + Markus Hoehnerbach (RWTH Aachen University) W. 
Michael Brown (Intel) ------------------------------------------------------------------------- */ @@ -36,6 +38,9 @@ class PPPMIntel : public PPPM { virtual ~PPPMIntel(); virtual void init(); virtual void compute(int, int); + virtual void pack_forward(int, FFT_SCALAR *, int, int *); + virtual void unpack_forward(int, FFT_SCALAR *, int, int *); + virtual double memory_usage(); void compute_first(int, int); void compute_second(int, int); void pack_buffers(); @@ -47,18 +52,74 @@ class PPPMIntel : public PPPM { protected: FixIntel *fix; + int _use_lrt; + FFT_SCALAR **perthread_density; + FFT_SCALAR *particle_ekx; + FFT_SCALAR *particle_eky; + FFT_SCALAR *particle_ekz; + + int _use_table; + int rho_points; + FFT_SCALAR **rho_lookup; + FFT_SCALAR **drho_lookup; + FFT_SCALAR half_rho_scale, half_rho_scale_plus; + + int _use_packing; + FFT_SCALAR ***vdxy_brick; + FFT_SCALAR ***vdz0_brick; + FFT_SCALAR *work3; + class GridComm *cg_pack; + #ifdef _LMP_INTEL_OFFLOAD int _use_base; #endif + template<class flt_t, class acc_t> + void test_function(IntelBuffers<flt_t,acc_t> *buffers); + + + void precompute_rho(); template<class flt_t, class acc_t> void particle_map(IntelBuffers<flt_t,acc_t> *buffers); - template<class flt_t, class acc_t> + template<class flt_t, class acc_t, int use_table> void make_rho(IntelBuffers<flt_t,acc_t> *buffers); template<class flt_t, class acc_t> + void make_rho(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + make_rho<flt_t,acc_t,1>(buffers); + } else { + make_rho<flt_t,acc_t,0>(buffers); + } + } + void poisson_ik_intel(); + template<class flt_t, class acc_t, int use_table, int use_packing> void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers); template<class flt_t, class acc_t> + void fieldforce_ik(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + if (_use_packing == 1) { + fieldforce_ik<flt_t, acc_t, 1, 1>(buffers); + } else { + fieldforce_ik<flt_t, acc_t, 1, 0>(buffers); + } + } else { + if (_use_packing == 1) { + 
fieldforce_ik<flt_t, acc_t, 0, 1>(buffers); + } else { + fieldforce_ik<flt_t, acc_t, 0, 0>(buffers); + } + } + } + template<class flt_t, class acc_t, int use_table> void fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers); + template<class flt_t, class acc_t> + void fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers) { + if (_use_table == 1) { + fieldforce_ad<flt_t,acc_t,1>(buffers); + } else { + fieldforce_ad<flt_t,acc_t,0>(buffers); + } + } }; } diff --git a/src/USER-INTEL/verlet_lrt_intel.cpp b/src/USER-INTEL/verlet_lrt_intel.cpp index afb7852f983d16ac75ca949fadd9007950e324bd..81f458614372cca3a6d1e74a5544db263ac60b67 100644 --- a/src/USER-INTEL/verlet_lrt_intel.cpp +++ b/src/USER-INTEL/verlet_lrt_intel.cpp @@ -51,7 +51,7 @@ VerletLRTIntel::VerletLRTIntel(LAMMPS *lmp, int narg, char **arg) : /* ---------------------------------------------------------------------- */ -VerletLRTIntel::~VerletLRTIntel() +VerletLRTIntel::~VerletLRTIntel() { #if defined(_LMP_INTEL_LRT_PTHREAD) pthread_mutex_destroy(&_kmutex); @@ -67,10 +67,10 @@ void VerletLRTIntel::init() Verlet::init(); _intel_kspace = (PPPMIntel*)(force->kspace_match("pppm/intel", 0)); - + #ifdef LMP_INTEL_NOLRT - error->all(FLERR, - "LRT otion for Intel package disabled at compile time"); + error->all(FLERR, + "LRT otion for Intel package disabled at compile time"); #endif } @@ -78,17 +78,17 @@ void VerletLRTIntel::init() setup before run ------------------------------------------------------------------------- */ -void VerletLRTIntel::setup() +void VerletLRTIntel::setup(int flag) { if (_intel_kspace == 0) { - Verlet::setup(); + Verlet::setup(flag); return; - } + } #ifdef _LMP_INTEL_OFFLOAD if (_intel_kspace->use_base()) { _intel_kspace = 0; - Verlet::setup(); + Verlet::setup(flag); return; } #endif @@ -154,15 +154,15 @@ void VerletLRTIntel::setup() _intel_kspace->setup(); #if defined(_LMP_INTEL_LRT_PTHREAD) - pthread_create(&_kspace_thread, &_kspace_attr, - &VerletLRTIntel::k_launch_loop, this); + 
pthread_create(&_kspace_thread, &_kspace_attr, + &VerletLRTIntel::k_launch_loop, this); #elif defined(_LMP_INTEL_LRT_11) std::thread kspace_thread; - if (kspace_compute_flag) - _kspace_thread=std::thread([=]{ _intel_kspace->compute_first(eflag, + if (kspace_compute_flag) + _kspace_thread=std::thread([=]{ _intel_kspace->compute_first(eflag, vflag); }); - else - _kspace_thread=std::thread([=]{ _intel_kspace->compute_dummy(eflag, + else + _kspace_thread=std::thread([=]{ _intel_kspace->compute_dummy(eflag, vflag); }); #endif @@ -297,8 +297,8 @@ void VerletLRTIntel::run(int n) pthread_mutex_unlock(&_kmutex); #elif defined(_LMP_INTEL_LRT_11) std::thread kspace_thread; - if (kspace_compute_flag) - kspace_thread=std::thread([=] { + if (kspace_compute_flag) + kspace_thread=std::thread([=] { _intel_kspace->compute_first(eflag, vflag); timer->stamp(Timer::KSPACE); } ); @@ -329,7 +329,7 @@ void VerletLRTIntel::run(int n) _kspace_done = 0; pthread_mutex_unlock(&_kmutex); #elif defined(_LMP_INTEL_LRT_11) - if (kspace_compute_flag) + if (kspace_compute_flag) kspace_thread.join(); #endif @@ -367,7 +367,7 @@ void VerletLRTIntel::run(int n) } #if defined(_LMP_INTEL_LRT_PTHREAD) - if (run_cancelled) + if (run_cancelled) pthread_cancel(_kspace_thread); else { pthread_mutex_lock(&_kmutex); @@ -390,9 +390,9 @@ void * VerletLRTIntel::k_launch_loop(void *context) { VerletLRTIntel * const c = (VerletLRTIntel *)context; - if (c->kspace_compute_flag) + if (c->kspace_compute_flag) c->_intel_kspace->compute_first(c->eflag, c->vflag); - else + else c->_intel_kspace->compute_dummy(c->eflag, c->vflag); pthread_mutex_lock(&(c->_kmutex)); @@ -408,7 +408,7 @@ void * VerletLRTIntel::k_launch_loop(void *context) pthread_mutex_unlock(&(c->_kmutex)); for (int i = 0; i < n; i++) { - + if (c->kspace_compute_flag) { c->_intel_kspace->compute_first(c->eflag, c->vflag); c->timer->stamp(Timer::KSPACE); diff --git a/src/USER-INTEL/verlet_lrt_intel.h b/src/USER-INTEL/verlet_lrt_intel.h index 
a699c207965d0dc6fef078502bc2b26cdde702c8..0521b161c70379b3a70f32c52017ba5387690ac9 100644 --- a/src/USER-INTEL/verlet_lrt_intel.h +++ b/src/USER-INTEL/verlet_lrt_intel.h @@ -42,7 +42,7 @@ class VerletLRTIntel : public Verlet { VerletLRTIntel(class LAMMPS *, int, char **); virtual ~VerletLRTIntel(); virtual void init(); - virtual void setup(); + virtual void setup(int flag = 1); virtual void run(int); protected: diff --git a/src/USER-MANIFOLD/README b/src/USER-MANIFOLD/README index f55a9bb8e32de355a75d7f3506ef7d848ff833db..eb83cfc5abce1496de1a0579073f9f9c1729b604 100644 --- a/src/USER-MANIFOLD/README +++ b/src/USER-MANIFOLD/README @@ -7,10 +7,14 @@ box). It achieves this using the RATTLE constraint algorithm applied to single-particle constraint functions g(xi,yi,zi) = 0 and their derivative (i.e. the normal of the manifold) n = grad(g). -Stefan Paquay, s.paquay@tue.nl -Applied Physics/Theory of Polymers and Soft Matter, +Stefan Paquay, stefanpaquay@gmail.com + +until 2017: Applied Physics/Theory of Polymers and Soft Matter, Eindhoven University of Technology (TU/e), The Netherlands +since 2017: Brandeis University, Waltham, MA, USA. + + Thanks to Remy Kusters at TU/e for testing. This software is distributed under the GNU General Public License. diff --git a/src/USER-MANIFOLD/manifold.h b/src/USER-MANIFOLD/manifold.h index d0ffa214ac4b0bd11fa6334f534c0d4c71377a50..b89e765a6eab068f99194cd492d867fe456fa4f8 100644 --- a/src/USER-MANIFOLD/manifold.h +++ b/src/USER-MANIFOLD/manifold.h @@ -24,7 +24,7 @@ testing purposes) and a wave-y plane. See the README file for more info. 
- Stefan Paquay, s.paquay@tue.nl + Stefan Paquay, stefanpaquay@gmail.com Applied Physics/Theory of Polymers and Soft Matter, Eindhoven University of Technology (TU/e), The Netherlands diff --git a/src/USER-MANIFOLD/manifold_gaussian_bump.h b/src/USER-MANIFOLD/manifold_gaussian_bump.h index 43f69fba185512a4a25282df31aa97535f599875..f3401a4a33d0d1a9082d13a6e9d201ed0f8e05fd 100644 --- a/src/USER-MANIFOLD/manifold_gaussian_bump.h +++ b/src/USER-MANIFOLD/manifold_gaussian_bump.h @@ -24,7 +24,10 @@ testing purposes) and a wave-y plane. See the README file for more info. - Stefan Paquay, s.paquay@tue.nl + Stefan Paquay, spaquay@brandeis.edu + Brandeis University, Waltham, MA, USA. + + This package was mainly developed at Applied Physics/Theory of Polymers and Soft Matter, Eindhoven University of Technology (TU/e), The Netherlands diff --git a/src/USER-MISC/README b/src/USER-MISC/README index cacee41e0cfe567d68f2f4ad0f72648da2342132..93d771efbc7645b08003bcb66f61b8b886be99f2 100644 --- a/src/USER-MISC/README +++ b/src/USER-MISC/README @@ -28,6 +28,7 @@ bond_style harmonic/shift, Carsten Svaneborg, science at zqex.dk, 8 Aug 11 bond_style harmonic/shift/cut, Carsten Svaneborg, science at zqex.dk, 8 Aug 11 compute ackland/atom, Gerolf Ziegenhain, gerolf at ziegenhain.com, 4 Oct 2007 compute basal/atom, Christopher Barrett, cdb333 at cavs.msstate.edu, 3 Mar 2013 +compute cnp/atom, Paulo Branicio (USC), branicio at usc.edu, 31 May 2017 compute temp/rotate, Laurent Joly (U Lyon), ljoly.ulyon at gmail.com, 8 Aug 11 compute PRESSURE/GREM, David Stelter, dstelter@bu.edu, 22 Nov 16 dihedral_style cosine/shift/exp, Carsten Svaneborg, science at zqex.dk, 8 Aug 11 @@ -63,7 +64,6 @@ pair_style gauss/cut, Axel Kohlmeyer, akohlmey at gmail.com, 1 Dec 11 pair_style lennard/mdf, Paolo Raiteri, p.raiteri at curtin.edu.au, 2 Dec 15 pair_style list, Axel Kohlmeyer (Temple U), akohlmey at gmail.com, 1 Jun 13 pair_style lj/mdf, Paolo Raiteri, p.raiteri at curtin.edu.au, 2 Dec 15 -pair_style 
lj/sf, Laurent Joly (U Lyon), ljoly.ulyon at gmail.com, 8 Aug 11 pair_style kolmogorov/crespi/z, Jaap Kroes (Radboud U), jaapkroes at gmail dot com, 28 Feb 17 pair_style meam/spline, Alexander Stukowski (LLNL), alex at stukowski.com, 1 Feb 12 pair_style meam/sw/spline, Robert Rudd (LLNL), robert.rudd at llnl.gov, 1 Oct 12 diff --git a/src/USER-MISC/compute_cnp_atom.cpp b/src/USER-MISC/compute_cnp_atom.cpp new file mode 100644 index 0000000000000000000000000000000000000000..89568c6731a85c05bfc5d621cd04807f10c37f00 --- /dev/null +++ b/src/USER-MISC/compute_cnp_atom.cpp @@ -0,0 +1,331 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+
+   Common Neighbor Parameter as proposed in:
+   Tsuzuki, Branicio, Rino, Comput Phys Comm, 177, 518 (2007)
+   Cite: http://dx.doi.org/10.1063/1.2197987
+
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Paulo Branicio (University of Southern California)
+   branicio@usc.edu
+------------------------------------------------------------------------- */
+
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "compute_cnp_atom.h"
+#include "atom.h"
+#include "update.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "force.h"
+#include "pair.h"
+#include "comm.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+//define maximum values
+#define MAXNEAR 24
+#define MAXCOMMON 12
+
+enum{NCOMMON};
+
+/* ---------------------------------------------------------------------- */
+
+ComputeCNPAtom::ComputeCNPAtom(LAMMPS *lmp, int narg, char **arg) :
+  Compute(lmp, narg, arg),
+  nearest(NULL), nnearest(NULL), cnpv(NULL)
+{
+  if (narg != 4) error->all(FLERR,"Illegal compute cnp/atom command");
+
+  peratom_flag = 1;
+  size_peratom_cols = 0;
+
+  double cutoff = force->numeric(FLERR,arg[3]);
+  if (cutoff < 0.0) error->all(FLERR,"Illegal compute cnp/atom command");
+  cutsq = cutoff*cutoff;
+
+  // apply check for single type atoms in compute group
+  int lasttype = -1;
+  int n = -1;
+  for (int i=0; i < atom->nlocal; ++i) {
+    if (atom->mask[i] & groupbit) {
+      if (lasttype != atom->type[i]) {
+        lasttype = atom->type[i];
+        ++n;
+      }
+    }
+  }
+  int all_n = 0;
+  MPI_Allreduce(&n,&all_n,1,MPI_INT,MPI_MAX,world);
+  if (all_n > 0)
+    error->warning(FLERR,"Compute cnp/atom requested on multi-type system");
+
+  nmax = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+ComputeCNPAtom::~ComputeCNPAtom()
+{
+  memory->destroy(nearest);
+  memory->destroy(nnearest);
+  memory->destroy(cnpv);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeCNPAtom::init()
+{
+  if (force->pair == NULL)
+    error->all(FLERR,"Compute cnp/atom requires a pair style be defined");
+
+  if (sqrt(cutsq) > force->pair->cutforce)
+    error->all(FLERR,"Compute cnp/atom cutoff is longer than pairwise cutoff");
+
+  if (2.0*sqrt(cutsq) > force->pair->cutforce + neighbor->skin &&
+      comm->me == 0)
+    error->warning(FLERR,"Compute cnp/atom cutoff may be too large to find "
+                   "ghost atom neighbors");
+
+  int count = 0;
+  for (int i = 0; i < modify->ncompute; i++)
+    if (strcmp(modify->compute[i]->style,"cnp/atom") == 0) count++;
+  if (count > 1 && comm->me == 0)
+    error->warning(FLERR,"More than one compute cnp/atom defined");
+
+  // need an occasional full neighbor list
+  int irequest = neighbor->request(this,instance_me);
+  neighbor->requests[irequest]->pair = 0;
+  neighbor->requests[irequest]->compute = 1;
+  neighbor->requests[irequest]->half = 0;
+  neighbor->requests[irequest]->full = 1;
+  neighbor->requests[irequest]->occasional = 1;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeCNPAtom::init_list(int id, NeighList *ptr)
+{
+  list = ptr;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void ComputeCNPAtom::compute_peratom()
+{
+  int i,j,k,ii,jj,kk,m,n,inum,jnum,inear,jnear;
+  int firstflag,ncommon;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+  int onenearest[MAXNEAR];
+  int common[MAXCOMMON];
+  double xtmp,ytmp,ztmp,delx,dely,delz,rsq;
+  double xjtmp,yjtmp,zjtmp,rjkx,rjky,rjkz;
+
+  invoked_peratom = update->ntimestep;
+
+  // grow arrays if necessary
+
+  if (atom->nmax > nmax) {
+    memory->destroy(nearest);
+    memory->destroy(nnearest);
+    memory->destroy(cnpv);
+    nmax = atom->nmax;
+    memory->create(nearest,nmax,MAXNEAR,"cnp:nearest");
+    memory->create(nnearest,nmax,"cnp:nnearest");
+    memory->create(cnpv,nmax,"cnp:cnp_cnpv");
+    vector_atom = cnpv;
+  }
+
+  // invoke full neighbor list (will copy or build if necessary)
+
+  neighbor->build_one(list);
+
+  inum = list->inum;
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // find the neighbors of each atom within cutoff using full neighbor list
+  // nearest[] = atom indices of nearest neighbors, up to MAXNEAR
+  // do this for all atoms, not just compute group
+  // since CNP calculation requires neighbors of neighbors
+
+  double **x = atom->x;
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+
+  int nerror = 0;
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    n = 0;
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j][0];
+      dely = ytmp - x[j][1];
+      delz = ztmp - x[j][2];
+      rsq = delx*delx + dely*dely + delz*delz;
+      if (rsq < cutsq) {
+        if (n < MAXNEAR) nearest[i][n++] = j;
+        else {
+          nerror++;
+          break;
+        }
+      }
+    }
+    nnearest[i] = n;
+  }
+
+  // warning message
+
+  int nerrorall;
+  MPI_Allreduce(&nerror,&nerrorall,1,MPI_INT,MPI_SUM,world);
+  if (nerrorall && comm->me == 0) {
+    char str[128];
+    sprintf(str,"Too many neighbors in CNP for %d atoms",nerrorall);
+    error->warning(FLERR,str,0);
+  }
+
+  // compute CNP value for each atom in group
+  // performed for every group atom, whatever its number of nearest neighbors
+
+  nerror = 0;
+  for (ii = 0; ii < inum; ii++) {
+    i = ilist[ii];
+    xtmp = x[i][0];
+    ytmp = x[i][1];
+    ztmp = x[i][2];
+    // reset cnpv
+    cnpv[i] = 0.0;
+
+    // skip computation of cnpv for atoms outside the compute group
+
+    if (!(mask[i] & groupbit)) continue;
+
+    // loop over nearest neighbors J of I, accumulating the CNP sum for I
+    // for each J, common[] holds the neighbors shared by I and J
+    for (m = 0; m < nnearest[i]; m++) {
+      j = nearest[i][m];
+      xjtmp = x[j][0];
+      yjtmp = x[j][1];
+      zjtmp = x[j][2];
+
+      // common = list of neighbors common to atom I and atom J
+      // if J is an owned atom, use its near neighbor list to find them
+      // if J is a ghost atom, use full neighbor list of I to find them
+      // in latter case, must exclude J from I's neighbor list
+
+      // find common neighbors of i and j using near neighbor list
+      if (j < nlocal) {
+        firstflag = 1;
+        ncommon = 0;
+        for (inear = 0; inear < nnearest[i]; inear++)
+          for (jnear = 0; jnear < nnearest[j]; jnear++)
+            if (nearest[i][inear] == nearest[j][jnear]) {
+              if (ncommon < MAXCOMMON) common[ncommon++] = nearest[i][inear];
+              else if (firstflag) {
+                nerror++;
+                firstflag = 0;
+              }
+            }
+
+      // find common neighbors of i and j using full neighbor list
+      } else {
+        jlist = firstneigh[i];
+        jnum = numneigh[i];
+
+        n = 0;
+        for (kk = 0; kk < jnum; kk++) {
+          k = jlist[kk];
+          k &= NEIGHMASK;
+          if (k == j) continue;
+
+          delx = xjtmp - x[k][0];
+          dely = yjtmp - x[k][1];
+          delz = zjtmp - x[k][2];
+          rsq = delx*delx + dely*dely + delz*delz;
+          if (rsq < cutsq) {
+            if (n < MAXNEAR) onenearest[n++] = k;
+            else break;
+          }
+        }
+
+        firstflag = 1;
+        ncommon = 0;
+        for (inear = 0; inear < nnearest[i]; inear++)
+          for (jnear = 0; (jnear < n) && (n < MAXNEAR); jnear++)
+            if (nearest[i][inear] == onenearest[jnear]) {
+              if (ncommon < MAXCOMMON) common[ncommon++] = nearest[i][inear];
+              else if (firstflag) {
+                nerror++;
+                firstflag = 0;
+              }
+            }
+      }
+
+      // Calculate and update sum |Rik+Rjk|^2
+      rjkx = 0.0;
+      rjky = 0.0;
+      rjkz = 0.0;
+      for (kk = 0; kk < ncommon; kk++) {
+        k = common[kk];
+        rjkx += 2.0*x[k][0] - xjtmp - xtmp;
+        rjky += 2.0*x[k][1] - yjtmp - ytmp;
+        rjkz += 2.0*x[k][2] - zjtmp - ztmp;
+      }
+      // update cnpv with summed (valuejk)
+      cnpv[i] += rjkx*rjkx + rjky*rjky + rjkz*rjkz;
+
+    // end of loop over j atoms
+    }
+
+    // normalize cnp by the number of nearest neighbors (stays 0.0 if none)
+    if (nnearest[i] > 0) cnpv[i] /= nnearest[i];
+
+  // end of loop over i atoms
+  }
+
+  // warning message
+  MPI_Allreduce(&nerror,&nerrorall,1,MPI_INT,MPI_SUM,world);
+  if (nerrorall && comm->me == 0) {
+    char str[128];
+    sprintf(str,"Too many common neighbors in CNP %d times",nerrorall);
+    error->warning(FLERR,str);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory usage of local atom-based array
+------------------------------------------------------------------------- */
+
+double ComputeCNPAtom::memory_usage()
+{
+  double bytes = nmax * sizeof(int);
+  bytes += nmax * MAXNEAR * sizeof(int);
+  bytes += nmax * sizeof(double);
+  return bytes;
+}
diff --git a/src/USER-MISC/compute_cnp_atom.h b/src/USER-MISC/compute_cnp_atom.h
new file mode 100644
index 0000000000000000000000000000000000000000..4fdb3954f2720fa6059b3425e5aa7f55be97e635
--- /dev/null
+++ b/src/USER-MISC/compute_cnp_atom.h
@@ -0,0 +1,92 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef COMPUTE_CLASS
+
+ComputeStyle(cnp/atom,ComputeCNPAtom)
+
+#else
+
+#ifndef LMP_COMPUTE_CNP_ATOM_H
+#define LMP_COMPUTE_CNP_ATOM_H
+
+#include "compute.h"
+
+namespace LAMMPS_NS {
+
+class ComputeCNPAtom : public Compute {
+ public:
+  ComputeCNPAtom(class LAMMPS *, int, char **);
+  ~ComputeCNPAtom();
+  void init();
+  void init_list(int, class NeighList *);
+  void compute_peratom();
+  double memory_usage();
+
+ private:
+  // per-atom arrays below are (re)allocated in compute_peratom()
+  int nmax;
+  double cutsq;
+  class NeighList *list;
+  int **nearest;
+  int *nnearest;
+  double *cnpv;
+  // nmax     = allocated per-atom length of nearest, nnearest and cnpv
+  // cutsq    = square of the cutoff given on the compute command
+  // list     = neighbor list handed over through init_list()
+  // nearest  = nearest[i][0..nnearest[i]-1] are neighbors of i within cutoff
+  // nnearest = number of entries stored in nearest[i], at most MAXNEAR
+  // cnpv     = per-atom CNP value, published as vector_atom
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Compute cnp/atom requires a pair style be defined
+
+Self-explanatory.
+
+E: Compute cnp/atom cutoff is longer than pairwise cutoff
+
+Self-explanatory.
+
+W: Compute cnp/atom cutoff may be too large to find ghost atom neighbors
+
+The neighbor cutoff used may not encompass enough ghost atoms
+to perform this operation correctly.
+
+W: More than one compute cnp/atom defined
+
+It is not efficient to use compute cnp/atom more than once.
+
+W: Too many neighbors in CNP for %d atoms
+
+More than the maximum # of neighbors was found multiple times.  This
+was unexpected.
+
+W: Too many common neighbors in CNP %d times
+
+More than the maximum # of common neighbors was found multiple times.  This
+was unexpected.
+ +*/ diff --git a/src/USER-MISC/fix_filter_corotate.cpp b/src/USER-MISC/fix_filter_corotate.cpp index b59a37e8f9fcf04211e7cc1830b0e864c8f746ae..7ada3aeb99e0d582bda2048e4dddb576ba53fc4e 100644 --- a/src/USER-MISC/fix_filter_corotate.cpp +++ b/src/USER-MISC/fix_filter_corotate.cpp @@ -46,10 +46,6 @@ using namespace LAMMPS_NS; using namespace MathConst; using namespace FixConst; -// allocate space for static class variable - -FixFilterCorotate *FixFilterCorotate::fsptr = NULL; - #define BIG 1.0e20 #define MASSDELTA 0.1 @@ -950,8 +946,7 @@ void FixFilterCorotate::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf); + comm->ring(size,sizeof(tagint),buf,1,ring_bonds,buf,(void *)this); // store partner info returned to me @@ -1079,8 +1074,7 @@ void FixFilterCorotate::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf); + comm->ring(size,sizeof(tagint),buf,2,ring_nshake,buf,(void *)this); // store partner info returned to me @@ -1240,8 +1234,7 @@ void FixFilterCorotate::find_clusters() // cycle buffer around ring of procs back to self - fsptr = this; - comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL); + comm->ring(size,sizeof(tagint),buf,3,ring_shake,NULL,(void *)this); memory->destroy(buf); @@ -1310,15 +1303,16 @@ void FixFilterCorotate::find_clusters() * search for bond with 1st atom and fill in bondtype * ------------------------------------------------------------------------- */ -void FixFilterCorotate::ring_bonds(int ndatum, char *cbuf) +void FixFilterCorotate::ring_bonds(int ndatum, char *cbuf, void *ptr) { - Atom *atom = fsptr->atom; + FixFilterCorotate *ffptr = (FixFilterCorotate *) ptr; + Atom *atom = ffptr->atom; double *rmass = atom->rmass; double *mass = atom->mass; int *mask = atom->mask; int *type = atom->type; int nlocal = atom->nlocal; - int nmass = fsptr->nmass; + int nmass = 
ffptr->nmass; tagint *buf = (tagint *) cbuf; int m,n; @@ -1332,10 +1326,10 @@ void FixFilterCorotate::ring_bonds(int ndatum, char *cbuf) if (nmass) { if (rmass) massone = rmass[m]; else massone = mass[type[m]]; - buf[i+4] = fsptr->masscheck(massone); + buf[i+4] = ffptr->masscheck(massone); } if (buf[i+5] == 0) { - n = fsptr->bondtype_findset(m,buf[i],buf[i+1],0); + n = ffptr->bondtype_findset(m,buf[i],buf[i+1],0); if (n) buf[i+5] = n; } } @@ -1347,12 +1341,13 @@ void FixFilterCorotate::ring_bonds(int ndatum, char *cbuf) * if I own partner, fill in nshake value * ------------------------------------------------------------------------- */ -void FixFilterCorotate::ring_nshake(int ndatum, char *cbuf) +void FixFilterCorotate::ring_nshake(int ndatum, char *cbuf, void *ptr) { - Atom *atom = fsptr->atom; + FixFilterCorotate *ffptr = (FixFilterCorotate *) ptr; + Atom *atom = ffptr->atom; int nlocal = atom->nlocal; - int *nshake = fsptr->nshake; + int *nshake = ffptr->nshake; tagint *buf = (tagint *) cbuf; int m; @@ -1368,14 +1363,15 @@ void FixFilterCorotate::ring_nshake(int ndatum, char *cbuf) * if I own partner, fill in nshake value * ------------------------------------------------------------------------- */ -void FixFilterCorotate::ring_shake(int ndatum, char *cbuf) +void FixFilterCorotate::ring_shake(int ndatum, char *cbuf, void *ptr) { - Atom *atom = fsptr->atom; + FixFilterCorotate *ffptr = (FixFilterCorotate *) ptr; + Atom *atom = ffptr->atom; int nlocal = atom->nlocal; - int *shake_flag = fsptr->shake_flag; - tagint **shake_atom = fsptr->shake_atom; - int **shake_type = fsptr->shake_type; + int *shake_flag = ffptr->shake_flag; + tagint **shake_atom = ffptr->shake_atom; + int **shake_type = ffptr->shake_type; tagint *buf = (tagint *) cbuf; int m; diff --git a/src/USER-MISC/fix_filter_corotate.h b/src/USER-MISC/fix_filter_corotate.h index 47accfedd3d2cf08e94d7cabe33fafa0cc20c54c..3f8e8bba43a75cd110b8c69489261399f9517850 100644 --- 
a/src/USER-MISC/fix_filter_corotate.h +++ b/src/USER-MISC/fix_filter_corotate.h @@ -120,13 +120,11 @@ namespace LAMMPS_NS int bondtype_findset(int, tagint, tagint, int); int angletype_findset(int, tagint, tagint, int); - // static variable for ring communication callback to access class data // callback functions for ring communication - static FixFilterCorotate *fsptr; - static void ring_bonds(int, char *); - static void ring_nshake(int, char *); - static void ring_shake(int, char *); + static void ring_bonds(int, char *, void *); + static void ring_nshake(int, char *, void *); + static void ring_shake(int, char *, void *); int sgn(double val) { return (0 < val) - (val < 0); diff --git a/src/USER-MISC/fix_grem.cpp b/src/USER-MISC/fix_grem.cpp index fd646fa7ad648586721148bf098b04dd77c9ead3..5a4e149316e38b16186ed5bd7f8cd06c94c20aa1 100644 --- a/src/USER-MISC/fix_grem.cpp +++ b/src/USER-MISC/fix_grem.cpp @@ -50,11 +50,8 @@ FixGrem::FixGrem(LAMMPS *lmp, int narg, char **arg) : if (narg < 7) error->all(FLERR,"Illegal fix grem command"); scalar_flag = 1; - vector_flag = 1; - size_vector = 3; + extscalar = 0; global_freq = 1; - extscalar = 1; - extvector = 1; scale_grem = 1.0; @@ -282,6 +279,13 @@ void FixGrem::post_force(int vflag) pe->addstep(update->ntimestep+1); } +/* ---------------------------------------------------------------------- */ + +double FixGrem::compute_scalar() +{ + return tbath / scale_grem; // scalar reported by this fix (constructor sets scalar_flag = 1, extscalar = 0): tbath divided by scale_grem +} + /* ---------------------------------------------------------------------- extract scale factor ------------------------------------------------------------------------- */ diff --git a/src/USER-MISC/fix_grem.h b/src/USER-MISC/fix_grem.h index 4806505f613616b70a8f3405c85177ce65d600fe..1899bf6fc2a3f30e2f668bea53b03e693bc8b193 100644 --- a/src/USER-MISC/fix_grem.h +++ b/src/USER-MISC/fix_grem.h @@ -34,6 +34,7 @@ class FixGrem : public Fix { void min_setup(int); void post_force(int); void *extract(const char *, int &); + double compute_scalar(); double 
scale_grem,lambda,eta,h0; int pressflag; diff --git a/src/USER-MISC/pair_cdeam.cpp b/src/USER-MISC/pair_cdeam.cpp index 91ef598de833bcadffdf60cc3dd490b2b0f6fbf1..b5607012ce16789056b4afed345ffcfe54d1ca7e 100644 --- a/src/USER-MISC/pair_cdeam.cpp +++ b/src/USER-MISC/pair_cdeam.cpp @@ -456,11 +456,11 @@ void PairCDEAM::read_h_coeff(char *filename) { if(comm->me == 0) { // Open potential file - FILE *fp; + FILE *fptr; char line[MAXLINE]; char nextline[MAXLINE]; - fp = force->open_potential(filename); - if (fp == NULL) { + fptr = force->open_potential(filename); + if (fptr == NULL) { char str[128]; sprintf(str,"Cannot open EAM potential file %s", filename); error->one(FLERR,str); @@ -468,7 +468,7 @@ void PairCDEAM::read_h_coeff(char *filename) // h coefficients are stored at the end of the file. // Skip to last line of file. - while(fgets(nextline, MAXLINE, fp) != NULL) { + while(fgets(nextline, MAXLINE, fptr) != NULL) { strcpy(line, nextline); } char* ptr = strtok(line, " \t\n\r\f"); @@ -483,7 +483,7 @@ void PairCDEAM::read_h_coeff(char *filename) error->one(FLERR,"Failed to read h(x) function coefficients from EAM file."); // Close the potential file. - fclose(fp); + fclose(fptr); } MPI_Bcast(&nhcoeff, 1, MPI_INT, 0, world); diff --git a/src/USER-MISC/pair_lj_sf.cpp b/src/USER-MISC/pair_lj_sf.cpp deleted file mode 100644 index a34119f880b89e5dcdb599d5ad458c922a4c853c..0000000000000000000000000000000000000000 --- a/src/USER-MISC/pair_lj_sf.cpp +++ /dev/null @@ -1,355 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. 
- - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Laurent Joly (U Lyon), ljoly.ulyon@gmail.com -------------------------------------------------------------------------- */ - -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include "pair_lj_sf.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neigh_list.h" -#include "memory.h" -#include "error.h" - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJShiftedForce::PairLJShiftedForce(LAMMPS *lmp) : Pair(lmp) -{ - respa_enable = 0; -} - -/* ---------------------------------------------------------------------- */ - -PairLJShiftedForce::~PairLJShiftedForce() -{ - if (allocated) { - memory->destroy(setflag); - memory->destroy(cutsq); - - memory->destroy(cut); - memory->destroy(epsilon); - memory->destroy(sigma); - memory->destroy(lj1); - memory->destroy(lj2); - memory->destroy(lj3); - memory->destroy(lj4); - memory->destroy(foffset); - memory->destroy(offset); - } -} - -/* ---------------------------------------------------------------------- */ - -void PairLJShiftedForce::compute(int eflag, int vflag) -{ - int i,j,ii,jj,inum,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; - double r,t; - int *ilist,*jlist,*numneigh,**firstneigh; - - evdwl = 0.0; - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; - - double **x = atom->x; - double **f = atom->f; - int *type = atom->type; - int nlocal = atom->nlocal; - double *special_lj = force->special_lj; - int newton_pair = force->newton_pair; - - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - // loop over neighbors of my atoms - - for (ii = 0; ii < 
inum; ii++) { - i = ilist[ii]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - r = sqrt(rsq); - t = r/cut[itype][jtype]; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) - - t*foffset[itype][jtype]; - fpair = factor_lj*forcelj*r2inv; - - f[i][0] += delx*fpair; - f[i][1] += dely*fpair; - f[i][2] += delz*fpair; - if (newton_pair || j < nlocal) { - f[j][0] -= delx*fpair; - f[j][1] -= dely*fpair; - f[j][2] -= delz*fpair; - } - - if (eflag) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - (t-1.0)*foffset[itype][jtype] - offset[itype][jtype]; - evdwl *= factor_lj; - } - - if (evflag) ev_tally(i,j,nlocal,newton_pair, - evdwl,0.0,fpair,delx,dely,delz); - } - } - } - - if (vflag_fdotr) virial_fdotr_compute(); -} - -/* ---------------------------------------------------------------------- - allocate all arrays -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::allocate() -{ - allocated = 1; - int n = atom->ntypes; - - memory->create(setflag,n+1,n+1,"pair:setflag"); - for (int i = 1; i <= n; i++) - for (int j = i; j <= n; j++) - setflag[i][j] = 0; - - memory->create(cutsq,n+1,n+1,"pair:cutsq"); - - memory->create(cut,n+1,n+1,"pair:cut"); - memory->create(epsilon,n+1,n+1,"pair:epsilon"); - memory->create(sigma,n+1,n+1,"pair:sigma"); - memory->create(lj1,n+1,n+1,"pair:lj1"); - memory->create(lj2,n+1,n+1,"pair:lj2"); - memory->create(lj3,n+1,n+1,"pair:lj3"); - memory->create(lj4,n+1,n+1,"pair:lj4"); - memory->create(foffset,n+1,n+1,"pair:foffset"); - 
memory->create(offset,n+1,n+1,"pair:offset"); -} - -/* ---------------------------------------------------------------------- - global settings -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::settings(int narg, char **arg) -{ - if (narg != 1) error->all(FLERR,"Illegal pair_style command"); - - cut_global = force->numeric(FLERR,arg[0]); - - if (cut_global <= 0.0) - error->all(FLERR,"Illegal pair_style command"); - - // reset cutoffs that have been explicitly set - - if (allocated) { - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) - if (setflag[i][j]) cut[i][j] = cut_global; - } -} - -/* ---------------------------------------------------------------------- - set coeffs for one or more type pairs -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::coeff(int narg, char **arg) -{ - if (narg < 4 || narg > 5) - error->all(FLERR,"Incorrect args for pair coefficients"); - if (!allocated) allocate(); - - int ilo,ihi,jlo,jhi; - force->bounds(FLERR,arg[0],atom->ntypes,ilo,ihi); - force->bounds(FLERR,arg[1],atom->ntypes,jlo,jhi); - - double epsilon_one = force->numeric(FLERR,arg[2]); - double sigma_one = force->numeric(FLERR,arg[3]); - - double cut_one = cut_global; - if (narg == 5) cut_one = force->numeric(FLERR,arg[4]); - - if (cut_one <= 0.0) - error->all(FLERR,"Incorrect args for pair coefficients"); - - int count = 0; - for (int i = ilo; i <= ihi; i++) { - for (int j = MAX(jlo,i); j <= jhi; j++) { - epsilon[i][j] = epsilon_one; - sigma[i][j] = sigma_one; - cut[i][j] = cut_one; - setflag[i][j] = 1; - count++; - } - } - - if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); -} - -/* ---------------------------------------------------------------------- - init for one type pair i,j and corresponding j,i -------------------------------------------------------------------------- */ - -double 
PairLJShiftedForce::init_one(int i, int j) -{ - if (setflag[i][j] == 0) { - epsilon[i][j] = mix_energy(epsilon[i][i],epsilon[j][j], - sigma[i][i],sigma[j][j]); - sigma[i][j] = mix_distance(sigma[i][i],sigma[j][j]); - cut[i][j] = mix_distance(cut[i][i],cut[j][j]); - } - - lj1[i][j] = 48.0 * epsilon[i][j] * pow(sigma[i][j],12.0); - lj2[i][j] = 24.0 * epsilon[i][j] * pow(sigma[i][j],6.0); - lj3[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],12.0); - lj4[i][j] = 4.0 * epsilon[i][j] * pow(sigma[i][j],6.0); - - double ratio = sigma[i][j] / cut[i][j]; - foffset[i][j] = 4.0 * epsilon[i][j] * (12.0 * pow(ratio,12.0) - - 6.0 * pow(ratio,6.0)); - offset[i][j] = 4.0 * epsilon[i][j] * (pow(ratio,12.0) - pow(ratio,6.0)); - - cut[j][i] = cut[i][j]; - lj1[j][i] = lj1[i][j]; - lj2[j][i] = lj2[i][j]; - lj3[j][i] = lj3[i][j]; - lj4[j][i] = lj4[i][j]; - foffset[j][i] = foffset[i][j]; - offset[j][i] = offset[i][j]; - - return cut[i][j]; -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::write_restart(FILE *fp) -{ - write_restart_settings(fp); - - int i,j; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - fwrite(&setflag[i][j],sizeof(int),1,fp); - if (setflag[i][j]) { - fwrite(&epsilon[i][j],sizeof(double),1,fp); - fwrite(&sigma[i][j],sizeof(double),1,fp); - fwrite(&cut[i][j],sizeof(double),1,fp); - } - } -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::read_restart(FILE *fp) -{ - read_restart_settings(fp); - allocate(); - - int i,j; - int me = comm->me; - for (i = 1; i <= atom->ntypes; i++) - for (j = i; j <= atom->ntypes; j++) { - if (me == 0) fread(&setflag[i][j],sizeof(int),1,fp); - 
MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); - if (setflag[i][j]) { - if (me == 0) { - fread(&epsilon[i][j],sizeof(double),1,fp); - fread(&sigma[i][j],sizeof(double),1,fp); - fread(&cut[i][j],sizeof(double),1,fp); - } - MPI_Bcast(&epsilon[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&sigma[i][j],1,MPI_DOUBLE,0,world); - MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); - } - } -} - -/* ---------------------------------------------------------------------- - proc 0 writes to restart file -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::write_restart_settings(FILE *fp) -{ - fwrite(&cut_global,sizeof(double),1,fp); - fwrite(&offset_flag,sizeof(int),1,fp); - fwrite(&mix_flag,sizeof(int),1,fp); -} - -/* ---------------------------------------------------------------------- - proc 0 reads from restart file, bcasts -------------------------------------------------------------------------- */ - -void PairLJShiftedForce::read_restart_settings(FILE *fp) -{ - int me = comm->me; - if (me == 0) { - fread(&cut_global,sizeof(double),1,fp); - fread(&offset_flag,sizeof(int),1,fp); - fread(&mix_flag,sizeof(int),1,fp); - } - MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); - MPI_Bcast(&offset_flag,1,MPI_INT,0,world); - MPI_Bcast(&mix_flag,1,MPI_INT,0,world); -} - -/* ---------------------------------------------------------------------- */ - -double PairLJShiftedForce::single(int i, int j, int itype, int jtype, double rsq, - double factor_coul, double factor_lj, - double &fforce) -{ - double r2inv,r6inv,forcelj,philj,r,t; - - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - r = sqrt(rsq); - t = r/cut[itype][jtype]; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) - - t*foffset[itype][jtype]; - fforce = factor_lj*forcelj*r2inv; - - philj = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - (t-1.0)*foffset[itype][jtype] - offset[itype][jtype]; - return factor_lj*philj; -} diff --git a/src/USER-MISC/pair_lj_sf.h 
b/src/USER-MISC/pair_lj_sf.h deleted file mode 100644 index 1a4106b782e02a7d3c31d93add94c1779925a381..0000000000000000000000000000000000000000 --- a/src/USER-MISC/pair_lj_sf.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/sf,PairLJShiftedForce) - -#else - -#ifndef LMP_PAIR_LJ_SF_H -#define LMP_PAIR_LJ_SF_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairLJShiftedForce : public Pair { - public: - PairLJShiftedForce(class LAMMPS *); - virtual ~PairLJShiftedForce(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - double single(int, int, int, int, double, double, double, double &); - - protected: - double cut_global; - double **cut; - double **epsilon,**sigma; - double **lj1,**lj2,**lj3,**lj4,**foffset,**offset; - - void allocate(); -}; - -} - -#endif -#endif diff --git a/src/USER-NETCDF/README b/src/USER-NETCDF/README index 57dec5e4c835fd41b4b5ac7a62ac8e96e75913e0..7d7874e5acaefc5bc0b785d5304252f5e7320ba4 100644 --- a/src/USER-NETCDF/README +++ b/src/USER-NETCDF/README @@ -1,9 +1,9 @@ USER-NETCDF ============ -This package provides the netcf and netcdf/mpiio dump styles. 
-See the doc page for dump nc or dump nc/mpiio command for how to use them. -Compiling these dump styles requires having the netCDF library installed +This package provides the netcdf and netcdf/mpiio dump styles. +See the doc page for dump netcdf or dump netcdf/mpiio command for how to use +them. Compiling these dump styles requires having the netCDF library installed on your system. See lib/netcdf/README for additional details. PACKAGE DESCRIPTION @@ -29,11 +29,11 @@ NetCDF files can be directly visualized with the following tools: a NetCDF reader that is not present in the standard distribution of AtomEye. The person who created these files is Lars Pastewka at -Karlsruhe Institute of Technology (lars.pastewka@kit.edu). +the University of Freiburg (lars.pastewka@imtek.uni-freiburg.de). Contact him directly if you have questions. Lars Pastewka -Institute for Applied Materials (IAM) -Karlsruhe Institute of Technology (KIT) -Kaiserstrasse 12, 76131 Karlsruhe -e-mail: lars.pastewka@kit.edu +University of Freiburg +Department of Microsystems Engineering +Georges-Köhler-Allee 103, 79110 Freiburg, Germany +e-mail: lars.pastewka@imtek.uni-freiburg.de diff --git a/src/USER-NETCDF/dump_netcdf.cpp b/src/USER-NETCDF/dump_netcdf.cpp index bad90bdef3d1affe541a93c27e69856a66c3f700..b45794126d8175e876d9764738f2d582bd0f3ae0 100644 --- a/src/USER-NETCDF/dump_netcdf.cpp +++ b/src/USER-NETCDF/dump_netcdf.cpp @@ -1,25 +1,3 @@ -/* ====================================================================== - LAMMPS NetCDF dump style - https://github.com/pastewka/lammps-netcdf - Lars Pastewka, lars.pastewka@kit.edu - - Copyright (2011-2013) Fraunhofer IWM - Copyright (2014) Karlsruhe Institute of Technology - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - ====================================================================== */ - /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories @@ -33,6 +11,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Lars Pastewka (University of Freiburg) +------------------------------------------------------------------------- */ + #if defined(LMP_HAS_NETCDF) #include <unistd.h> @@ -55,11 +37,13 @@ #include "universe.h" #include "variable.h" #include "force.h" +#include "output.h" +#include "thermo.h" using namespace LAMMPS_NS; using namespace MathConst; -enum{INT,DOUBLE}; // same as in dump_custom.cpp +enum{INT,FLOAT,BIGINT}; // same as in thermo.cpp const char NC_FRAME_STR[] = "frame"; const char NC_SPATIAL_STR[] = "spatial"; @@ -208,15 +192,15 @@ DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) : perat[inc].field[idim] = i; } - n_perframe = 0; - perframe = NULL; - n_buffer = 0; int_buffer = NULL; double_buffer = NULL; double_precision = false; + thermo = false; + thermovar = NULL; + framei = 0; } @@ -227,8 +211,7 @@ DumpNetCDF::~DumpNetCDF() closefile(); delete [] perat; - if (n_perframe > 0) - delete [] perframe; + if (thermovar) delete [] thermovar; if (int_buffer) memory->sfree(int_buffer); if (double_buffer) memory->sfree(double_buffer); @@ -238,6 +221,11 @@ 
DumpNetCDF::~DumpNetCDF() void DumpNetCDF::openfile() { + if (thermo && !singlefile_opened) { + if (thermovar) delete [] thermovar; + thermovar = new int[output->thermo->nfield]; + } + // now the computes and fixes have been initialized, so we can query // for the size of vector quantities for (int i = 0; i < n_perat; i++) { @@ -289,30 +277,30 @@ void DumpNetCDF::openfile() // dimensions NCERRX( nc_inq_dimid(ncid, NC_FRAME_STR, &frame_dim), NC_FRAME_STR ); NCERRX( nc_inq_dimid(ncid, NC_SPATIAL_STR, &spatial_dim), - NC_SPATIAL_STR ); + NC_SPATIAL_STR ); NCERRX( nc_inq_dimid(ncid, NC_VOIGT_STR, &Voigt_dim), NC_VOIGT_STR ); NCERRX( nc_inq_dimid(ncid, NC_ATOM_STR, &atom_dim), NC_ATOM_STR ); NCERRX( nc_inq_dimid(ncid, NC_CELL_SPATIAL_STR, &cell_spatial_dim), - NC_CELL_SPATIAL_STR ); + NC_CELL_SPATIAL_STR ); NCERRX( nc_inq_dimid(ncid, NC_CELL_ANGULAR_STR, &cell_angular_dim), - NC_CELL_ANGULAR_STR ); + NC_CELL_ANGULAR_STR ); NCERRX( nc_inq_dimid(ncid, NC_LABEL_STR, &label_dim), NC_LABEL_STR ); // default variables NCERRX( nc_inq_varid(ncid, NC_SPATIAL_STR, &spatial_var), - NC_SPATIAL_STR ); + NC_SPATIAL_STR ); NCERRX( nc_inq_varid(ncid, NC_CELL_SPATIAL_STR, &cell_spatial_var), - NC_CELL_SPATIAL_STR); + NC_CELL_SPATIAL_STR); NCERRX( nc_inq_varid(ncid, NC_CELL_ANGULAR_STR, &cell_angular_var), - NC_CELL_ANGULAR_STR); + NC_CELL_ANGULAR_STR); NCERRX( nc_inq_varid(ncid, NC_TIME_STR, &time_var), NC_TIME_STR ); NCERRX( nc_inq_varid(ncid, NC_CELL_ORIGIN_STR, &cell_origin_var), - NC_CELL_ORIGIN_STR ); + NC_CELL_ORIGIN_STR ); NCERRX( nc_inq_varid(ncid, NC_CELL_LENGTHS_STR, &cell_lengths_var), - NC_CELL_LENGTHS_STR); + NC_CELL_LENGTHS_STR); NCERRX( nc_inq_varid(ncid, NC_CELL_ANGLES_STR, &cell_angles_var), - NC_CELL_ANGLES_STR); + NC_CELL_ANGLES_STR); // variables specified in the input file for (int i = 0; i < n_perat; i++) { @@ -334,9 +322,12 @@ void DumpNetCDF::openfile() } // perframe variables - for (int i = 0; i < n_perframe; i++) { - NCERRX( nc_inq_varid(ncid, perframe[i].name, 
&perframe[i].var), - perframe[i].name ); + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + NCERRX( nc_inq_varid(ncid, th->keyword[i], &thermovar[i]), + th->keyword[i] ); + } } size_t nframes; @@ -354,49 +345,49 @@ void DumpNetCDF::openfile() if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( nc_create(filename, NC_64BIT_OFFSET, &ncid), - filename ); + NCERRX( nc_create(filename, NC_64BIT_DATA, &ncid), + filename ); // dimensions NCERRX( nc_def_dim(ncid, NC_FRAME_STR, NC_UNLIMITED, &frame_dim), - NC_FRAME_STR ); + NC_FRAME_STR ); NCERRX( nc_def_dim(ncid, NC_SPATIAL_STR, 3, &spatial_dim), - NC_SPATIAL_STR ); + NC_SPATIAL_STR ); NCERRX( nc_def_dim(ncid, NC_VOIGT_STR, 6, &Voigt_dim), - NC_VOIGT_STR ); + NC_VOIGT_STR ); NCERRX( nc_def_dim(ncid, NC_ATOM_STR, ntotalgr, &atom_dim), - NC_ATOM_STR ); + NC_ATOM_STR ); NCERRX( nc_def_dim(ncid, NC_CELL_SPATIAL_STR, 3, &cell_spatial_dim), - NC_CELL_SPATIAL_STR ); + NC_CELL_SPATIAL_STR ); NCERRX( nc_def_dim(ncid, NC_CELL_ANGULAR_STR, 3, &cell_angular_dim), - NC_CELL_ANGULAR_STR ); + NC_CELL_ANGULAR_STR ); NCERRX( nc_def_dim(ncid, NC_LABEL_STR, 10, &label_dim), - NC_LABEL_STR ); + NC_LABEL_STR ); // default variables dims[0] = spatial_dim; NCERRX( nc_def_var(ncid, NC_SPATIAL_STR, NC_CHAR, 1, dims, &spatial_var), - NC_SPATIAL_STR ); + NC_SPATIAL_STR ); NCERRX( nc_def_var(ncid, NC_CELL_SPATIAL_STR, NC_CHAR, 1, dims, - &cell_spatial_var), NC_CELL_SPATIAL_STR ); + &cell_spatial_var), NC_CELL_SPATIAL_STR ); dims[0] = spatial_dim; dims[1] = label_dim; NCERRX( nc_def_var(ncid, NC_CELL_ANGULAR_STR, NC_CHAR, 2, dims, - &cell_angular_var), NC_CELL_ANGULAR_STR ); + &cell_angular_var), NC_CELL_ANGULAR_STR ); dims[0] = frame_dim; NCERRX( nc_def_var(ncid, NC_TIME_STR, NC_DOUBLE, 1, dims, &time_var), - NC_TIME_STR); + NC_TIME_STR); dims[0] = frame_dim; dims[1] = cell_spatial_dim; NCERRX( nc_def_var(ncid, NC_CELL_ORIGIN_STR, NC_DOUBLE, 2, dims, - &cell_origin_var), NC_CELL_ORIGIN_STR ); + 
&cell_origin_var), NC_CELL_ORIGIN_STR ); NCERRX( nc_def_var(ncid, NC_CELL_LENGTHS_STR, NC_DOUBLE, 2, dims, - &cell_lengths_var), NC_CELL_LENGTHS_STR ); + &cell_lengths_var), NC_CELL_LENGTHS_STR ); dims[0] = frame_dim; dims[1] = cell_angular_dim; NCERRX( nc_def_var(ncid, NC_CELL_ANGLES_STR, NC_DOUBLE, 2, dims, - &cell_angles_var), NC_CELL_ANGLES_STR ); + &cell_angles_var), NC_CELL_ANGLES_STR ); // variables specified in the input file dims[0] = frame_dim; @@ -423,17 +414,17 @@ void DumpNetCDF::openfile() // this is a tensor in Voigt notation dims[2] = Voigt_dim; NCERRX( nc_def_var(ncid, perat[i].name, xtype, 2, dims+1, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else if (perat[i].dims == 3) { // this is a vector, we need to store x-, y- and z-coordinates dims[2] = spatial_dim; NCERRX( nc_def_var(ncid, perat[i].name, xtype, 2, dims+1, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else if (perat[i].dims == 1) { NCERRX( nc_def_var(ncid, perat[i].name, xtype, 1, dims+1, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else { char errstr[1024]; @@ -448,17 +439,17 @@ void DumpNetCDF::openfile() // this is a tensor in Voigt notation dims[2] = Voigt_dim; NCERRX( nc_def_var(ncid, perat[i].name, xtype, 3, dims, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else if (perat[i].dims == 3) { // this is a vector, we need to store x-, y- and z-coordinates dims[2] = spatial_dim; NCERRX( nc_def_var(ncid, perat[i].name, xtype, 3, dims, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else if (perat[i].dims == 1) { NCERRX( nc_def_var(ncid, perat[i].name, xtype, 2, dims, - &perat[i].var), perat[i].name ); + &perat[i].var), perat[i].name ); } else { char errstr[1024]; @@ -471,14 +462,21 @@ void DumpNetCDF::openfile() } // perframe variables - for (int i = 0; i < n_perframe; i++) { - if (perframe[i].type == THIS_IS_A_BIGINT) { - NCERRX( nc_def_var(ncid, 
perframe[i].name, NC_LONG, 1, dims, - &perframe[i].var), perframe[i].name ); - } - else { - NCERRX( nc_def_var(ncid, perframe[i].name, NC_DOUBLE, 1, dims, - &perframe[i].var), perframe[i].name ); + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + if (th->vtype[i] == FLOAT) { + NCERRX( nc_def_var(ncid, th->keyword[i], NC_DOUBLE, 1, dims, + &thermovar[i]), th->keyword[i] ); + } + else if (th->vtype[i] == INT) { + NCERRX( nc_def_var(ncid, th->keyword[i], NC_INT, 1, dims, + &thermovar[i]), th->keyword[i] ); + } + else if (th->vtype[i] == BIGINT) { + NCERRX( nc_def_var(ncid, th->keyword[i], NC_LONG, 1, dims, + &thermovar[i]), th->keyword[i] ); + } } } @@ -622,46 +620,30 @@ void DumpNetCDF::write() start[0] = framei-1; start[1] = 0; - for (int i = 0; i < n_perframe; i++) { - - if (perframe[i].type == THIS_IS_A_BIGINT) { - bigint data; - (this->*perframe[i].compute)((void*) &data); - - if (filewriter) + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + th->call_vfunc(i); + if (filewriter) { + if (th->vtype[i] == FLOAT) { + NCERRX( nc_put_var1_double(ncid, thermovar[i], start, + &th->dvalue), + th->keyword[i] ); + } + else if (th->vtype[i] == INT) { + NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &th->ivalue), + th->keyword[i] ); + } + else if (th->vtype[i] == BIGINT) { #if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) - NCERR( nc_put_var1_long(ncid, perframe[i].var, start, &data) ); + NCERRX( nc_put_var1_long(ncid, thermovar[i], start, &th->bivalue), + th->keyword[i] ); #else - NCERR( nc_put_var1_int(ncid, perframe[i].var, start, &data) ); + NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &th->bivalue), + th->keyword[i] ); #endif - } - else { - double data; - int j = perframe[i].index; - int idim = perframe[i].dim; - - if (perframe[i].type == THIS_IS_A_COMPUTE) { - if (idim >= 0) { - modify->compute[j]->compute_vector(); - data = modify->compute[j]->vector[idim]; - } - else - data = 
modify->compute[j]->compute_scalar(); - } - else if (perframe[i].type == THIS_IS_A_FIX) { - if (idim >= 0) { - data = modify->fix[j]->compute_vector(idim); } - else - data = modify->fix[j]->compute_scalar(); } - else if (perframe[i].type == THIS_IS_A_VARIABLE) { - j = input->variable->find(perframe[i].id); - data = input->variable->compute_equal(j); - } - - if (filewriter) - NCERR( nc_put_var1_double(ncid, perframe[i].var, start, &data) ); } } @@ -908,126 +890,19 @@ int DumpNetCDF::modify_param(int narg, char **arg) iarg++; return 2; } - else if (strcmp(arg[iarg],"global") == 0) { - // "perframe" quantities, i.e. not per-atom stuff - + else if (strcmp(arg[iarg],"thermo") == 0) { iarg++; - - n_perframe = narg-iarg; - perframe = new nc_perframe_t[n_perframe]; - - for (int i = 0; iarg < narg; iarg++, i++) { - int n; - char *suffix=NULL; - - if (!strcmp(arg[iarg],"step")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDF::compute_step; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strcmp(arg[iarg],"elapsed")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDF::compute_elapsed; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strcmp(arg[iarg],"elaplong")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDF::compute_elapsed_long; - strcpy(perframe[i].name, arg[iarg]); - } - else { - - n = strlen(arg[iarg]); - - if (n > 2) { - suffix = new char[n-1]; - strcpy(suffix, arg[iarg]+2); - } - else { - char errstr[1024]; - sprintf(errstr, "perframe quantity '%s' must thermo quantity or " - "compute, fix or variable", arg[iarg]); - error->all(FLERR,errstr); - } - - if (!strncmp(arg[iarg], "c_", 2)) { - int idim = -1; - char *ptr = strchr(suffix, '['); - - if (ptr) { - if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Missing ']' in dump modify command"); - *ptr = '\0'; - idim = ptr[1] - '1'; - } - - n = modify->find_compute(suffix); - if (n < 0) - error->all(FLERR,"Could not find 
dump modify compute ID"); - if (modify->compute[n]->peratom_flag != 0) - error->all(FLERR,"Dump modify compute ID computes per-atom info"); - if (idim >= 0 && modify->compute[n]->vector_flag == 0) - error->all(FLERR,"Dump modify compute ID does not compute vector"); - if (idim < 0 && modify->compute[n]->scalar_flag == 0) - error->all(FLERR,"Dump modify compute ID does not compute scalar"); - - perframe[i].type = THIS_IS_A_COMPUTE; - perframe[i].dim = idim; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strncmp(arg[iarg], "f_", 2)) { - int idim = -1; - char *ptr = strchr(suffix, '['); - - if (ptr) { - if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Missing ']' in dump modify command"); - *ptr = '\0'; - idim = ptr[1] - '1'; - } - - n = modify->find_fix(suffix); - if (n < 0) - error->all(FLERR,"Could not find dump modify fix ID"); - if (modify->fix[n]->peratom_flag != 0) - error->all(FLERR,"Dump modify fix ID computes per-atom info"); - if (idim >= 0 && modify->fix[n]->vector_flag == 0) - error->all(FLERR,"Dump modify fix ID does not compute vector"); - if (idim < 0 && modify->fix[n]->scalar_flag == 0) - error->all(FLERR,"Dump modify fix ID does not compute vector"); - - perframe[i].type = THIS_IS_A_FIX; - perframe[i].dim = idim; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strncmp(arg[iarg], "v_", 2)) { - n = input->variable->find(suffix); - if (n < 0) - error->all(FLERR,"Could not find dump modify variable ID"); - if (!input->variable->equalstyle(n)) - error->all(FLERR,"Dump modify variable must be of style equal"); - - perframe[i].type = THIS_IS_A_VARIABLE; - perframe[i].dim = 1; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - strcpy(perframe[i].id, suffix); - } - else { - char errstr[1024]; - sprintf(errstr, "perframe quantity '%s' must be compute, fix or " - "variable", arg[iarg]); - error->all(FLERR,errstr); - } - - delete [] suffix; - - } + if (iarg >= narg) + 
error->all(FLERR,"expected 'yes' or 'no' after 'thermo' keyword."); + if (strcmp(arg[iarg],"yes") == 0) { + thermo = true; } - - return narg; + else if (strcmp(arg[iarg],"no") == 0) { + thermo = false; + } + else error->all(FLERR,"expected 'yes' or 'no' after 'thermo' keyword."); + iarg++; + return 2; } else return 0; } @@ -1101,41 +976,14 @@ void DumpNetCDF::ncerr(int err, const char *descr, int line) char errstr[1024]; if (descr) { sprintf(errstr, "NetCDF failed with error '%s' (while accessing '%s') " - " in line %i of %s.", nc_strerror(err), descr, line, __FILE__); + " in line %i of %s.", nc_strerror(err), descr, line, __FILE__); } else { sprintf(errstr, "NetCDF failed with error '%s' in line %i of %s.", - nc_strerror(err), line, __FILE__); + nc_strerror(err), line, __FILE__); } error->one(FLERR,errstr); } } -/* ---------------------------------------------------------------------- - one method for every keyword thermo can output - called by compute() or evaluate_keyword() - compute will have already been called - set ivalue/dvalue/bivalue if value is int/double/bigint - customize a new keyword by adding a method -------------------------------------------------------------------------- */ - -void DumpNetCDF::compute_step(void *r) -{ - *((bigint *) r) = update->ntimestep; -} - -/* ---------------------------------------------------------------------- */ - -void DumpNetCDF::compute_elapsed(void *r) -{ - *((bigint *) r) = update->ntimestep - update->firststep; -} - -/* ---------------------------------------------------------------------- */ - -void DumpNetCDF::compute_elapsed_long(void *r) -{ - *((bigint *) r) = update->ntimestep - update->beginstep; -} - #endif /* defined(LMP_HAS_NETCDF) */ diff --git a/src/USER-NETCDF/dump_netcdf.h b/src/USER-NETCDF/dump_netcdf.h index daf4e9d0de2d94151c6f0bad6b9e348171e48b82..036df3f058398a53c9f55df4f1fab79f7a477b8e 100644 --- a/src/USER-NETCDF/dump_netcdf.h +++ b/src/USER-NETCDF/dump_netcdf.h @@ -1,25 +1,3 @@ -/* 
====================================================================== - LAMMPS NetCDF dump style - https://github.com/pastewka/lammps-netcdf - Lars Pastewka, lars.pastewka@kit.edu - - Copyright (2011-2013) Fraunhofer IWM - Copyright (2014) Karlsruhe Institute of Technology - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - ====================================================================== */ - /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories @@ -33,6 +11,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Lars Pastewka (University of Freiburg) +------------------------------------------------------------------------- */ + #if defined(LMP_HAS_NETCDF) #ifdef DUMP_CLASS @@ -69,22 +51,6 @@ class DumpNetCDF : public DumpCustom { int ndumped; // number of enties written for this prop. 
}; - typedef void (DumpNetCDF::*funcptr_t)(void *); - - // per-frame quantities (variables, fixes or computes) - struct nc_perframe_t { - char name[NC_FIELD_NAME_MAX]; // field name - int var; // NetCDF variable - int type; // variable, fix, compute or callback - int index; // index in fix/compute list - funcptr_t compute; // compute function - int dim; // dimension - char id[NC_FIELD_NAME_MAX]; // variable id - - bigint bigint_data; // actual data - double double_data; // actual data - }; - int framei; // current frame index int blocki; // current block index int ndata; // number of data blocks to expect @@ -94,10 +60,10 @@ class DumpNetCDF : public DumpCustom { int n_perat; // # of netcdf per-atom properties nc_perat_t *perat; // per-atom properties - int n_perframe; // # of global netcdf (not per-atom) fix props - nc_perframe_t *perframe; // global properties + int *thermovar; // NetCDF variables for thermo output bool double_precision; // write everything as double precision + bool thermo; // write thermo output to netcdf file bigint n_buffer; // size of buffer int *int_buffer; // buffer for passing data to netcdf @@ -131,10 +97,6 @@ class DumpNetCDF : public DumpCustom { virtual int modify_param(int, char **); void ncerr(int, const char *, int); - - void compute_step(void *); - void compute_elapsed(void *); - void compute_elapsed_long(void *); }; } diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.cpp b/src/USER-NETCDF/dump_netcdf_mpiio.cpp index 2e9ec274a5e07cc04bddb7644aa87d54353b09c6..c5b87b178e2a58bf2b195a81365ec4352f168613 100644 --- a/src/USER-NETCDF/dump_netcdf_mpiio.cpp +++ b/src/USER-NETCDF/dump_netcdf_mpiio.cpp @@ -1,25 +1,3 @@ -/* ====================================================================== - LAMMPS NetCDF dump style - https://github.com/pastewka/lammps-netcdf - Lars Pastewka, lars.pastewka@kit.edu - - Copyright (2011-2013) Fraunhofer IWM - Copyright (2014) Karlsruhe Institute of Technology - - This program is free software: you can 
redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - ====================================================================== */ - /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories @@ -33,6 +11,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Lars Pastewka (University of Freiburg) +------------------------------------------------------------------------- */ + #if defined(LMP_HAS_PNETCDF) #include <unistd.h> @@ -55,11 +37,13 @@ #include "universe.h" #include "variable.h" #include "force.h" +#include "output.h" +#include "thermo.h" using namespace LAMMPS_NS; using namespace MathConst; -enum{INT,DOUBLE}; // same as in dump_custom.cpp +enum{INT,FLOAT,BIGINT}; // same as in thermo.cpp const char NC_FRAME_STR[] = "frame"; const char NC_SPATIAL_STR[] = "spatial"; @@ -201,15 +185,15 @@ DumpNetCDFMPIIO::DumpNetCDFMPIIO(LAMMPS *lmp, int narg, char **arg) : perat[inc].field[idim] = i; } - n_perframe = 0; - perframe = NULL; - n_buffer = 0; int_buffer = NULL; double_buffer = NULL; double_precision = false; + thermo = false; + thermovar = NULL; + framei = 0; } @@ -220,8 +204,7 @@ DumpNetCDFMPIIO::~DumpNetCDFMPIIO() 
closefile(); delete [] perat; - if (n_perframe > 0) - delete [] perframe; + if (thermovar) delete [] thermovar; if (int_buffer) memory->sfree(int_buffer); if (double_buffer) memory->sfree(double_buffer); @@ -231,6 +214,11 @@ DumpNetCDFMPIIO::~DumpNetCDFMPIIO() void DumpNetCDFMPIIO::openfile() { + if (thermo && !singlefile_opened) { + if (thermovar) delete [] thermovar; + thermovar = new int[output->thermo->nfield]; + } + // now the computes and fixes have been initialized, so we can query // for the size of vector quantities for (int i = 0; i < n_perat; i++) { @@ -330,9 +318,12 @@ void DumpNetCDFMPIIO::openfile() } // perframe variables - for (int i = 0; i < n_perframe; i++) { - NCERRX( ncmpi_inq_varid(ncid, perframe[i].name, &perframe[i].var), - perframe[i].name ); + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + NCERRX( ncmpi_inq_varid(ncid, th->keyword[i], &thermovar[i]), + th->keyword[i] ); + } } MPI_Offset nframes; @@ -350,7 +341,7 @@ void DumpNetCDFMPIIO::openfile() if (singlefile_opened) return; singlefile_opened = 1; - NCERRX( ncmpi_create(MPI_COMM_WORLD, filename, NC_64BIT_OFFSET, + NCERRX( ncmpi_create(MPI_COMM_WORLD, filename, NC_64BIT_DATA, MPI_INFO_NULL, &ncid), filename ); // dimensions @@ -439,14 +430,21 @@ void DumpNetCDFMPIIO::openfile() } // perframe variables - for (int i = 0; i < n_perframe; i++) { - if (perframe[i].type == THIS_IS_A_BIGINT) { - NCERRX( ncmpi_def_var(ncid, perframe[i].name, NC_INT, 1, dims, - &perframe[i].var), perframe[i].name ); - } - else { - NCERRX( ncmpi_def_var(ncid, perframe[i].name, NC_DOUBLE, 1, dims, - &perframe[i].var), perframe[i].name ); + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + if (th->vtype[i] == FLOAT) { + NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_DOUBLE, 1, dims, + &thermovar[i]), th->keyword[i] ); + } + else if (th->vtype[i] == INT) { + NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_INT, 1, dims, + &thermovar[i]), 
th->keyword[i] ); + } + else if (th->vtype[i] == BIGINT) { + NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_LONG, 1, dims, + &thermovar[i]), th->keyword[i] ); + } } } @@ -600,50 +598,34 @@ void DumpNetCDFMPIIO::write() NCERR( ncmpi_begin_indep_data(ncid) ); - for (int i = 0; i < n_perframe; i++) { - - if (perframe[i].type == THIS_IS_A_BIGINT) { - bigint data; - (this->*perframe[i].compute)((void*) &data); - - if (filewriter) + if (thermo) { + Thermo *th = output->thermo; + for (int i = 0; i < th->nfield; i++) { + th->call_vfunc(i); + if (filewriter) { + if (th->vtype[i] == FLOAT) { + NCERRX( ncmpi_put_var1_double(ncid, thermovar[i], start, + &th->dvalue), + th->keyword[i] ); + } + else if (th->vtype[i] == INT) { + NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &th->ivalue), + th->keyword[i] ); + } + else if (th->vtype[i] == BIGINT) { #if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) - NCERR( ncmpi_put_var1_long(ncid, perframe[i].var, start, &data) ); + NCERRX( ncmpi_put_var1_long(ncid, thermovar[i], start, &th->bivalue), + th->keyword[i] ); #else - NCERR( ncmpi_put_var1_int(ncid, perframe[i].var, start, &data) ); + NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &th->bivalue), + th->keyword[i] ); #endif - } - else { - double data; - int j = perframe[i].index; - int idim = perframe[i].dim; - - if (perframe[i].type == THIS_IS_A_COMPUTE) { - if (idim >= 0) { - modify->compute[j]->compute_vector(); - data = modify->compute[j]->vector[idim]; - } - else - data = modify->compute[j]->compute_scalar(); - } - else if (perframe[i].type == THIS_IS_A_FIX) { - if (idim >= 0) { - data = modify->fix[j]->compute_vector(idim); } - else - data = modify->fix[j]->compute_scalar(); } - else if (perframe[i].type == THIS_IS_A_VARIABLE) { - j = input->variable->find(perframe[i].id); - data = input->variable->compute_equal(j); - } - - if (filewriter) - NCERR( ncmpi_put_var1_double(ncid, perframe[i].var, start, &data) ); } } - // write timestep header + // write timestep 
header write_time_and_cell(); @@ -903,126 +885,19 @@ int DumpNetCDFMPIIO::modify_param(int narg, char **arg) iarg++; return 2; } - else if (strcmp(arg[iarg],"global") == 0) { - // "perframe" quantities, i.e. not per-atom stuff - + else if (strcmp(arg[iarg],"thermo") == 0) { iarg++; - - n_perframe = narg-iarg; - perframe = new nc_perframe_t[n_perframe]; - - for (int i = 0; iarg < narg; iarg++, i++) { - int n; - char *suffix; - - if (!strcmp(arg[iarg],"step")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDFMPIIO::compute_step; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strcmp(arg[iarg],"elapsed")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDFMPIIO::compute_elapsed; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strcmp(arg[iarg],"elaplong")) { - perframe[i].type = THIS_IS_A_BIGINT; - perframe[i].compute = &DumpNetCDFMPIIO::compute_elapsed_long; - strcpy(perframe[i].name, arg[iarg]); - } - else { - - n = strlen(arg[iarg]); - - if (n > 2) { - suffix = new char[n-1]; - strcpy(suffix, arg[iarg]+2); - } - else { - char errstr[1024]; - sprintf(errstr, "perframe quantity '%s' must thermo quantity or " - "compute, fix or variable", arg[iarg]); - error->all(FLERR,errstr); - } - - if (!strncmp(arg[iarg], "c_", 2)) { - int idim = -1; - char *ptr = strchr(suffix, '['); - - if (ptr) { - if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Missing ']' in dump modify command"); - *ptr = '\0'; - idim = ptr[1] - '1'; - } - - n = modify->find_compute(suffix); - if (n < 0) - error->all(FLERR,"Could not find dump modify compute ID"); - if (modify->compute[n]->peratom_flag != 0) - error->all(FLERR,"Dump modify compute ID computes per-atom info"); - if (idim >= 0 && modify->compute[n]->vector_flag == 0) - error->all(FLERR,"Dump modify compute ID does not compute vector"); - if (idim < 0 && modify->compute[n]->scalar_flag == 0) - error->all(FLERR,"Dump modify compute ID does not compute scalar"); - - 
perframe[i].type = THIS_IS_A_COMPUTE; - perframe[i].dim = idim; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strncmp(arg[iarg], "f_", 2)) { - int idim = -1; - char *ptr = strchr(suffix, '['); - - if (ptr) { - if (suffix[strlen(suffix)-1] != ']') - error->all(FLERR,"Missing ']' in dump modify command"); - *ptr = '\0'; - idim = ptr[1] - '1'; - } - - n = modify->find_fix(suffix); - if (n < 0) - error->all(FLERR,"Could not find dump modify fix ID"); - if (modify->fix[n]->peratom_flag != 0) - error->all(FLERR,"Dump modify fix ID computes per-atom info"); - if (idim >= 0 && modify->fix[n]->vector_flag == 0) - error->all(FLERR,"Dump modify fix ID does not compute vector"); - if (idim < 0 && modify->fix[n]->scalar_flag == 0) - error->all(FLERR,"Dump modify fix ID does not compute vector"); - - perframe[i].type = THIS_IS_A_FIX; - perframe[i].dim = idim; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - } - else if (!strncmp(arg[iarg], "v_", 2)) { - n = input->variable->find(suffix); - if (n < 0) - error->all(FLERR,"Could not find dump modify variable ID"); - if (!input->variable->equalstyle(n)) - error->all(FLERR,"Dump modify variable must be of style equal"); - - perframe[i].type = THIS_IS_A_VARIABLE; - perframe[i].dim = 1; - perframe[i].index = n; - strcpy(perframe[i].name, arg[iarg]); - strcpy(perframe[i].id, suffix); - } - else { - char errstr[1024]; - sprintf(errstr, "perframe quantity '%s' must be compute, fix or " - "variable", arg[iarg]); - error->all(FLERR,errstr); - } - - delete [] suffix; - - } + if (iarg >= narg) + error->all(FLERR,"expected 'yes' or 'no' after 'thermo' keyword."); + if (strcmp(arg[iarg],"yes") == 0) { + thermo = true; } - - return narg; + else if (strcmp(arg[iarg],"no") == 0) { + thermo = false; + } + else error->all(FLERR,"expected 'yes' or 'no' after 'thermo' keyword."); + iarg++; + return 2; } else return 0; } @@ -1044,31 +919,4 @@ void DumpNetCDFMPIIO::ncerr(int err, const char *descr, int 
line) } } -/* ---------------------------------------------------------------------- - one method for every keyword thermo can output - called by compute() or evaluate_keyword() - compute will have already been called - set ivalue/dvalue/bivalue if value is int/double/bigint - customize a new keyword by adding a method -------------------------------------------------------------------------- */ - -void DumpNetCDFMPIIO::compute_step(void *r) -{ - *((bigint *) r) = update->ntimestep; -} - -/* ---------------------------------------------------------------------- */ - -void DumpNetCDFMPIIO::compute_elapsed(void *r) -{ - *((bigint *) r) = update->ntimestep - update->firststep; -} - -/* ---------------------------------------------------------------------- */ - -void DumpNetCDFMPIIO::compute_elapsed_long(void *r) -{ - *((bigint *) r) = update->ntimestep - update->beginstep; -} - #endif /* defined(LMP_HAS_PNETCDF) */ diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.h b/src/USER-NETCDF/dump_netcdf_mpiio.h index 6f5b00b03350f6a08c9c603a01cbae8c49f90e7b..10b0e800d2347d561d42073c1b3c196275fd9ca5 100644 --- a/src/USER-NETCDF/dump_netcdf_mpiio.h +++ b/src/USER-NETCDF/dump_netcdf_mpiio.h @@ -1,25 +1,3 @@ -/* ====================================================================== - LAMMPS NetCDF dump style - https://github.com/pastewka/lammps-netcdf - Lars Pastewka, lars.pastewka@kit.edu - - Copyright (2011-2013) Fraunhofer IWM - Copyright (2014) Karlsruhe Institute of Technology - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - ====================================================================== */ - /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories @@ -33,6 +11,10 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + Contributing author: Lars Pastewka (University of Freiburg) +------------------------------------------------------------------------- */ + #if defined(LMP_HAS_PNETCDF) #ifdef DUMP_CLASS @@ -66,21 +48,7 @@ class DumpNetCDFMPIIO : public DumpCustom { int var; // NetCDF variable }; - typedef void (DumpNCMPIIO::*funcptr_t)(void *); - - // per-frame quantities (variables, fixes or computes) - struct nc_perframe_t { - char name[NC_MPIIO_FIELD_NAME_MAX]; // field name - int var; // NetCDF variable - int type; // variable, fix, compute or callback - int index; // index in fix/compute list - funcptr_t compute; // compute function - int dim; // dimension - char id[NC_MPIIO_FIELD_NAME_MAX]; // variable id - - bigint bigint_data; // actual data - double double_data; // actual data - }; + typedef void (DumpNetCDFMPIIO::*funcptr_t)(void *); int framei; // current frame index int blocki; // current block index @@ -91,10 +59,10 @@ class DumpNetCDFMPIIO : public DumpCustom { int n_perat; // # of netcdf per-atom properties nc_perat_t *perat; // per-atom properties - int n_perframe; // # of global netcdf (not per-atom) fix props - nc_perframe_t *perframe; // global properties + int *thermovar; // NetCDF variables for thermo output bool double_precision; // write everything as double precision + bool thermo; // write thermo output to netcdf file bigint 
n_buffer; // size of buffer int *int_buffer; // buffer for passing data to netcdf @@ -128,10 +96,6 @@ class DumpNetCDFMPIIO : public DumpCustom { virtual int modify_param(int, char **); void ncerr(int, const char *, int); - - void compute_step(void *); - void compute_elapsed(void *); - void compute_elapsed_long(void *); }; } diff --git a/src/USER-OMP/angle_dipole_omp.cpp b/src/USER-OMP/angle_dipole_omp.cpp index 9a646e04b0a4c1d0f1a1e30352d8cff8bb61a49c..f582ce4c41178760510e6c519558071d6f41a810 100644 --- a/src/USER-OMP/angle_dipole_omp.cpp +++ b/src/USER-OMP/angle_dipole_omp.cpp @@ -122,14 +122,14 @@ void AngleDipoleOMP::eval(int nfrom, int nto, ThrData * const thr) delTx = tangle * (dely*mu[iDip][2] - delz*mu[iDip][1]); delTy = tangle * (delz*mu[iDip][0] - delx*mu[iDip][2]); delTz = tangle * (delx*mu[iDip][1] - dely*mu[iDip][0]); - + torque[iDip][0] += delTx; torque[iDip][1] += delTy; torque[iDip][2] += delTz; // Force couple that counterbalances dipolar torque fx = dely*delTz - delz*delTy; // direction (fi): - r x (-T) - fy = delz*delTx - delx*delTz; + fy = delz*delTx - delx*delTz; fz = delx*delTy - dely*delTx; fmod = sqrt(delTx*delTx + delTy*delTy + delTz*delTz) / r; // magnitude @@ -142,11 +142,11 @@ void AngleDipoleOMP::eval(int nfrom, int nto, ThrData * const thr) fj[0] = -fi[0]; fj[1] = -fi[1]; fj[2] = -fi[2]; - + f[iDip][0] += fj[0]; f[iDip][1] += fj[1]; f[iDip][2] += fj[2]; - + f[iRef][0] += fi[0]; f[iRef][1] += fi[1]; f[iRef][2] += fi[2]; diff --git a/src/USER-OMP/fix_qeq_reax_omp.cpp b/src/USER-OMP/fix_qeq_reax_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3ca193be74d3bb1f46d6ee411928971000e6caa0 --- /dev/null +++ b/src/USER-OMP/fix_qeq_reax_omp.cpp @@ -0,0 +1,1161 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) 
Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + + Hybrid and sub-group capabilities: Ray Shan (Sandia) +------------------------------------------------------------------------- */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "fix_qeq_reax_omp.h" +#include "pair_reaxc_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "force.h" +#include "group.h" +#include "pair.h" +#include "respa.h" +#include "memory.h" +#include "citeme.h" +#include "error.h" +#include "reaxc_defs.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +#define EV_TO_KCAL_PER_MOL 14.4 +//#define DANGER_ZONE 0.95 +//#define LOOSE_ZONE 0.7 +#define SQR(x) ((x)*(x)) +#define CUBE(x) ((x)*(x)*(x)) +#define MIN_NBRS 100 + +/* ---------------------------------------------------------------------- */ + +FixQEqReaxOMP::FixQEqReaxOMP(LAMMPS *lmp, int narg, char **arg) : + FixQEqReax(lmp, narg, arg) +{ + if (narg<8 || narg>9) error->all(FLERR,"Illegal fix qeq/reax/omp command"); + + b_temp = NULL; + + // ASPC: Kolafa, J. Comp. 
Chem., 25(3), 335 (2003) + do_aspc = 0; + aspc_order = 1; + // Must be consistent with nprev to store history: nprev = aspc_order + 2 + aspc_order_max = nprev - 2; + aspc_omega = 0.0; + aspc_b = NULL; +} + +FixQEqReaxOMP::~FixQEqReaxOMP() +{ + memory->destroy(b_temp); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::post_constructor() +{ + pertype_parameters(pertype_option); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::allocate_storage() +{ + FixQEqReax::allocate_storage(); + + // dual CG support + int size = nmax; + if (dual_enabled) size*= 2; + memory->create(b_temp, comm->nthreads, size, "qeq/reax/omp:b_temp"); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::deallocate_storage() +{ + memory->destroy(b_temp); + + FixQEqReax::deallocate_storage(); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init() +{ + FixQEqReax::init(); + + // APSC setup + if (do_aspc) { + memory->create(aspc_b, aspc_order_max+2, "qeq/reax/aspc_b"); + + // Calculate damping factor + double o = double(aspc_order); + aspc_omega = (o+2.0) / (2*o+3.0); + + // Calculate B coefficients + double c = (4.0 * o + 6.0) / (o + 3.0); + aspc_b[0] = c; + + double n = 1.0; + double d = 4.0; + double s = -1.0; + double f = 2.0; + + for (int i=1; i<aspc_order_max+2; i++) { + c*= (o + n) / (o + d); + aspc_b[i] = s * f * c; + + s *= -1.0; + f += 1.0; + n -= 1.0; + d += 1.0; + } + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::compute_H() +{ + int inum, *ilist, *numneigh, **firstneigh; + double SMALL = 0.0001; + + int *type = atom->type; + tagint * tag = atom->tag; + double **x = atom->x; + int *mask = atom->mask; + + if (reaxc) { + inum = reaxc->list->inum; + ilist = reaxc->list->ilist; + numneigh = reaxc->list->numneigh; 
+ firstneigh = reaxc->list->firstneigh; + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + } + int ai, num_nbrs; + + // sumscan of the number of neighbors per atom to determine the offsets + // most likely, we are overallocating. desirable to work on this part + // to reduce the memory footprint of the far_nbrs list. + + num_nbrs = 0; + + for (int itr_i = 0; itr_i < inum; ++itr_i) { + ai = ilist[itr_i]; + H.firstnbr[ai] = num_nbrs; + num_nbrs += numneigh[ai]; + } + + // fill in the H matrix + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int i, j, ii, jj, mfill, jnum, flag; + int *jlist; + double dx, dy, dz, r_sqr; + + mfill = 0; + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + if (mask[i] & groupbit) { + jlist = firstneigh[i]; + jnum = numneigh[i]; + mfill = H.firstnbr[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + + dx = x[j][0] - x[i][0]; + dy = x[j][1] - x[i][1]; + dz = x[j][2] - x[i][2]; + r_sqr = SQR(dx) + SQR(dy) + SQR(dz); + + flag = 0; + if (r_sqr <= SQR(swb)) { + if (j < n) flag = 1; + else if (tag[i] < tag[j]) flag = 1; + else if (tag[i] == tag[j]) { + if (dz > SMALL) flag = 1; + else if (fabs(dz) < SMALL) { + if (dy > SMALL) flag = 1; + else if (fabs(dy) < SMALL && dx > SMALL) flag = 1; + } + } + } + + if (flag) { + H.jlist[mfill] = j; + H.val[mfill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] ); + mfill++; + } + } + + H.numnbrs[i] = mfill - H.firstnbr[i]; + } + } + + if (mfill >= H.m) { + char str[128]; + sprintf(str,"H matrix size has been exceeded: mfill=%d H.m=%d\n", + mfill, H.m); + error->warning(FLERR,str); + error->all(FLERR,"Fix qeq/reax/omp has insufficient QEq matrix size"); + } + } // omp + +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init_storage() +{ + int NN; + + if (reaxc) NN = reaxc->list->inum 
+ reaxc->list->gnum; + else NN = list->inum + list->gnum; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(static) +#endif + for (int i = 0; i < NN; i++) { + Hdia_inv[i] = 1. / eta[atom->type[i]]; + b_s[i] = -chi[atom->type[i]]; + b_t[i] = -1.0; + b_prc[i] = 0; + b_prm[i] = 0; + s[i] = t[i] = 0; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::pre_force(int vflag) +{ + +#ifdef OMP_TIMING + double endTimeBase, startTimeBase, funcstartTimeBase; + funcstartTimeBase = MPI_Wtime(); +#endif + + double t_start, t_end; + + if (update->ntimestep % nevery) return; + if (comm->me == 0) t_start = MPI_Wtime(); + + n = atom->nlocal; + N = atom->nlocal + atom->nghost; + + // grow arrays if necessary + // need to be atom->nmax in length + + if (atom->nmax > nmax) reallocate_storage(); + if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) + reallocate_matrix(); + +#ifdef OMP_TIMING + startTimeBase = MPI_Wtime(); +#endif + + init_matvec(); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEINITMVINDEX] += (endTimeBase-startTimeBase); + startTimeBase = endTimeBase; +#endif + + if (dual_enabled) { + matvecs = dual_CG(b_s, b_t, s, t); // OMP_TIMING inside dual_CG + } else { + matvecs_s = CG(b_s, s); // CG on s - parallel + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG1INDEX]++; + ompTimingCGCount[COMPUTECG1INDEX]+= matvecs_s; + startTimeBase = endTimeBase; +#endif + + matvecs_t = CG(b_t, t); // CG on t - parallel + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG2INDEX]++; + ompTimingCGCount[COMPUTECG2INDEX]+= matvecs_t; + startTimeBase = endTimeBase; +#endif + + } // if (dual_enabled) + +#ifdef OMP_TIMING + startTimeBase = MPI_Wtime(); +#endif + + calculate_Q(); + +#ifdef OMP_TIMING + endTimeBase = 
MPI_Wtime(); + ompTimingData[COMPUTECALCQINDEX] += (endTimeBase-startTimeBase); +#endif + + if (comm->me == 0) { + t_end = MPI_Wtime(); + qeq_time = t_end - t_start; + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEQEQINDEX] += (endTimeBase-funcstartTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::init_matvec() +{ +#ifdef OMP_TIMING + long endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* fill-in H matrix */ + compute_H(); + + int nn,i; + int *ilist; + + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + // Should really be more careful with initialization and first (aspc_order+2) MD steps + if (do_aspc) { + + double m_aspc_omega = 1.0 - aspc_omega; +#if defined(_OPENMP) +#pragma omp parallel for schedule(dynamic,50) private(i) +#endif + for (int ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. / eta[ atom->type[i] ]; + b_s[i] = -chi[ atom->type[i] ]; + b_t[i] = -1.0; + + // Predictor Step + double tp = 0.0; + double sp = 0.0; + for (int j=0; j<aspc_order+2; j++) { + tp+= aspc_b[j] * t_hist[i][j]; + sp+= aspc_b[j] * s_hist[i][j]; + } + + // Corrector Step + t[i] = aspc_omega * t_hist[i][0] + m_aspc_omega * tp; + s[i] = aspc_omega * s_hist[i][0] + m_aspc_omega * sp; + } + } + + } else { + +#if defined(_OPENMP) +#pragma omp parallel for schedule(dynamic,50) private(i) +#endif + for (int ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + + /* init pre-conditioner for H and init solution vectors */ + Hdia_inv[i] = 1. 
/ eta[ atom->type[i] ]; + b_s[i] = -chi[ atom->type[i] ]; + b_t[i] = -1.0; + + /* linear extrapolation for s & t from previous solutions */ + //s[i] = 2 * s_hist[i][0] - s_hist[i][1]; + //t[i] = 2 * t_hist[i][0] - t_hist[i][1]; + + /* quadratic extrapolation for s & t from previous solutions */ + //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] ); + t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] ); + + /* cubic extrapolation for s & t from previous solutions */ + s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); + //t[i] = 4*(t_hist[i][0]+t_hist[i][2])-(6*t_hist[i][1]+t_hist[i][3]); + } + } + } + + pack_flag = 2; + comm->forward_comm_fix(this); //Dist_vector( s ); + pack_flag = 3; + comm->forward_comm_fix(this); //Dist_vector( t ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEMVCOMPINDEX] += (long) (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +int FixQEqReaxOMP::CG( double *b, double *x) +{ + int i, ii, imax; + double alpha, beta, b_norm; + double sig_old, sig_new; + + double my_buf[2], buf[2]; + + int nn, jj; + int *ilist; + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + imax = 200; + + pack_flag = 1; + sparse_matvec( &H, x, q ); + comm->reverse_comm_fix( this); //Coll_Vector( q ); + + double tmp1, tmp2; + tmp1 = tmp2 = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2) +#endif + for (jj = 0; jj < nn; ++jj) { + i = ilist[jj]; + if (atom->mask[i] & groupbit) { + r[i] = b[i] - q[i]; + d[i] = r[i] * Hdia_inv[i]; //pre-condition + + tmp1 += b[i] * b[i]; + tmp2 += r[i] * d[i]; + } + } + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + b_norm = sqrt(buf[0]); + sig_new = buf[1]; + + for (i = 1; i < imax && sqrt(sig_new) / b_norm > 
tolerance; ++i) { + comm->forward_comm_fix(this); //Dist_vector( d ); + sparse_matvec( &H, d, q ); + comm->reverse_comm_fix(this); //Coll_vector( q ); + + tmp1 = 0.0; +#if defined(_OPENMP) +#pragma omp parallel +#endif + { + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) tmp1 += d[ii] * q[ii]; + } + +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp master +#endif + { + MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world); + + alpha = sig_new / tmp2; + tmp1 = 0.0; + } + +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) { + x[ii] += alpha * d[ii]; + r[ii] -= alpha * q[ii]; + + // pre-conditioning + p[ii] = r[ii] * Hdia_inv[ii]; + tmp1 += r[ii] * p[ii]; + } + } + } // omp parallel + + sig_old = sig_new; + + MPI_Allreduce(&tmp1, &tmp2, 1, MPI_DOUBLE, MPI_SUM, world); + + sig_new = tmp2; + beta = sig_new / sig_old; + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) private(ii) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) d[ii] = p[ii] + beta * d[ii]; + } + } + + if (i >= imax && comm->me == 0) { + char str[128]; + sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + + return i; +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::sparse_matvec( sparse_matrix *A, double *x, double *b) +{ +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int nthreads = comm->nthreads; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + + if (reaxc) { + nn = reaxc->list->inum; + 
NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i]; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = nn; ii < NN; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) b[i] = 0; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < NN; ++i) + for (int t=0; t<nthreads; t++) b_temp[t][i] = 0.0; + + // Wait for b accumulated and b_temp zeroed. +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + b[i] += A->val[itr_j] * x[j]; + + b_temp[tid][j] += A->val[itr_j] * x[i]; + } + } + } + + // Wait till b_temp accumulated +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < NN; ++i) + for (int t = 0; t < nthreads; ++t) b[i] += b_temp[t][i]; + + } //end omp parallel +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::calculate_Q() +{ + int i; + double *q = atom->q; + + int nn; + int *ilist; + + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + double tmp1, tmp2; + tmp1 = tmp2 = 0.0; +#if defined(_OPENMP) +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2) +#endif + for (int ii = 0; ii < nn; ii++) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + tmp1 += s[i]; + tmp2 += t[i]; + } + } + + double my_buf[2], buf[2]; + buf[0] = 0.0; + buf[1] = 0.0; + + my_buf[0] = tmp1; + 
my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf,&buf,2,MPI_DOUBLE,MPI_SUM,world); + + double u = buf[0] / buf[1]; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(static) private(i) +#endif + for (int ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + q[i] = s[i] - u * t[i]; + + // backup s & t + for (int k = 4; k > 0; --k) { + s_hist[i][k] = s_hist[i][k-1]; + t_hist[i][k] = t_hist[i][k-1]; + } + s_hist[i][0] = s[i]; + t_hist[i][0] = t[i]; + } + } + + pack_flag = 4; + comm->forward_comm_fix( this); //Dist_vector( atom->q ); +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::vector_sum( double* dest, double c, double* v, + double d, double* y, int k) +{ + int i; + int *ilist; + + if (reaxc) ilist = reaxc->list->ilist; + else ilist = list->ilist; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(static) private(i) +#endif + for (int ii=0; ii<k; ii++) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) dest[i] = c * v[i] + d * y[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::vector_add( double* dest, double c, double* v, int k) +{ + int i; + int *ilist; + + if (reaxc) ilist = reaxc->list->ilist; + else ilist = list->ilist; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(static) private(i) +#endif + for (int ii=0; ii<k; ii++) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) dest[i] += c * v[i]; + } +} + +/* ---------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- */ +/* dual CG support */ +/* ---------------------------------------------------------------------- */ + +int FixQEqReaxOMP::dual_CG( double *b1, double *b2, double *x1, double *x2) +{ + +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int i, imax; + double alpha_s, alpha_t, beta_s, beta_t, 
b_norm_s, b_norm_t; + double sig_old_s, sig_old_t, sig_new_s, sig_new_t; + + double my_buf[4], buf[4]; + + int nn, ii, jj; + int *ilist; + if (reaxc) { + nn = reaxc->list->inum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + ilist = list->ilist; + } + + imax = 200; + + pack_flag = 5; // forward 2x d and reverse 2x q + dual_sparse_matvec( &H, x1, x2, q ); + comm->reverse_comm_fix( this); //Coll_Vector( q ); + + double tmp1, tmp2, tmp3, tmp4; + tmp1 = tmp2 = tmp3 = tmp4 = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel for schedule(dynamic,50) private(i) reduction(+:tmp1,tmp2,tmp3,tmp4) +#endif + for (jj = 0; jj < nn; ++jj) { + i = ilist[jj]; + if (atom->mask[i] & groupbit) { + int indxI = 2 * i; + r[indxI ] = b1[i] - q[indxI ]; + r[indxI+1] = b2[i] - q[indxI+1]; + + d[indxI ] = r[indxI ] * Hdia_inv[i]; //pre-condition + d[indxI+1] = r[indxI+1] * Hdia_inv[i]; + + tmp1 += b1[i] * b1[i]; + tmp2 += b2[i] * b2[i]; + + tmp3 += r[indxI ] * d[indxI ]; + tmp4 += r[indxI+1] * d[indxI+1]; + } + } + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + my_buf[2] = tmp3; + my_buf[3] = tmp4; + + MPI_Allreduce(&my_buf, &buf, 4, MPI_DOUBLE, MPI_SUM, world); + + b_norm_s = sqrt(buf[0]); + b_norm_t = sqrt(buf[1]); + + sig_new_s = buf[2]; + sig_new_t = buf[3]; + + for (i = 1; i < imax; ++i) { + comm->forward_comm_fix(this); //Dist_vector( d ); + dual_sparse_matvec( &H, d, q ); + comm->reverse_comm_fix(this); //Coll_vector( q ); + + tmp1 = tmp2 = 0.0; +#if defined(_OPENMP) +#pragma omp parallel +#endif + { + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + tmp1 += d[indxI ] * q[indxI ]; + tmp2 += d[indxI+1] * q[indxI+1]; + } + } + +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp master +#endif + { + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + alpha_s 
= sig_new_s / buf[0]; + alpha_t = sig_new_t / buf[1]; + + tmp1 = tmp2 = 0.0; + } + +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) private(ii) reduction(+:tmp1,tmp2) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + x1[ii] += alpha_s * d[indxI ]; + x2[ii] += alpha_t * d[indxI+1]; + + r[indxI ] -= alpha_s * q[indxI ]; + r[indxI+1] -= alpha_t * q[indxI+1]; + + // pre-conditioning + p[indxI ] = r[indxI ] * Hdia_inv[ii]; + p[indxI+1] = r[indxI+1] * Hdia_inv[ii]; + + tmp1 += r[indxI ] * p[indxI ]; + tmp2 += r[indxI+1] * p[indxI+1]; + } + } + } // omp parallel + + my_buf[0] = tmp1; + my_buf[1] = tmp2; + + sig_old_s = sig_new_s; + sig_old_t = sig_new_t; + + MPI_Allreduce(&my_buf, &buf, 2, MPI_DOUBLE, MPI_SUM, world); + + sig_new_s = buf[0]; + sig_new_t = buf[1]; + + if (sqrt(sig_new_s)/b_norm_s <= tolerance + || sqrt(sig_new_t)/b_norm_t <= tolerance) break; + + beta_s = sig_new_s / sig_old_s; + beta_t = sig_new_t / sig_old_t; + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) private(ii) +#endif + for (jj = 0; jj < nn; jj++) { + ii = ilist[jj]; + if (atom->mask[ii] & groupbit) { + int indxI = 2 * ii; + + d[indxI ] = p[indxI ] + beta_s * d[indxI ]; + d[indxI+1] = p[indxI+1] + beta_t * d[indxI+1]; + } + } + } + + i++; + matvecs_s = matvecs_t = i; // The plus one makes consistent with count from CG() + matvecs = i; + + // Timing info for iterating s&t together +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG1INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG1INDEX]++; + ompTimingCGCount[COMPUTECG1INDEX]+= i; + startTimeBase = endTimeBase; +#endif + + // If necessary, converge other system + if (sqrt(sig_new_s)/b_norm_s > tolerance) { + pack_flag = 2; + comm->forward_comm_fix(this); // x1 => s + + i+= CG(b1, x1); + matvecs_s = i; + } + else if (sqrt(sig_new_t)/b_norm_t > tolerance) { + pack_flag = 3; + 
comm->forward_comm_fix(this); // x2 => t + + i+= CG(b2, x2); + matvecs_t = i; + } + + // Timing info for remainder of s or t +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTECG2INDEX] += (endTimeBase-startTimeBase); + ompTimingCount[COMPUTECG2INDEX]++; + ompTimingCGCount[COMPUTECG2INDEX]+= i - matvecs; + startTimeBase = endTimeBase; +#endif + + if ( i >= imax && comm->me == 0) { + char str[128]; + sprintf(str,"Fix qeq/reax CG convergence failed after %d iterations " + "at " BIGINT_FORMAT " step",i,update->ntimestep); + error->warning(FLERR,str); + } + + return i; +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x1, double *x2, double *b) +{ +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int indxI, indxJ; + + int nthreads = comm->nthreads; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + + if (reaxc) { + nn = reaxc->list->inum; + NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI ] = eta[ atom->type[i] ] * x1[i]; + b[indxI+1] = eta[ atom->type[i] ] * x2[i]; + } + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = nn; ii < NN; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI] = 0; + b[indxI+1] = 0; + } + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t=0; t<nthreads; t++) { + b_temp[t][indxI ] = 0.0; + b_temp[t][indxI+1] = 0.0; + } + } + + // Wait for b accumulated and b_temp zeroed +#if 
defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + indxJ = 2 * j; + b[indxI ] += A->val[itr_j] * x1[j]; + b[indxI+1] += A->val[itr_j] * x2[j]; + + b_temp[tid][indxJ ] += A->val[itr_j] * x1[i]; + b_temp[tid][indxJ+1] += A->val[itr_j] * x2[i]; + } + } + } + + // Wait till b_temp accumulated +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t = 0; t < nthreads; ++t) { + b[indxI ] += b_temp[t][indxI ]; + b[indxI+1] += b_temp[t][indxI+1]; + } + } + + } // omp parallel +} + +/* ---------------------------------------------------------------------- */ + +void FixQEqReaxOMP::dual_sparse_matvec( sparse_matrix *A, double *x, double *b ) +{ +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int i, j, itr_j; + int nn, NN, ii; + int *ilist; + int indxI, indxJ; + + int nthreads = comm->nthreads; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + + if (reaxc) { + nn = reaxc->list->inum; + NN = reaxc->list->inum + reaxc->list->gnum; + ilist = reaxc->list->ilist; + } else { + nn = list->inum; + NN = list->inum + list->gnum; + ilist = list->ilist; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI ] = eta[ atom->type[i] ] * x[indxI ]; + b[indxI+1] = eta[ atom->type[i] ] * x[indxI+1]; + } + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (ii = nn; ii < NN; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + b[indxI] = 0; + b[indxI+1] = 0; + } + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif 
+ for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t=0; t<nthreads; t++) { + b_temp[t][indxI ] = 0.0; + b_temp[t][indxI+1] = 0.0; + } + } + + // Wait for b accumulated and b_temp zeroed +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (ii = 0; ii < nn; ++ii) { + i = ilist[ii]; + if (atom->mask[i] & groupbit) { + indxI = 2 * i; + for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) { + j = A->jlist[itr_j]; + indxJ = 2 * j; + b[indxI ] += A->val[itr_j] * x[indxJ ]; + b[indxI+1] += A->val[itr_j] * x[indxJ+1]; + + b_temp[tid][indxJ ] += A->val[itr_j] * x[indxI ]; + b_temp[tid][indxJ+1] += A->val[itr_j] * x[indxI+1]; + } + } + } + + // Wait till b_temp accumulated +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < NN; ++i) { + indxI = 2 * i; + for (int t = 0; t < nthreads; ++t) { + b[indxI ] += b_temp[t][indxI ]; + b[indxI+1] += b_temp[t][indxI+1]; + } + } + } // omp parallel +} diff --git a/src/USER-OMP/fix_qeq_reax_omp.h b/src/USER-OMP/fix_qeq_reax_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..078ba3b9af55171d61efec56d84b2e2c121da4d7 --- /dev/null +++ b/src/USER-OMP/fix_qeq_reax_omp.h @@ -0,0 +1,76 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(qeq/reax/omp,FixQEqReaxOMP) + +#else + +#ifndef LMP_FIX_QEQ_REAX_OMP_H +#define LMP_FIX_QEQ_REAX_OMP_H + +#include "fix_qeq_reax.h" + +namespace LAMMPS_NS { + +class FixQEqReaxOMP : public FixQEqReax { + + public: + FixQEqReaxOMP(class LAMMPS *, int, char **); + ~FixQEqReaxOMP(); + virtual void init(); + virtual void init_storage(); + virtual void pre_force(int); + virtual void post_constructor(); + + protected: + double **b_temp; + + int do_aspc; + int aspc_order, aspc_order_max; + double aspc_omega; + double * aspc_b; + + virtual void allocate_storage(); + virtual void deallocate_storage(); + virtual void init_matvec(); + virtual void compute_H(); + + virtual int CG(double*,double*); + virtual void sparse_matvec(sparse_matrix*,double*,double*); + virtual void calculate_Q(); + + virtual void vector_sum(double*,double,double*,double,double*,int); + virtual void vector_add(double*, double, double*,int); + + // dual CG support + virtual int dual_CG(double*,double*,double*,double*); + virtual void dual_sparse_matvec(sparse_matrix*,double*,double*,double*); + virtual void dual_sparse_matvec(sparse_matrix*,double*,double*); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/fix_rigid_omp.cpp b/src/USER-OMP/fix_rigid_omp.cpp index d5bacc0320ec7fe485453da9ea5aa9a7dacb3be5..f3ef1d2985adf542a625542affda2803acb53d29 100644 --- a/src/USER-OMP/fix_rigid_omp.cpp +++ 
b/src/USER-OMP/fix_rigid_omp.cpp @@ -39,7 +39,7 @@ using namespace MathConst; enum{SINGLE,MOLECULE,GROUP}; // same as in FixRigid -#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid +#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid typedef struct { double x,y,z; } dbl3_t; diff --git a/src/USER-OMP/fix_rigid_small_omp.cpp b/src/USER-OMP/fix_rigid_small_omp.cpp index a260899aefef848033574327a802b2b4f8cd823e..e3939a829d33a308dabc29c3ae0787f75f964712 100644 --- a/src/USER-OMP/fix_rigid_small_omp.cpp +++ b/src/USER-OMP/fix_rigid_small_omp.cpp @@ -37,7 +37,7 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid +#define EINERTIA 0.2 // moment of inertia prefactor for ellipsoid enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; diff --git a/src/USER-OMP/npair_full_bin_atomonly_omp.cpp b/src/USER-OMP/npair_full_bin_atomonly_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d9e0fb9297287a87fadcb5b614099a1326453fb5 --- /dev/null +++ b/src/USER-OMP/npair_full_bin_atomonly_omp.cpp @@ -0,0 +1,106 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#include "npair_full_bin_atomonly_omp.h" +#include "npair_omp.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "atom.h" +#include "atom_vec.h" +#include "domain.h" +#include "my_page.h" +#include "error.h" + +using namespace LAMMPS_NS; +using namespace NeighConst; + +/* ---------------------------------------------------------------------- */ + +NPairFullBinAtomonlyOmp::NPairFullBinAtomonlyOmp(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + binned neighbor list construction for all neighbors + every neighbor pair appears in list of both atoms i and j +------------------------------------------------------------------------- */ + +void NPairFullBinAtomonlyOmp::build(NeighList *list) +{ + const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal; + + NPAIR_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NPAIR_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + tagint *molecule = atom->molecule; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + // each thread has its own page allocator + MyPage<int> &ipage = list->ipage[tid]; + ipage.reset(); + + // loop over owned atoms, storing neighbors + + for (i = ifrom; i < ito; i++) { + + n = 0; + neighptr = ipage.vget(); + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over all atoms in surrounding bins in stencil including self + // skip i = j + + ibin = atom2bin[i]; + + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + if (i == j) continue; + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - 
x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + numneigh[i] = n; + ipage.vgot(n); + if (ipage.status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NPAIR_OMP_CLOSE; + list->inum = nlocal; + list->gnum = 0; +} diff --git a/src/USER-INTEL/npair_half_bin_newtoff_intel.h b/src/USER-OMP/npair_full_bin_atomonly_omp.h similarity index 54% rename from src/USER-INTEL/npair_half_bin_newtoff_intel.h rename to src/USER-OMP/npair_full_bin_atomonly_omp.h index ccb4560909aa66ef01c84343e09a2da24aea26d5..643bf193a22b5d05814be534a3a96fce456b4987 100644 --- a/src/USER-INTEL/npair_half_bin_newtoff_intel.h +++ b/src/USER-OMP/npair_full_bin_atomonly_omp.h @@ -13,32 +13,25 @@ #ifdef NPAIR_CLASS -NPairStyle(half/bin/newtoff/intel, - NPairHalfBinNewtoffIntel, - NP_HALF | NP_BIN | NP_NEWTOFF | NP_ORTHO | NP_TRI | NP_INTEL) +NPairStyle(full/bin/atomonly/omp, + NPairFullBinAtomonlyOmp, + NP_FULL | NP_BIN | NP_ATOMONLY | NP_OMP | + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else -#ifndef LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H -#define LMP_NPAIR_HALF_BIN_NEWTOFF_INTEL_H +#ifndef LMP_NPAIR_FULL_BIN_ATOMONLY_OMP_H +#define LMP_NPAIR_FULL_BIN_ATOMONLY_OMP_H -#include "npair_intel.h" -#include "fix_intel.h" +#include "npair.h" namespace LAMMPS_NS { -class NPairHalfBinNewtoffIntel : public NPairIntel { +class NPairFullBinAtomonlyOmp : public NPair { public: - NPairHalfBinNewtoffIntel(class LAMMPS *); - ~NPairHalfBinNewtoffIntel() {} + NPairFullBinAtomonlyOmp(class LAMMPS *); + ~NPairFullBinAtomonlyOmp() {} void build(class NeighList *); - - private: - template <class flt_t, class acc_t> - void hbnni(NeighList *, IntelBuffers<flt_t,acc_t> *); - template <class flt_t, class acc_t, int> - void hbnni(const int, NeighList *, IntelBuffers<flt_t,acc_t> *, const int, - const int); }; } @@ -48,5 +41,4 @@ class 
NPairHalfBinNewtoffIntel : public NPairIntel { /* ERROR/WARNING messages: - */ diff --git a/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..02d98ff6ab46691ba77643d7a812ad42f410b72d --- /dev/null +++ b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.cpp @@ -0,0 +1,126 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "npair_half_bin_atomonly_newton_omp.h" +#include "npair_omp.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "atom.h" +#include "atom_vec.h" +#include "molecule.h" +#include "domain.h" +#include "my_page.h" +#include "error.h" + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +NPairHalfBinAtomonlyNewtonOmp::NPairHalfBinAtomonlyNewtonOmp(LAMMPS *lmp) : NPair(lmp) {} + +/* ---------------------------------------------------------------------- + binned neighbor list construction with full Newton's 3rd law + each owned atom i checks its own bin and other bins in Newton stencil + every pair stored exactly once by some processor +------------------------------------------------------------------------- */ + +void NPairHalfBinAtomonlyNewtonOmp::build(NeighList *list) +{ + const int nlocal = (includegroup) ? 
atom->nfirst : atom->nlocal; + + NPAIR_OMP_INIT; +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(list) +#endif + NPAIR_OMP_SETUP(nlocal); + + int i,j,k,n,itype,jtype,ibin; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *neighptr; + + // loop over each atom, storing neighbors + + double **x = atom->x; + int *type = atom->type; + int *mask = atom->mask; + tagint *molecule = atom->molecule; + + int *ilist = list->ilist; + int *numneigh = list->numneigh; + int **firstneigh = list->firstneigh; + + // each thread has its own page allocator + MyPage<int> &ipage = list->ipage[tid]; + ipage.reset(); + + for (i = ifrom; i < ito; i++) { + + n = 0; + neighptr = ipage.vget(); + + itype = type[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // loop over rest of atoms in i's bin, ghosts are at end of linked list + // if j is owned atom, store it, since j is beyond i in linked list + // if j is ghost, only store if j coords are "above and to the right" of i + + for (j = bins[i]; j >= 0; j = bins[j]) { + if (j >= nlocal) { + if (x[j][2] < ztmp) continue; + if (x[j][2] == ztmp) { + if (x[j][1] < ytmp) continue; + if (x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + } + + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j; + } + + // loop over all atoms in other bins in stencil, store every pair + + ibin = atom2bin[i]; + for (k = 0; k < nstencil; k++) { + for (j = binhead[ibin+stencil[k]]; j >= 0; j = bins[j]) { + jtype = type[j]; + if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + if (rsq <= cutneighsq[itype][jtype]) neighptr[n++] = j; + } + } + + ilist[i] = i; + firstneigh[i] = neighptr; + 
numneigh[i] = n; + ipage.vgot(n); + if (ipage.status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } + NPAIR_OMP_CLOSE; + list->inum = nlocal; +} diff --git a/src/USER-OMP/npair_half_bin_atomonly_newton_omp.h b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..63223fd0bcc10b1f22c6ddaa511f6b8e53e35b3b --- /dev/null +++ b/src/USER-OMP/npair_half_bin_atomonly_newton_omp.h @@ -0,0 +1,43 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef NPAIR_CLASS + +NPairStyle(half/bin/atomonly/newton/omp, + NPairHalfBinAtomonlyNewtonOmp, + NP_HALF | NP_BIN | NP_ATOMONLY | NP_NEWTON | NP_OMP | NP_ORTHO) + +#else + +#ifndef LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_OMP_H +#define LMP_NPAIR_HALF_BIN_ATOMONLY_NEWTON_OMP_H + +#include "npair.h" + +namespace LAMMPS_NS { + +class NPairHalfBinAtomonlyNewtonOmp : public NPair { + public: + NPairHalfBinAtomonlyNewtonOmp(class LAMMPS *); + ~NPairHalfBinAtomonlyNewtonOmp() {} + void build(class NeighList *); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/USER-OMP/pair_airebo_omp.cpp b/src/USER-OMP/pair_airebo_omp.cpp index 95f9d8b4012f1fbf495ec085bcf85f1d9ed67374..206e8e86e69374f920b12719ee0d7887ca62db8f 100644 --- a/src/USER-OMP/pair_airebo_omp.cpp +++ b/src/USER-OMP/pair_airebo_omp.cpp @@ -1387,6 +1387,10 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -1450,6 +1454,10 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -1732,6 +1740,10 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = 
VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -1795,6 +1807,10 @@ double PairAIREBOOMP::bondorder_thr(int i, int j, double rij[3], double rijmag, if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -2332,6 +2348,10 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -2395,6 +2415,10 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)/rjlmag; f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; @@ -2667,6 +2691,10 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag if (vflag_atom) v_tally2_thr(atomi,atomk,-tmp2,rik,thr); + // due to kronecker(ktype, 0) term in contribution + // to NconjtmpI and later Nijconj + if (ktype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpI*dwik*SpN)*Etmp/rikmag; f[atomi][0] -= tmp2*rik[0]; f[atomi][1] -= tmp2*rik[1]; @@ -2730,6 +2758,10 @@ double PairAIREBOOMP::bondorderLJ_thr(int i, int j, double rij[3], double rijmag if (vflag_atom) v_tally2_thr(atomj,atoml,-tmp2,rjl,thr); + // due to kronecker(ltype, 0) term in contribution + // to NconjtmpJ and later Nijconj + if (ltype != 0) continue; + tmp2 = VA*dN3[2]*(2.0*NconjtmpJ*dwjl*SpN)*Etmp/rjlmag; 
f[atomj][0] -= tmp2*rjl[0]; f[atomj][1] -= tmp2*rjl[1]; diff --git a/src/USER-OMP/pair_comb_omp.cpp b/src/USER-OMP/pair_comb_omp.cpp index c776ff3026d49a75aacaa5c5784e59c76e8c5395..2a0e6ceb2354ea0361a540b824c18f1cecbdbc9f 100644 --- a/src/USER-OMP/pair_comb_omp.cpp +++ b/src/USER-OMP/pair_comb_omp.cpp @@ -484,7 +484,7 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup) qfo_field(&params[iparam_ij],rsq1,iq,jq,fqji,fqjj); fqi += jq * fqij + fqji; -#if defined(_OPENMP) +#if defined(_OPENMP) && !defined(__NVCC__) #pragma omp atomic #endif qf[j] += (iq * fqij + fqjj); @@ -511,13 +511,13 @@ double PairCombOMP::yasu_char(double *qf_fix, int &igroup) qfo_short(&params[iparam_ij],i,nj,rsq1,iq,jq,fqij,fqjj); fqi += fqij; -#if defined(_OPENMP) +#if defined(_OPENMP) && !defined(__NVCC__) #pragma omp atomic #endif qf[j] += fqjj; } -#if defined(_OPENMP) +#if defined(_OPENMP) && !defined(__NVCC__) #pragma omp atomic #endif qf[i] += fqi; diff --git a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp index 2f1b98fc4f15b4c346cc9a5770e2ecc5094f46fd..d05b13cd10765b977adfbeff070f42a8e0e8ba63 100644 --- a/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_tip4p_long_omp.cpp @@ -104,7 +104,7 @@ void PairLJCutTIP4PLongOMP::compute(int eflag, int vflag) thr->timer(Timer::START); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); - if (!ncoultablebits) { + if (ncoultablebits) { if (evflag) { if (eflag) { if (vflag) eval<1,1,1,1>(ifrom, ito, thr); @@ -156,6 +156,7 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; const double * _noalias const q = atom->q; const int * _noalias const type = atom->type; + const tagint * _noalias const tag = atom->tag; const int nlocal = atom->nlocal; const double * _noalias const special_coul = force->special_coul; const double * _noalias const special_lj = force->special_lj; @@ -187,8 +188,8 @@ void
PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) // will be the same, there is no race condition. if (itype == typeO) { if (hneigh_thr[i].a < 0) { - iH1 = atom->map(atom->tag[i] + 1); - iH2 = atom->map(atom->tag[i] + 2); + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) @@ -267,8 +268,8 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) if (jtype == typeO) { if (hneigh_thr[j].a < 0) { - jH1 = atom->map(atom->tag[j] + 1); - jH2 = atom->map(atom->tag[j] + 2); + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) @@ -301,7 +302,7 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) if (rsq < cut_coulsq) { r2inv = 1 / rsq; - if (CTABLE || rsq <= tabinnersq) { + if (!CTABLE || rsq <= tabinnersq) { r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); @@ -337,7 +338,7 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) // virial = sum(r x F) where each water's atoms are near xi and xj // vlist stores 2,4,6 atoms whose forces contribute to virial - if (EVFLAG) { + if (VFLAG) { n = 0; key = 0; } @@ -354,11 +355,11 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) v[3] = x[i].x * dely * cforce; v[4] = x[i].x * delz * cforce; v[5] = x[i].y * delz * cforce; + vlist[n++] = i; } - if (EVFLAG) vlist[n++] = i; } else { - if (EVFLAG) key++; + if (VFLAG) key++; fdx = delx*cforce; fdy = dely*cforce; @@ -393,8 +394,6 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) v[3] = x[i].x*fOy + xH1.x*fHy + xH2.x*fHy; v[4] = x[i].x*fOz + xH1.x*fHz + xH2.x*fHz; v[5] = x[i].y*fOz + xH1.y*fHz + xH2.y*fHz; - } - if (EVFLAG) { vlist[n++] = i; vlist[n++] 
= iH1; vlist[n++] = iH2; @@ -413,11 +412,11 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) v[3] -= x[j].x * dely * cforce; v[4] -= x[j].x * delz * cforce; v[5] -= x[j].y * delz * cforce; + vlist[n++] = j; } - if (EVFLAG) vlist[n++] = j; } else { - if (EVFLAG) key += 2; + if (VFLAG) key += 2; fdx = -delx*cforce; fdy = -dely*cforce; @@ -452,8 +451,6 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) v[3] += x[j].x*fOy + xH1.x*fHy + xH2.x*fHy; v[4] += x[j].x*fOz + xH1.x*fHz + xH2.x*fHz; v[5] += x[j].y*fOz + xH1.y*fHz + xH2.y*fHz; - } - if (EVFLAG) { vlist[n++] = j; vlist[n++] = jH1; vlist[n++] = jH2; @@ -461,7 +458,7 @@ void PairLJCutTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) } if (EFLAG) { - if (CTABLE || rsq <= tabinnersq) + if (!CTABLE || rsq <= tabinnersq) ecoul = prefactor*erfc; else { table = etable[itable] + fraction*detable[itable]; diff --git a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp index 5072496cc6a6e6270439dd298cbedfb2f07cd4b4..1c8f60d7dcb1c141d69e370fdff8bf0f5aef09d4 100644 --- a/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp +++ b/src/USER-OMP/pair_lj_long_tip4p_long_omp.cpp @@ -15,13 +15,14 @@ #include <math.h> #include "pair_lj_long_tip4p_long_omp.h" #include "atom.h" +#include "domain.h" #include "comm.h" #include "math_vector.h" #include "force.h" #include "neighbor.h" -#include "neigh_list.h" +#include "error.h" #include "memory.h" -#include "domain.h" +#include "neigh_list.h" #include "suffix.h" using namespace LAMMPS_NS; @@ -719,25 +720,27 @@ template < const int EVFLAG, const int EFLAG, void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) { const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - double * const * const f = thr->get_f(); - const double * const q = atom->q; - const int * const type = atom->type; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + const double * _noalias 
const q = atom->q; + const int * _noalias const type = atom->type; + const tagint * _noalias const tag = atom->tag; const int nlocal = atom->nlocal; - const double * const special_coul = force->special_coul; - const double * const special_lj = force->special_lj; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; const double qqrd2e = force->qqrd2e; const double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); + const int vflag = vflag_global || vflag_atom; int i,j,ii,jj,jnum,itype,jtype,itable; int n,vlist[6]; int key; int iH1,iH2,jH1,jH2; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp,evdwl,ecoul; double fraction,table; double r,r2inv,forcecoul,forcelj,cforce; double factor_coul; double grij,expm2,prefactor,t,erfc; - double fO[3],fH[3],fd[3],v[6]; + double fOx,fOy,fOz,fHx,fHy,fHz,fdx,fdy,fdz,v[6]; dbl3_t x1,x2,xH1,xH2; int *ilist,*jlist,*numneigh,**firstneigh; double rsq; @@ -763,26 +766,25 @@ void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) itype = type[i]; if (itype == typeO) { if (hneigh_thr[i].a < 0) { - iH1 = atom->map(atom->tag[i] + 1); - iH2 = atom->map(atom->tag[i] + 2); + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + if (type[iH1] != typeH || type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); - // set iH1,iH2 to index of closest image to O + // set iH1,iH2 to closest image to O iH1 = domain->closest_image(i,iH1); iH2 = domain->closest_image(i,iH2); compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); - hneigh_thr[i].a = iH1; - hneigh_thr[i].b = iH2; hneigh_thr[i].t = 1; - + hneigh_thr[i].b = iH2; + hneigh_thr[i].a = iH1; } else { iH1 = hneigh_thr[i].a; iH2 = hneigh_thr[i].b; if 
(hneigh_thr[i].t == 0) { - hneigh_thr[i].t = 1; compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; } } x1 = newsite_thr[i]; @@ -790,13 +792,14 @@ void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) jlist = firstneigh[i]; jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; offseti = offset[itype]; lj1i = lj1[itype]; lj2i = lj2[itype]; lj3i = lj3[itype]; lj4i = lj4[itype]; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; ni = sbmask(j); - factor_coul = special_coul[sbmask(j)]; + factor_coul = special_coul[ni]; j &= NEIGHMASK; delx = xtmp - x[j].x; @@ -809,22 +812,22 @@ void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) r2inv = 1.0/rsq; if (ORDER6) { // long-range lj if (!LJTABLE || rsq <= tabinnerdispsq) { - register double rn = r2inv*r2inv*r2inv; - register double x2 = g2*rsq, a2 = 1.0/x2; - x2 = a2*exp(-x2)*lj4i[jtype]; - if (ni == 0) { - forcelj = - (rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; - if (EFLAG) - evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2; - } - else { // special case - register double f = special_lj[ni], t = rn*(1.0-f); - forcelj = f*(rn *= rn)*lj1i[jtype]- - g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype]; - if (EFLAG) - evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype]; - } + register double rn = r2inv*r2inv*r2inv; + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[jtype]; + if (ni == 0) { + forcelj = + (rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) + evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + forcelj = f*(rn *= rn)*lj1i[jtype]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype]; + if (EFLAG) + evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype]; + } } else { // table real space register union_int_float_t disp_t; @@ -842,31 +845,31 @@ void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, 
ThrData * const thr) if (EFLAG) evdwl = f*rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]+t*lj4i[jtype]; } } - } - else { // cut lj - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) { - forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - if (EFLAG) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]; - } - else { // special case - register double f = special_lj[ni]; - forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - if (EFLAG) - evdwl = f * (rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]); - } + } + else { // cut lj + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + if (EFLAG) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]; + } + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + if (EFLAG) + evdwl = f * (rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]); + } } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j].x -= delx*forcelj; + f[j].y -= dely*forcelj; + f[j].z -= delz*forcelj; if (EVFLAG) ev_tally_thr(this,i,j,nlocal, /* newton_pair = */ 1, - evdwl,0.0,forcelj,delx,dely,delz,thr); + evdwl,0.0,forcelj,delx,dely,delz,thr); } @@ -875,211 +878,215 @@ void PairLJLongTIP4PLongOMP::eval(int iifrom, int iito, ThrData * const thr) if (rsq < cut_coulsqplus) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh_thr[j].a < 0) { - jH1 = atom->map(atom->tag[j] + 1); - jH2 = atom->map(atom->tag[j] + 2); + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + if (type[jH1] != typeH || type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen 
has incorrect atom type"); // set jH1,jH2 to closest image to O jH1 = domain->closest_image(j,jH1); jH2 = domain->closest_image(j,jH2); compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); - hneigh_thr[j].a = jH1; - hneigh_thr[j].b = jH2; hneigh_thr[j].t = 1; - + hneigh_thr[j].b = jH2; + hneigh_thr[j].a = jH1; } else { jH1 = hneigh_thr[j].a; jH2 = hneigh_thr[j].b; if (hneigh_thr[j].t == 0) { - hneigh_thr[j].t = 1; compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; } } x2 = newsite_thr[j]; - } else x2 = x[j]; - delx = x1.x - x2.x; - dely = x1.y - x2.y; - delz = x1.z - x2.z; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1.x - x2.x; + dely = x1.y - x2.y; + delz = x1.z - x2.z; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && ORDER1) { - r2inv = 1.0 / rsq; - if (!CTABLE || rsq <= tabinnersq) { - r = sqrt(rsq); - grij = g_ewald * r; - expm2 = exp(-grij*grij); - t = 1.0 / (1.0 + EWALD_P*grij); - erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; - prefactor = qqrd2e * qtmp*q[j]/r; - forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); - if (factor_coul < 1.0) { - forcecoul -= (1.0-factor_coul)*prefactor; - } - } else { - union_int_float_t rsq_lookup; - rsq_lookup.f = rsq; - itable = rsq_lookup.i & ncoulmask; - itable >>= ncoulshiftbits; - fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; - table = ftable[itable] + fraction*dftable[itable]; - forcecoul = qtmp*q[j] * table; - if (factor_coul < 1.0) { - table = ctable[itable] + fraction*dctable[itable]; - prefactor = qtmp*q[j] * table; - forcecoul -= (1.0-factor_coul)*prefactor; - } - } - - cforce = forcecoul * r2inv; - - //if (evflag) ev_tally(i,j,nlocal,newton_pair, - // evdwl,0.0,cforce,delx,dely,delz); + r2inv = 1.0 / rsq; + if (!CTABLE || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; + 
expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) { + forcecoul -= (1.0-factor_coul)*prefactor; + } + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial + cforce = forcecoul * r2inv; - n = 0; - key = 0; + //if (evflag) ev_tally(i,j,nlocal,newton_pair, + // evdwl,0.0,cforce,delx,dely,delz); - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (EVFLAG && vflag) { + n = 0; + key = 0; + } - if (EVFLAG) { + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += 
dely * cforce; + fztmp += delz * cforce; + + if (EVFLAG && vflag) { v[0] = x[i].x * delx * cforce; v[1] = x[i].y * dely * cforce; v[2] = x[i].z * delz * cforce; v[3] = x[i].x * dely * cforce; v[4] = x[i].x * delz * cforce; v[5] = x[i].y * delz * cforce; + vlist[n++] = i; } - vlist[n++] = i; - } else { - key += 1; - fd[0] = delx*cforce; - fd[1] = dely*cforce; - fd[2] = delz*cforce; + } else { + if (EVFLAG && vflag) key++; + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; - f[i][0] += fO[0]; - f[i][1] += fO[1]; - f[i][2] += fO[2]; + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; - f[iH1][0] += fH[0]; - f[iH1][1] += fH[1]; - f[iH1][2] += fH[2]; + f[iH1].x += fHx; + f[iH1].y += fHy; + f[iH1].z += fHz; - f[iH2][0] += fH[0]; - f[iH2][1] += fH[1]; - f[iH2][2] += fH[2]; + f[iH2].x += fHx; + f[iH2].y += fHy; + f[iH2].z += fHz; - if (EVFLAG) { + if (EVFLAG && vflag) { xH1 = x[iH1]; xH2 = x[iH2]; - v[0] = x[i].x*fO[0] + xH1.x*fH[0] + xH2.x*fH[0]; - v[1] = x[i].y*fO[1] + xH1.y*fH[1] + xH2.y*fH[1]; - v[2] = x[i].z*fO[2] + xH1.z*fH[2] + xH2.z*fH[2]; - v[3] = x[i].x*fO[1] + xH1.x*fH[1] + xH2.x*fH[1]; - v[4] = x[i].x*fO[2] + xH1.x*fH[2] + xH2.x*fH[2]; - v[5] = x[i].y*fO[2] + xH1.y*fH[2] + xH2.y*fH[2]; - } - vlist[n++] = i; - vlist[n++] = iH1; - vlist[n++] = iH2; - } - - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * cforce; - - if (EVFLAG) { - v[0] -= x[j].x * delx * cforce; - v[1] -= x[j].y * dely * cforce; - v[2] -= x[j].z * delz * cforce; - v[3] -= x[j].x * dely * cforce; - v[4] -= x[j].x * delz * cforce; - v[5] -= x[j].y * delz * cforce; + v[0] = x[i].x*fOx + xH1.x*fHx + xH2.x*fHx; + 
v[1] = x[i].y*fOy + xH1.y*fHy + xH2.y*fHy; + v[2] = x[i].z*fOz + xH1.z*fHz + xH2.z*fHz; + v[3] = x[i].x*fOy + xH1.x*fHy + xH2.x*fHy; + v[4] = x[i].x*fOz + xH1.x*fHz + xH2.x*fHz; + v[5] = x[i].y*fOz + xH1.y*fHz + xH2.y*fHz; + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; } - vlist[n++] = j; + } - } else { - key += 2; + if (jtype != typeO) { + f[j].x -= delx * cforce; + f[j].y -= dely * cforce; + f[j].z -= delz * cforce; + + if (EVFLAG && vflag) { + v[0] -= x[j].x * delx * cforce; + v[1] -= x[j].y * dely * cforce; + v[2] -= x[j].z * delz * cforce; + v[3] -= x[j].x * dely * cforce; + v[4] -= x[j].x * delz * cforce; + v[5] -= x[j].y * delz * cforce; + vlist[n++] = j; + } - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + } else { + if (EVFLAG && vflag) key += 2; - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[j].x += fOx; + f[j].y += fOy; + f[j].z += fOz; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH1].x += fHx; + f[jH1].y += fHy; + f[jH1].z += fHz; - if (EVFLAG) { + f[jH2].x += fHx; + f[jH2].y += fHy; + f[jH2].z += fHz; + + if (EVFLAG && vflag) { xH1 = x[jH1]; xH2 = x[jH2]; - v[0] += x[j].x*fO[0] + xH1.x*fH[0] + xH2.x*fH[0]; - v[1] += x[j].y*fO[1] + xH1.y*fH[1] + xH2.y*fH[1]; - v[2] += x[j].z*fO[2] + xH1.z*fH[2] + xH2.z*fH[2]; - v[3] += x[j].x*fO[1] + xH1.x*fH[1] + xH2.x*fH[1]; - v[4] += x[j].x*fO[2] + xH1.x*fH[2] + xH2.x*fH[2]; - v[5] += x[j].y*fO[2] + xH1.y*fH[2] + xH2.y*fH[2]; + v[0] += x[j].x*fOx + xH1.x*fHx + xH2.x*fHx; + v[1] += 
x[j].y*fOy + xH1.y*fHy + xH2.y*fHy; + v[2] += x[j].z*fOz + xH1.z*fHz + xH2.z*fHz; + v[3] += x[j].x*fOy + xH1.x*fHy + xH2.x*fHy; + v[4] += x[j].x*fOz + xH1.x*fHz + xH2.x*fHz; + v[5] += x[j].y*fOz + xH1.y*fHz + xH2.y*fHz; + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; } - vlist[n++] = j; - vlist[n++] = jH1; - vlist[n++] = jH2; - } - - if (EFLAG) { - if (!CTABLE || rsq <= tabinnersq) - ecoul = prefactor*erfc; - else { - table = etable[itable] + fraction*detable[itable]; - ecoul = qtmp*q[j] * table; - } - if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } + + if (EFLAG) { + if (!CTABLE || rsq <= tabinnersq) + ecoul = prefactor*erfc; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; } else ecoul = 0.0; if (EVFLAG) ev_tally_list_thr(this,key,vlist,v,ecoul,alpha,thr); - } + } } } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; } } @@ -1090,11 +1097,12 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th double rsq, r2inv, forcecoul = 0.0, forcelj, cforce; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - double * const * const f = thr->get_f(); - const double * const q = atom->q; - const int * const type = atom->type; - const double * const special_coul = force->special_coul; - const double * const special_lj = force->special_lj; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + const double * _noalias const q = atom->q; + const int * _noalias const type = atom->type; + const tagint * _noalias const tag = atom->tag; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; const double qqrd2e = force->qqrd2e; const double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); @@ -1111,8 +1119,8 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th int 
i,j,ii,jj,jnum,itype,jtype; int iH1,iH2,jH1,jH2; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz; - double fO[3],fH[3],fd[3]; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp; + double fOx,fOy,fOz,fHx,fHy,fHz,fdx,fdy,fdz; dbl3_t x1,x2; int *ilist,*jlist,*numneigh,**firstneigh; @@ -1131,22 +1139,35 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th ytmp = x[i].y; ztmp = x[i].z; itype = type[i]; + + // if atom I = water O, set x1 = offset charge site + // else x1 = x of atom I + // NOTE: to make this part thread safe, we need to + // make sure that the hneigh_thr[][] entries only get + // updated, when all data is in place. worst case, + // some calculation is repeated, but since the results + // will be the same, there is no race condition. if (itype == typeO) { if (hneigh_thr[i].a < 0) { - hneigh_thr[i].a = iH1 = atom->map(atom->tag[i] + 1); - hneigh_thr[i].b = iH2 = atom->map(atom->tag[i] + 2); - hneigh_thr[i].t = 1; + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + if (type[iH1] != typeH || type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to index of closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; + hneigh_thr[i].b = iH2; + hneigh_thr[i].a = iH1; } else { iH1 = hneigh_thr[i].a; iH2 = hneigh_thr[i].b; if (hneigh_thr[i].t == 0) { - hneigh_thr[i].t = 1; compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; } } x1 = newsite_thr[i]; @@ -1155,6 +1176,7 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th jlist = firstneigh[i]; jnum = numneigh[i]; lj1i = lj1[itype]; lj2i = lj2[itype]; + fxtmp=fytmp=fztmp=0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; 
@@ -1169,12 +1191,12 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th if (rsq < cut_ljsq[itype][jtype] && rsq < cut_out_off_sq ) { // lj r2inv = 1.0/rsq; - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - else { // special case - register double f = special_lj[ni]; - forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - } + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } if (rsq > cut_out_on_sq) { // switching register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; @@ -1182,12 +1204,12 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j].x -= delx*forcelj; + f[j].y -= dely*forcelj; + f[j].z -= delz*forcelj; } @@ -1196,36 +1218,41 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th if (rsq < cut_coulsqplus && order1) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh_thr[j].a < 0) { - hneigh_thr[j].a = jH1 = atom->map(atom->tag[j] + 1); - hneigh_thr[j].b = jH2 = atom->map(atom->tag[j] + 2); - hneigh_thr[j].t = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + if (type[jH1] != typeH || type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); 
compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; + hneigh_thr[j].b = jH2; + hneigh_thr[j].a = jH1; } else { jH1 = hneigh_thr[j].a; jH2 = hneigh_thr[j].b; if (hneigh_thr[j].t == 0) { - hneigh_thr[j].t = 1; compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; } } x2 = newsite_thr[j]; - } else x2 = x[j]; - delx = x1.x - x2.x; - dely = x1.y - x2.y; - delz = x1.z - x2.z; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1.x - x2.x; + dely = x1.y - x2.y; + delz = x1.z - x2.z; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && rsq < cut_out_off_sq) { - r2inv = 1.0 / rsq; + r2inv = 1.0 / rsq; qri = qqrd2e*qtmp; if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); else { @@ -1237,83 +1264,86 @@ void PairLJLongTIP4PLongOMP::eval_inner(int iifrom, int iito, ThrData * const th forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); } - cforce = forcecoul * r2inv; + cforce = forcecoul * r2inv; - //if (evflag) ev_tally(i,j,nlocal,newton_pair, + //if (evflag) ev_tally(i,j,nlocal,newton_pair, // evdwl,0.0,cforce,delx,dely,delz); - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; - - } else { - fd[0] = delx*cforce; - fd[1] = dely*cforce; - fd[2] = delz*cforce; - - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); - - fH[0] = 0.5 * alpha * 
fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; - - f[i][0] += fO[0]; - f[i][1] += fO[1]; - f[i][2] += fO[2]; - - f[iH1][0] += fH[0]; - f[iH1][1] += fH[1]; - f[iH1][2] += fH[2]; - - f[iH2][0] += fH[0]; - f[iH2][1] += fH[1]; - f[iH2][2] += fH[2]; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + } else { + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); + + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1].x += fHx; + f[iH1].y += fHy; + f[iH1].z += fHz; + + f[iH2].x += fHx; + f[iH2].y += fHy; + f[iH2].z += fHz; } - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * cforce; + if (jtype != typeO) { + f[j].x -= delx * cforce; + f[j].y -= dely * cforce; + f[j].z -= delz * cforce; - } else { - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + } else { + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + 
f[j].x += fOx; + f[j].y += fOy; + f[j].z += fOz; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1].x += fHx; + f[jH1].y += fHy; + f[jH1].z += fHz; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2].x += fHx; + f[jH2].y += fHy; + f[jH2].z += fHz; } - } + } } } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; } } @@ -1324,11 +1354,12 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t double rsq, r2inv, forcecoul,forcelj, cforce; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - double * const * const f = thr->get_f(); - const double * const q = atom->q; - const int * const type = atom->type; - const double * const special_coul = force->special_coul; - const double * const special_lj = force->special_lj; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + const double * _noalias const q = atom->q; + const int * _noalias const type = atom->type; + const tagint * _noalias const tag = atom->tag; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; const double qqrd2e = force->qqrd2e; const double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); @@ -1348,8 +1379,8 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t int i,j,ii,jj,jnum,itype,jtype; int iH1,iH2,jH1,jH2; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz; - double fO[3],fH[3],fd[3]; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp; + double fOx,fOy,fOz,fHx,fHy,fHz,fdx,fdy,fdz; dbl3_t x1,x2; int *ilist,*jlist,*numneigh,**firstneigh; double qri; @@ -1372,20 +1403,25 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t itype = type[i]; if (itype == typeO) { if (hneigh_thr[i].a < 0) { - hneigh_thr[i].a = iH1 = atom->map(atom->tag[i] + 1); - hneigh_thr[i].b = iH2 = atom->map(atom->tag[i] + 2); - hneigh_thr[i].t = 1; + iH1 = atom->map(tag[i] + 1); + iH2 
= atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + if (type[iH1] != typeH || type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to index of closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; + hneigh_thr[i].b = iH2; + hneigh_thr[i].a = iH1; } else { iH1 = hneigh_thr[i].a; iH2 = hneigh_thr[i].b; if (hneigh_thr[i].t == 0) { - hneigh_thr[i].t = 1; compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; } } x1 = newsite_thr[i]; @@ -1394,6 +1430,7 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t jlist = firstneigh[i]; jnum = numneigh[i]; lj1i = lj1[itype]; lj2i = lj2[itype]; + fxtmp = fytmp = fztmp = 0.0; for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; @@ -1408,12 +1445,12 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t if (rsq < cut_ljsq[itype][jtype] && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq ) { // lj r2inv = 1.0/rsq; - register double rn = r2inv*r2inv*r2inv; - if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); - else { // special case - register double f = special_lj[ni]; - forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); - } + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } if (rsq < cut_in_on_sq) { // switching register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; @@ -1425,12 +1462,12 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - 
f[j][2] -= delz*forcelj; + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j].x -= delx*forcelj; + f[j].y -= dely*forcelj; + f[j].z -= delz*forcelj; } @@ -1439,36 +1476,41 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, ThrData * const t if (rsq < cut_coulsqplus && order1) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh_thr[j].a < 0) { - hneigh_thr[j].a = jH1 = atom->map(atom->tag[j] + 1); - hneigh_thr[j].b = jH2 = atom->map(atom->tag[j] + 2); - hneigh_thr[j].t = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + if (type[jH1] != typeH || type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; + hneigh_thr[j].b = jH2; + hneigh_thr[j].a = jH1; } else { jH1 = hneigh_thr[j].a; jH2 = hneigh_thr[j].b; if (hneigh_thr[j].t == 0) { - hneigh_thr[j].t = 1; compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; } } x2 = newsite_thr[j]; - } else x2 = x[j]; - delx = x1.x - x2.x; - dely = x1.y - x2.y; - delz = x1.z - x2.z; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1.x - x2.x; + dely = x1.y - x2.y; + delz = x1.z - x2.z; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq) { - r2inv = 1.0 / rsq; + r2inv = 1.0 / rsq; qri = qqrd2e*qtmp; if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); else { @@ -1484,83 +1526,86 @@ void PairLJLongTIP4PLongOMP::eval_middle(int iifrom, int iito, 
ThrData * const t forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); } - cforce = forcecoul * r2inv; + cforce = forcecoul * r2inv; - //if (evflag) ev_tally(i,j,nlocal,newton_pair, + //if (evflag) ev_tally(i,j,nlocal,newton_pair, // evdwl,0.0,cforce,delx,dely,delz); - // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; - - } else { - fd[0] = delx*cforce; - fd[1] = dely*cforce; - fd[2] = delz*cforce; - - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); - - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; - - f[i][0] += fO[0]; - f[i][1] += fO[1]; - f[i][2] += fO[2]; - - f[iH1][0] += fH[0]; - f[iH1][1] += fH[1]; - f[iH1][2] += fH[2]; - - f[iH2][0] += fH[0]; - f[iH2][1] += fH[1]; - f[iH2][2] += fH[2]; + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + } else { + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; + + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); + + fHx = 
0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; + + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; + + f[iH1].x += fHx; + f[iH1].y += fHy; + f[iH1].z += fHz; + + f[iH2].x += fHx; + f[iH2].y += fHy; + f[iH2].z += fHz; } - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * cforce; + if (jtype != typeO) { + f[j].x -= delx * cforce; + f[j].y -= dely * cforce; + f[j].z -= delz * cforce; - } else { - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + } else { + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + f[j].x += fOx; + f[j].y += fOy; + f[j].z += fOz; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[jH1].x += fHx; + f[jH1].y += fHy; + f[jH1].z += fHz; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[jH2].x += fHx; + f[jH2].y += fHy; + f[jH2].z += fHz; } - } + } } } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; } } @@ -1572,25 +1617,28 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th { double evdwl,ecoul,fvirial; evdwl = ecoul = 0.0; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz; + double r2inv,forcecoul,forcelj,cforce, respa_coul, respa_lj, frespa; + double fdx,fdy,fdz,fOx,fOy,fOz,fHx,fHy,fHz; + double v[6]; + dbl3_t x1,x2,xH1,xH2; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - double * const * const f = thr->get_f(); - const double * const q = atom->q; - const int * const type = atom->type; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + const double * _noalias 
const q = atom->q; + const int * _noalias const type = atom->type; + const tagint * _noalias const tag = atom->tag; const int nlocal = atom->nlocal; - const double * const special_coul = force->special_coul; - const double * const special_lj = force->special_lj; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; const double qqrd2e = force->qqrd2e; const double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); - + const int vflag = vflag_atom || vflag_global; + int i,j,ii,jj,jnum,itype,jtype; int n,vlist[6]; int key; int iH1,iH2,jH1,jH2; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz; - double r2inv,forcecoul,forcelj,cforce, respa_coul, respa_lj, frespa; - double fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3]; - dbl3_t x1,x2; int *ilist,*jlist,*numneigh,**firstneigh; double rsq,qri; int respa_flag; @@ -1606,6 +1654,8 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th const double cut_in_off_sq = cut_in_off*cut_in_off; const double cut_in_on_sq = cut_in_on*cut_in_on; + double fxtmp,fytmp,fztmp; + ilist = listouter->ilist; numneigh = listouter->numneigh; firstneigh = listouter->firstneigh; @@ -1622,20 +1672,25 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th itype = type[i]; if (itype == typeO) { if (hneigh_thr[i].a < 0) { - hneigh_thr[i].a = iH1 = atom->map(atom->tag[i] + 1); - hneigh_thr[i].b = iH2 = atom->map(atom->tag[i] + 2); - hneigh_thr[i].t = 1; + iH1 = atom->map(tag[i] + 1); + iH2 = atom->map(tag[i] + 2); if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + if (type[iH1] != typeH || type[iH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set iH1,iH2 to closest image to O + iH1 = domain->closest_image(i,iH1); + iH2 = domain->closest_image(i,iH2); + hneigh_thr[i].t = 1; + hneigh_thr[i].b = iH2; + 
hneigh_thr[i].a = iH1; compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); } else { iH1 = hneigh_thr[i].a; iH2 = hneigh_thr[i].b; if (hneigh_thr[i].t == 0) { - hneigh_thr[i].t = 1; compute_newsite_thr(x[i],x[iH1],x[iH2],newsite_thr[i]); + hneigh_thr[i].t = 1; } } x1 = newsite_thr[i]; @@ -1670,8 +1725,8 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th r2inv = 1.0/rsq; register double rn = r2inv*r2inv*r2inv; if (respa_flag) respa_lj = ni == 0 ? // correct for respa - frespa*rn*(rn*lj1i[jtype]-lj2i[jtype]) : - frespa*rn*(rn*lj1i[jtype]-lj2i[jtype])*special_lj[ni]; + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype]) : + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype])*special_lj[ni]; if (ORDER6) { // long-range form if (!ndisptablebits || rsq <= tabinnerdispsq) { register double x2 = g2*rsq, a2 = 1.0/x2; @@ -1719,17 +1774,17 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th } forcelj *= r2inv; - f[i][0] += delx*forcelj; - f[i][1] += dely*forcelj; - f[i][2] += delz*forcelj; - f[j][0] -= delx*forcelj; - f[j][1] -= dely*forcelj; - f[j][2] -= delz*forcelj; + fxtmp += delx*forcelj; + fytmp += dely*forcelj; + fztmp += delz*forcelj; + f[j].x -= delx*forcelj; + f[j].y -= dely*forcelj; + f[j].z -= delz*forcelj; if (EVFLAG) { fvirial = forcelj + respa_lj*r2inv; ev_tally_thr(this,i,j,nlocal,/*newton_pair = */ 1, - evdwl,0.0,fvirial,delx,dely,delz, thr); + evdwl,0.0,fvirial,delx,dely,delz, thr); } } @@ -1739,33 +1794,38 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th if (rsq < cut_coulsqplus) { if (itype == typeO || jtype == typeO) { - if (jtype == typeO) { + if (jtype == typeO) { if (hneigh_thr[j].a < 0) { - hneigh_thr[j].a = jH1 = atom->map(atom->tag[j] + 1); - hneigh_thr[j].b = jH2 = atom->map(atom->tag[j] + 2); - hneigh_thr[j].t = 1; + jH1 = atom->map(tag[j] + 1); + jH2 = atom->map(tag[j] + 2); if (jH1 == -1 || jH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[jH1] 
!= typeH || atom->type[jH2] != typeH) + if (type[jH1] != typeH || type[jH2] != typeH) error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + // set jH1,jH2 to closest image to O + jH1 = domain->closest_image(j,jH1); + jH2 = domain->closest_image(j,jH2); compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; + hneigh_thr[j].b = jH2; + hneigh_thr[j].a = jH1; } else { jH1 = hneigh_thr[j].a; jH2 = hneigh_thr[j].b; if (hneigh_thr[j].t == 0) { - hneigh_thr[j].t = 1; compute_newsite_thr(x[j],x[jH1],x[jH2],newsite_thr[j]); + hneigh_thr[j].t = 1; } } x2 = newsite_thr[j]; - } else x2 = x[j]; - delx = x1.x - x2.x; - dely = x1.y - x2.y; - delz = x1.z - x2.z; - rsq = delx*delx + dely*dely + delz*delz; + } else x2 = x[j]; + delx = x1.x - x2.x; + dely = x1.y - x2.y; + delz = x1.z - x2.z; + rsq = delx*delx + dely*dely + delz*delz; } - // test current rsq against cutoff and compute Coulombic force + // test current rsq against cutoff and compute Coulombic force if ((rsq < cut_coulsq) && ORDER1) { frespa = 1.0; // check whether and how to compute respa corrections @@ -1819,161 +1879,165 @@ void PairLJLongTIP4PLongOMP::eval_outer(int iifrom, int iito, ThrData * const th fvirial = (forcecoul + respa_coul) * r2inv; // if i,j are not O atoms, force is applied directly - // if i or j are O atoms, force is on fictitious atom & partitioned - // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) - // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f - // preserves total force and torque on water molecule - // virial = sum(r x F) where each water's atoms are near xi and xj - // vlist stores 2,4,6 atoms whose forces contribute to virial - - n = 0; - key = 0; - - if (itype != typeO) { - f[i][0] += delx * cforce; - f[i][1] += dely * cforce; - f[i][2] += delz * cforce; - - if (EVFLAG) { + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = 
fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (EVFLAG && vflag) { + n = 0; + key = 0; + } + + if (itype != typeO) { + fxtmp += delx * cforce; + fytmp += dely * cforce; + fztmp += delz * cforce; + + if (EVFLAG && vflag) { v[0] = x[i].x * delx * fvirial; v[1] = x[i].y * dely * fvirial; v[2] = x[i].z * delz * fvirial; v[3] = x[i].x * dely * fvirial; v[4] = x[i].x * delz * fvirial; v[5] = x[i].y * delz * fvirial; + vlist[n++] = i; } - vlist[n++] = i; - - } else { - key += 1; - fd[0] = delx*cforce; - fd[1] = dely*cforce; - fd[2] = delz*cforce; - - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); - - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; - - f[i][0] += fO[0]; - f[i][1] += fO[1]; - f[i][2] += fO[2]; - - f[iH1][0] += fH[0]; - f[iH1][1] += fH[1]; - f[iH1][2] += fH[2]; - - f[iH2][0] += fH[0]; - f[iH2][1] += fH[1]; - f[iH2][2] += fH[2]; - - if (EVFLAG) { - - fd[0] = delx*fvirial; - fd[1] = dely*fvirial; - fd[2] = delz*fvirial; - - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); - - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; - - domain->closest_image(&x[i].x,&x[iH1].x,xH1); - domain->closest_image(&x[i].x,&x[iH2].x,xH2); - - v[0] = x[i].x*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] = x[i].y*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] = x[i].z*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] = x[i].x*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] = x[i].x*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] = x[i].y*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; - } - vlist[n++] = i; - vlist[n++] = iH1; - vlist[n++] = iH2; - } - - if (jtype != typeO) { - f[j][0] -= delx * cforce; - f[j][1] -= dely * cforce; - f[j][2] -= delz * 
cforce; - - if (EVFLAG) { - v[0] -= x[j].x * delx * fvirial; - v[1] -= x[j].y * dely * fvirial; - v[2] -= x[j].z * delz * fvirial; - v[3] -= x[j].x * dely * fvirial; - v[4] -= x[j].x * delz * fvirial; - v[5] -= x[j].y * delz * fvirial; - } - vlist[n++] = j; - } else { - key += 2; + } else { + if (EVFLAG && vflag) key += 1; - fd[0] = -delx*cforce; - fd[1] = -dely*cforce; - fd[2] = -delz*cforce; + fdx = delx*cforce; + fdy = dely*cforce; + fdz = delz*cforce; - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + fHx = 0.5*alpha * fdx; + fHy = 0.5*alpha * fdy; + fHz = 0.5*alpha * fdz; - f[j][0] += fO[0]; - f[j][1] += fO[1]; - f[j][2] += fO[2]; + fxtmp += fOx; + fytmp += fOy; + fztmp += fOz; - f[jH1][0] += fH[0]; - f[jH1][1] += fH[1]; - f[jH1][2] += fH[2]; + f[iH1].x += fHx; + f[iH1].y += fHy; + f[iH1].z += fHz; - f[jH2][0] += fH[0]; - f[jH2][1] += fH[1]; - f[jH2][2] += fH[2]; + f[iH2].x += fHx; + f[iH2].y += fHy; + f[iH2].z += fHz; - if (EVFLAG) { + if (EVFLAG && vflag) { + xH1 = x[iH1]; + xH2 = x[iH2]; - fd[0] = -delx*fvirial; - fd[1] = -dely*fvirial; - fd[2] = -delz*fvirial; + fdx = delx*fvirial; + fdy = dely*fvirial; + fdz = delz*fvirial; + + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); + + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; + + v[0] = x[i].x*fOx + xH1.x*fHx + xH2.x*fHx; + v[1] = x[i].y*fOy + xH1.y*fHy + xH2.y*fHy; + v[2] = x[i].z*fOz + xH1.z*fHz + xH2.z*fHz; + v[3] = x[i].x*fOy + xH1.x*fHy + xH2.x*fHy; + v[4] = x[i].x*fOz + xH1.x*fHz + xH2.x*fHz; + v[5] = x[i].y*fOz + xH1.y*fHz + xH2.y*fHz; + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + } - fO[0] = fd[0]*(1 - alpha); - fO[1] = fd[1]*(1 - alpha); - fO[2] = fd[2]*(1 - alpha); + if (jtype != typeO) { + f[j].x -= delx * cforce; + 
f[j].y -= dely * cforce; + f[j].z -= delz * cforce; + + if (EVFLAG && vflag) { + v[0] -= x[j].x * delx * fvirial; + v[1] -= x[j].y * dely * fvirial; + v[2] -= x[j].z * delz * fvirial; + v[3] -= x[j].x * dely * fvirial; + v[4] -= x[j].x * delz * fvirial; + v[5] -= x[j].y * delz * fvirial; + vlist[n++] = j; + } - fH[0] = 0.5 * alpha * fd[0]; - fH[1] = 0.5 * alpha * fd[1]; - fH[2] = 0.5 * alpha * fd[2]; + } else { + if (EVFLAG && vflag) key += 2; + + fdx = -delx*cforce; + fdy = -dely*cforce; + fdz = -delz*cforce; + + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); + + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; + + f[j].x += fOx; + f[j].y += fOy; + f[j].z += fOz; - domain->closest_image(&x[j].x,&x[jH1].x,xH1); - domain->closest_image(&x[j].x,&x[jH2].x,xH2); + f[jH1].x += fHx; + f[jH1].y += fHy; + f[jH1].z += fHz; - v[0] += x[j].x*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0]; - v[1] += x[j].y*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1]; - v[2] += x[j].z*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2]; - v[3] += x[j].x*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1]; - v[4] += x[j].x*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2]; - v[5] += x[j].y*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2]; + f[jH2].x += fHx; + f[jH2].y += fHy; + f[jH2].z += fHz; + + if (EVFLAG && vflag) { + xH1 = x[jH1]; + xH2 = x[jH2]; + + fdx = -delx*fvirial; + fdy = -dely*fvirial; + fdz = -delz*fvirial; + + fOx = fdx*(1 - alpha); + fOy = fdy*(1 - alpha); + fOz = fdz*(1 - alpha); + + fHx = 0.5 * alpha * fdx; + fHy = 0.5 * alpha * fdy; + fHz = 0.5 * alpha * fdz; + + v[0] += x[j].x*fOx + xH1.x*fHx + xH2.x*fHx; + v[1] += x[j].y*fOy + xH1.y*fHy + xH2.y*fHy; + v[2] += x[j].z*fOz + xH1.z*fHz + xH2.z*fHz; + v[3] += x[j].x*fOy + xH1.x*fHy + xH2.x*fHy; + v[4] += x[j].x*fOz + xH1.x*fHz + xH2.x*fHz; + v[5] += x[j].y*fOz + xH1.y*fHz + xH2.y*fHz; + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; } - vlist[n++] = j; - vlist[n++] = jH1; - vlist[n++] = jH2; - } + } if (EVFLAG) 
ev_tally_list_thr(this,key,vlist,v,ecoul,alpha,thr); } } } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; } } diff --git a/src/USER-OMP/pair_lj_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_omp.cpp deleted file mode 100644 index bd9d5220b8f26cb21b3da11d7e835965b2d84d61..0000000000000000000000000000000000000000 --- a/src/USER-OMP/pair_lj_sf_omp.cpp +++ /dev/null @@ -1,164 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - This software is distributed under the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#include <math.h> -#include "pair_lj_sf_omp.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" - -#include "suffix.h" -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJShiftedForceOMP::PairLJShiftedForceOMP(LAMMPS *lmp) : - PairLJShiftedForce(lmp), ThrOMP(lmp, THR_PAIR) -{ - suffix_flag |= Suffix::OMP; - respa_enable = 0; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJShiftedForceOMP::compute(int eflag, int vflag) -{ - if (eflag || vflag) { - ev_setup(eflag,vflag); - } else evflag = vflag_fdotr = 0; - - const int nall = atom->nlocal + atom->nghost; - const int nthreads = comm->nthreads; - const int inum = list->inum; - -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) -#endif - { - int ifrom, ito, tid; - - loop_setup_thr(ifrom, ito, tid, inum, 
nthreads); - ThrData *thr = fix->get_thr(tid); - thr->timer(Timer::START); - ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); - - if (evflag) { - if (eflag) { - if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); - else eval<1,1,0>(ifrom, ito, thr); - } else { - if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); - else eval<1,0,0>(ifrom, ito, thr); - } - } else { - if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); - else eval<0,0,0>(ifrom, ito, thr); - } - - thr->timer(Timer::PAIR); - reduce_thr(this, eflag, vflag, thr); - } // end of omp parallel region -} - -template <int EVFLAG, int EFLAG, int NEWTON_PAIR> -void PairLJShiftedForceOMP::eval(int iifrom, int iito, ThrData * const thr) -{ - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double t,rsq,r2inv,r6inv,forcelj,factor_lj; - int *ilist,*jlist,*numneigh,**firstneigh; - - evdwl = 0.0; - - const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; - const int * _noalias const type = atom->type; - const int nlocal = atom->nlocal; - const double * _noalias const special_lj = force->special_lj; - double fxtmp,fytmp,fztmp; - - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - // loop over neighbors of my atoms - - for (ii = iifrom; ii < iito; ++ii) { - - i = ilist[ii]; - xtmp = x[i].x; - ytmp = x[i].y; - ztmp = x[i].z; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - fxtmp=fytmp=fztmp=0.0; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j].x; - dely = ytmp - x[j].y; - delz = ztmp - x[j].z; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - t = sqrt(rsq)/cut[itype][jtype]; - - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]) - - t*foffset[itype][jtype]; - - fpair = 
factor_lj*forcelj*r2inv; - - fxtmp += delx*fpair; - fytmp += dely*fpair; - fztmp += delz*fpair; - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= delx*fpair; - f[j].y -= dely*fpair; - f[j].z -= delz*fpair; - } - - if (EFLAG) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - (t-1.0)*foffset[itype][jtype] - offset[itype][jtype]; - evdwl *= factor_lj; - } - - if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,0.0,fpair,delx,dely,delz,thr); - } - } - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - } -} - -/* ---------------------------------------------------------------------- */ - -double PairLJShiftedForceOMP::memory_usage() -{ - double bytes = memory_usage_thr(); - bytes += PairLJShiftedForce::memory_usage(); - - return bytes; -} diff --git a/src/USER-OMP/pair_lj_smooth_linear_omp.h b/src/USER-OMP/pair_lj_smooth_linear_omp.h index 940c0ea707e36ed460be359a86d5d175c6272015..874e42eb9f70b20df88061816251b023ba9f8a2e 100644 --- a/src/USER-OMP/pair_lj_smooth_linear_omp.h +++ b/src/USER-OMP/pair_lj_smooth_linear_omp.h @@ -18,6 +18,7 @@ #ifdef PAIR_CLASS PairStyle(lj/smooth/linear/omp,PairLJSmoothLinearOMP) +PairStyle(lj/sf/omp,PairLJSmoothLinearOMP) #else diff --git a/src/USER-OMP/pair_reaxc_omp.cpp b/src/USER-OMP/pair_reaxc_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0fb24ed5f212ac7840cab36cd17a5b53f2de6fa7 --- /dev/null +++ b/src/USER-OMP/pair_reaxc_omp.cpp @@ -0,0 +1,603 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. 
+ + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Hasan Metin Aktulga, Purdue University + (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov) + Per-atom energy/virial added by Ray Shan (Sandia) + Fix reax/c/bonds and fix reax/c/species for pair_style reax/c added by + Ray Shan (Sandia) +------------------------------------------------------------------------- */ + +#include "pair_reaxc_omp.h" +#include "atom.h" +#include "update.h" +#include "force.h" +#include "comm.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "modify.h" +#include "fix.h" +#include "fix_reaxc.h" +#include "citeme.h" +#include "memory.h" +#include "error.h" +#include "timer.h" + +#include "reaxc_types.h" +#include "reaxc_allocate.h" +#include "reaxc_control.h" +#include "reaxc_ffield.h" +#include "reaxc_forces_omp.h" +#include "reaxc_init_md_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_reset_tools.h" +#include "reaxc_tool_box.h" +#include "reaxc_traj.h" +#include "reaxc_vector.h" +#include "fix_reaxc_bonds.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +#ifdef OMP_TIMING +double ompTimingData[LASTTIMINGINDEX]; +int ompTimingCount[LASTTIMINGINDEX]; +int ompTimingCGCount[LASTTIMINGINDEX]; +#endif + +/* ---------------------------------------------------------------------- */ + +PairReaxCOMP::PairReaxCOMP(LAMMPS *lmp) : PairReaxC(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + system->pair_ptr = this; + + num_nbrs_offset = NULL; + +#ifdef OMP_TIMING + for (int i=0;i<LASTTIMINGINDEX;i++) { + ompTimingData[i] = 0; + ompTimingCount[i] = 0; + ompTimingCGCount[i] = 0; + } +#endif +} + +/* 
---------------------------------------------------------------------- */ + +PairReaxCOMP::~PairReaxCOMP() +{ + reax_list * bonds = lists+BONDS; + for (int i=0; i<bonds->num_intrs; ++i) + sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction"); + + memory->destroy(num_nbrs_offset); + +#ifdef OMP_TIMING + int myrank; + + MPI_Comm_rank(mpi_data->world,&myrank); + + // Write screen output + if (timer->has_full() && myrank == 0 && screen) { + fprintf(screen,"\n\nWrite_Lists took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]); + + fprintf(screen,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]); + fprintf(screen,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); + fprintf(screen,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); + fprintf(screen,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); + fprintf(screen,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(screen,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); + fprintf(screen,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); + fprintf(screen,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); + fprintf(screen,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(screen,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); + + fprintf(screen,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); + fprintf(screen,"\n ->QEQ init: %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]); + + double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]); + fprintf(screen,"\n ->QEQ CG1: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg); + + avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]); + fprintf(screen,"\n ->QEQ CG2: %11.3lf 
seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg); + fprintf(screen,"\n ->QEQ CalcQ: %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]); + } + + // Write logfile output + if (timer->has_full() && myrank == 0 && logfile) { + fprintf(logfile,"\n\nWrite_Lists took %11.3lf seconds", ompTimingData[COMPUTEWLINDEX]); + + fprintf(logfile,"\n\nCompute_Forces took %11.3lf seconds:", ompTimingData[COMPUTEINDEX]); + fprintf(logfile,"\n ->Initial Forces: %11.3lf seconds", ompTimingData[COMPUTEIFINDEX]); + fprintf(logfile,"\n ->Bond Order: %11.3lf seconds", ompTimingData[COMPUTEBOINDEX]); + fprintf(logfile,"\n ->Atom Energy: %11.3lf seconds", ompTimingData[COMPUTEATOMENERGYINDEX]); + fprintf(logfile,"\n ->Bond: %11.3lf seconds", ompTimingData[COMPUTEBONDSINDEX]); + fprintf(logfile,"\n ->Hydrogen bonds: %11.3lf seconds", ompTimingData[COMPUTEHBONDSINDEX]); + fprintf(logfile,"\n ->Torsion Angles: %11.3lf seconds", ompTimingData[COMPUTETORSIONANGLESBOINDEX]); + fprintf(logfile,"\n ->Valence Angles: %11.3lf seconds", ompTimingData[COMPUTEVALENCEANGLESBOINDEX]); + fprintf(logfile,"\n ->Non-Bonded For: %11.3lf seconds", ompTimingData[COMPUTENBFINDEX]); + fprintf(logfile,"\n ->Total Forces: %11.3lf seconds", ompTimingData[COMPUTETFINDEX]); + + fprintf(logfile,"\n\nfixQEQ: %11.3lf seconds", ompTimingData[COMPUTEQEQINDEX]); + fprintf(logfile,"\n ->QEQ init: %11.3lf seconds", ompTimingData[COMPUTEINITMVINDEX]); + + double avg = double(ompTimingCGCount[COMPUTECG1INDEX]) / double(ompTimingCount[COMPUTECG1INDEX]); + fprintf(logfile,"\n ->QEQ CG1: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG1INDEX], avg); + + avg = double(ompTimingCGCount[COMPUTECG2INDEX]) / double(ompTimingCount[COMPUTECG2INDEX]); + fprintf(logfile,"\n ->QEQ CG2: %11.3lf seconds with %4.1lf iterations on average.", ompTimingData[COMPUTECG2INDEX], avg); + fprintf(logfile,"\n ->QEQ CalcQ: %11.3lf seconds\n", ompTimingData[COMPUTECALCQINDEX]); + } +#endif +} + 
+/* ----------------------------------------------------------------------
+   Per-timestep driver: refresh the reax copy of the atom data (setup),
+   reset the reax state, rebuild the far neighbor list, run the OpenMP
+   force kernels, copy forces and bond counts back, and fill pvector with
+   the individual energy terms.
+   The time between the start of setup() and the end of write_reax_lists()
+   is stored in data->timing.nbrs on the process with comm->me == 0.
+   If fixspecies_flag is set, tmpid/tmpbo are (re)sized, zeroed and filled
+   by FindBond().
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::compute(int eflag, int vflag)
+{
+  double evdwl,ecoul;
+  double t_start, t_end;
+
+  // communicate num_bonds once every reneighboring
+  // 2 num arrays stored by fix, grab ptr to them
+
+  if (neighbor->ago == 0) comm->forward_comm_fix(fix_reax);
+  int *num_bonds = fix_reax->num_bonds;
+  int *num_hbonds = fix_reax->num_hbonds;
+
+  evdwl = ecoul = 0.0;
+  if (eflag || vflag) ev_setup(eflag,vflag);
+  else ev_unset();
+
+  if (vflag_global) control->virial = 1;
+  else control->virial = 0;
+
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  system->bigN = static_cast<int> (atom->natoms); // all atoms in the system
+
+  system->big_box.V = 0;
+  system->big_box.box_norms[0] = 0;
+  system->big_box.box_norms[1] = 0;
+  system->big_box.box_norms[2] = 0;
+  // t_start is only set, and only read below, when comm->me == 0
+  if( comm->me == 0 ) t_start = MPI_Wtime();
+  // setup data structures
+
+  setup();
+
+  Reset( system, control, data, workspace, &lists, world );
+
+  // Why not update workspace like in MPI-only code?
+  // Using the MPI-only way messes up the hb energy
+  //workspace->realloc.num_far = write_reax_lists();
+  write_reax_lists();
+
+  // timing for filling in the reax lists
+  if( comm->me == 0 ) {
+    t_end = MPI_Wtime();
+    data->timing.nbrs = t_end - t_start;
+  }
+
+  // forces
+
+#ifdef OMP_TIMING
+  double startTimeBase,endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  Compute_ForcesOMP(system,control,data,workspace,&lists,out_control,mpi_data);
+  read_reax_forces(vflag);
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEINDEX] += (endTimeBase-startTimeBase);
+#endif
+
+  // copy the per-atom bond counts back into the arrays owned by fix_reax
+#if defined(_OPENMP)
+#pragma omp parallel for schedule(static)
+#endif
+  for(int k = 0; k < system->N; ++k) {
+    num_bonds[k] = system->my_atoms[k].num_bonds;
+    num_hbonds[k] = system->my_atoms[k].num_hbonds;
+  }
+
+  // energies and pressure
+  // NOTE(review): evdwl and ecoul are accumulated here but not read again
+  // in this function; only pvector is filled from the same terms.
+
+  if (eflag_global) {
+    evdwl += data->my_en.e_bond;
+    evdwl += data->my_en.e_ov;
+    evdwl += data->my_en.e_un;
+    evdwl += data->my_en.e_lp;
+    evdwl += data->my_en.e_ang;
+    evdwl += data->my_en.e_pen;
+    evdwl += data->my_en.e_coa;
+    evdwl += data->my_en.e_hb;
+    evdwl += data->my_en.e_tor;
+    evdwl += data->my_en.e_con;
+    evdwl += data->my_en.e_vdW;
+
+    ecoul += data->my_en.e_ele;
+    ecoul += data->my_en.e_pol;
+
+    // Store the different parts of the energy
+    // in a list for output by compute pair command
+
+    pvector[0] = data->my_en.e_bond;
+    pvector[1] = data->my_en.e_ov + data->my_en.e_un;
+    pvector[2] = data->my_en.e_lp;
+    pvector[3] = 0.0;
+    pvector[4] = data->my_en.e_ang;
+    pvector[5] = data->my_en.e_pen;
+    pvector[6] = data->my_en.e_coa;
+    pvector[7] = data->my_en.e_hb;
+    pvector[8] = data->my_en.e_tor;
+    pvector[9] = data->my_en.e_con;
+    pvector[10] = data->my_en.e_vdW;
+    pvector[11] = data->my_en.e_ele;
+    pvector[12] = 0.0;
+    pvector[13] = data->my_en.e_pol;
+  }
+
+  if (vflag_fdotr) virial_fdotr_compute();
+
+// Set internal timestep counter to that of LAMMPS
+
+  data->step = update->ntimestep;
+
+  Output_Results( system, control, data, &lists, out_control, mpi_data );
+
+  // populate tmpid and tmpbo arrays for fix reax/c/species
+  int i, j;
+
+  if(fixspecies_flag) {
+    // grow both arrays to system->N rows of MAXSPECBOND entries;
+    // this is the only place in this function where nmax is updated
+    if (system->N > nmax) {
+      memory->destroy(tmpid);
+      memory->destroy(tmpbo);
+      nmax = system->N;
+      memory->create(tmpid,nmax,MAXSPECBOND,"pair:tmpid");
+      memory->create(tmpbo,nmax,MAXSPECBOND,"pair:tmpbo");
+    }
+
+#if defined(_OPENMP)
+#pragma omp parallel for collapse(2) schedule(static) default(shared)
+#endif
+    for (i = 0; i < system->N; i ++)
+      for (j = 0; j < MAXSPECBOND; j ++) {
+        tmpbo[i][j] = 0.0;
+        tmpid[i][j] = 0;
+      }
+
+    FindBond();
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Style initialization: check prerequisites (charge attribute, fix
+   qeq/reax/omp when qeqflag == 1, atom IDs, newton pair on), request the
+   neighbor list, mark all reax lists as unallocated, create the internal
+   REAXC fix if fix_reax is still NULL, and record the thread count in
+   control->nthreads.
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::init_style( )
+{
+  if (!atom->q_flag)
+    error->all(FLERR,"Pair reax/c/omp requires atom attribute q");
+
+  // firstwarn = 1;
+
+  int iqeq = modify->find_fix_by_style("qeq/reax/omp");
+  if (iqeq < 0 && qeqflag == 1)
+    error->all(FLERR,"Pair reax/c/omp requires use of fix qeq/reax/omp");
+
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  system->bigN = static_cast<int> (atom->natoms); // all atoms in the system
+  system->wsize = comm->nprocs;
+
+  system->big_box.V = 0;
+  system->big_box.box_norms[0] = 0;
+  system->big_box.box_norms[1] = 0;
+  system->big_box.box_norms[2] = 0;
+
+  if (atom->tag_enable == 0)
+    error->all(FLERR,"Pair style reax/c/omp requires atom IDs");
+  if (force->newton_pair == 0)
+    error->all(FLERR,"Pair style reax/c/omp requires newton pair on");
+
+  // need a half neighbor list w/ Newton off and ghost neighbors
+  // built whenever re-neighboring occurs
+
+  int irequest = neighbor->request(this,instance_me);
+  neighbor->requests[irequest]->newton = 2;
+  neighbor->requests[irequest]->ghost = 1;
+
+  cutmax = MAX3(control->nonb_cut, control->hbond_cut, 2*control->bond_cut);
+
+  for( int i = 0; i < LIST_N; ++i )
+    lists[i].allocated = 0;
+
+  // add the internal fix once; the newly added fix is taken from the
+  // last slot of modify->fix
+  if (fix_reax == NULL) {
+    char **fixarg = new char*[3];
+    fixarg[0] = (char *) "REAXC";
+    fixarg[1] = (char *) "all";
+    fixarg[2] = (char *) "REAXC";
+    modify->add_fix(3,fixarg);
+    delete [] fixarg;
+    fix_reax = (FixReaxC *) modify->fix[modify->nfix-1];
+  }
+
+#if defined(_OPENMP)
+  control->nthreads = omp_get_max_threads();
+#else
+  control->nthreads = 1;
+#endif
+}
+
+/* ----------------------------------------------------------------------
+   Prepare the reax data structures for the current atom counts.
+   First call (setup_flag == 0): size the capacities, preallocate, build
+   the far neighbor list and run InitializeOMP().
+   Later calls: refresh the atom copy, re-estimate the far neighbor list
+   size and let ReAllocate() resize the data structures if needed.
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::setup( )
+{
+  int oldN;
+  int mincap = system->mincap;
+  double safezone = system->safezone;
+
+  system->n = atom->nlocal; // my atoms
+  system->N = atom->nlocal + atom->nghost; // mine + ghosts
+  // NOTE(review): oldN is taken AFTER system->N has been updated, so it
+  // always equals the new N and the "new atoms" loop in the else branch
+  // below never iterates. Confirm whether that is intended.
+  oldN = system->N;
+  system->bigN = static_cast<int> (atom->natoms); // all atoms in the system
+
+  // NOTE(review): compute() in this file updates nmax only when
+  // fixspecies_flag is set; without it this reallocation appears to be
+  // repeated on every call for which system->N > nmax holds. Confirm.
+  if (system->N > nmax) {
+    memory->destroy(num_nbrs_offset);
+    // Don't update nmax here. It is updated at end of compute().
+    memory->create(num_nbrs_offset, system->N, "pair:num_nbrs_offset");
+  }
+
+  if (setup_flag == 0) {
+
+    setup_flag = 1;
+
+    int *num_bonds = fix_reax->num_bonds;
+    int *num_hbonds = fix_reax->num_hbonds;
+
+    control->vlist_cut = neighbor->cutneighmax;
+
+    // determine the local and total capacity
+
+    system->local_cap = MAX( (int)(system->n * safezone), mincap );
+    system->total_cap = MAX( (int)(system->N * safezone), mincap );
+
+    // initialize my data structures
+
+    PreAllocate_Space( system, control, workspace, world );
+    write_reax_atoms();
+
+    int num_nbrs = estimate_reax_lists();
+    if(!Make_List(system->total_cap, num_nbrs, TYP_FAR_NEIGHBOR,
+                  lists+FAR_NBRS, world))
+      error->all(FLERR,"Pair reax/c problem in far neighbor list");
+
+    write_reax_lists();
+
+    InitializeOMP( system, control, data, workspace, &lists, out_control,
+                   mpi_data, world );
+
+    // publish the initial bond counts to the arrays owned by fix_reax
+    for( int k = 0; k < system->N; ++k ) {
+      num_bonds[k] = system->my_atoms[k].num_bonds;
+      num_hbonds[k] = system->my_atoms[k].num_hbonds;
+    }
+
+  } else {
+
+    // fill in reax datastructures
+
+    write_reax_atoms();
+
+    // reset the bond list info for new atoms
+
+    for(int k = oldN; k < system->N; ++k)
+      Set_End_Index( k, Start_Index( k, lists+BONDS ), lists+BONDS );
+
+    // estimate far neighbor list size
+    // Not present in MPI-only version
+    workspace->realloc.num_far = estimate_reax_lists();
+
+    // check if I need to shrink/extend my data-structs
+
+    ReAllocate( system, control, data, workspace, &lists, mpi_data );
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Copy tag, mapped type, position, charge and the bond / hbond counts
+   held by fix_reax into system->my_atoms for all system->N atoms.
+   Errors out if system->N exceeds system->total_cap.
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::write_reax_atoms()
+{
+  int *num_bonds = fix_reax->num_bonds;
+  int *num_hbonds = fix_reax->num_hbonds;
+
+  if (system->N > system->total_cap)
+    error->all(FLERR,"Too many ghost atoms");
+
+#if defined(_OPENMP)
+#pragma omp parallel for schedule(static) default(shared)
+#endif
+  for( int i = 0; i < system->N; ++i ){
+    system->my_atoms[i].orig_id = atom->tag[i];
+    system->my_atoms[i].type = map[atom->type[i]];
+    system->my_atoms[i].x[0] = atom->x[i][0];
+    system->my_atoms[i].x[1] = atom->x[i][1];
+    system->my_atoms[i].x[2] = atom->x[i][2];
+    system->my_atoms[i].q = atom->q[i];
+    system->my_atoms[i].num_bonds = num_bonds[i];
+    system->my_atoms[i].num_hbonds = num_hbonds[i];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   Return the size to reserve for the far neighbor list: the sum of
+   numneigh over all inum+gnum entries of ilist, but never less than
+   mincap*MIN_NBRS.
+------------------------------------------------------------------------- */
+
+int PairReaxCOMP::estimate_reax_lists()
+{
+  int i;
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int numall = list->inum + list->gnum;
+  int mincap = system->mincap;
+
+  // for good performance in the OpenMP implementation, each thread needs
+  // to know where to place the neighbors of the atoms it is responsible for.
+  // The sumscan values for the list->numneigh will be used to determine the
+  // neighbor offset of each atom. Note that this may cause some significant
+  // memory overhead if delayed neighboring is used - so it may be desirable
+  // to work on this part to reduce the memory footprint of the far_nbrs list.
+
+  int num_nbrs = 0;
+
+  for (int itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    num_nbrs += numneigh[i];
+  }
+
+  int new_estimate = MAX (num_nbrs, mincap*MIN_NBRS);
+
+  return new_estimate;
+}
+
+/* ----------------------------------------------------------------------
+   Fill the far neighbor list. A serial prefix sum over numneigh gives
+   each atom its start offset (num_nbrs_offset), so the threaded loop can
+   write every atom's neighbors into its own slice of far_list.
+   Returns the total number of slots reserved by the prefix sum, which
+   can be larger than the number of neighbors that pass the cutoff.
+------------------------------------------------------------------------- */
+
+int PairReaxCOMP::write_reax_lists()
+{
+#ifdef OMP_TIMING
+  double startTimeBase, endTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  int itr_i, itr_j, i, j, num_mynbrs;
+  int *jlist;
+  double d_sqr, dist, cutoff_sqr;
+  rvec dvec;
+
+  double **x = atom->x;
+  int *ilist = list->ilist;
+  int *numneigh = list->numneigh;
+  int **firstneigh = list->firstneigh;
+  reax_list *far_nbrs = lists + FAR_NBRS;
+  far_neighbor_data *far_list = far_nbrs->select.far_nbr_list;
+
+  int num_nbrs = 0;
+  int inum = list->inum;
+  int gnum = list->gnum;
+  int numall = inum + gnum;
+
+  // sumscan of the number of neighbors per atom to determine the offsets
+  // most likely, we are overallocating. desirable to work on this part
+  // to reduce the memory footprint of the far_nbrs list.
+
+  num_nbrs = 0;
+
+  for (itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    num_nbrs_offset[i] = num_nbrs;
+    num_nbrs += numneigh[i];
+  }
+
+#if defined(_OPENMP)
+#pragma omp parallel for schedule(dynamic,50) default(shared) \
+  private(itr_i, itr_j, i, j, jlist, cutoff_sqr, num_mynbrs, d_sqr, dvec, dist)
+#endif
+  for (itr_i = 0; itr_i < numall; ++itr_i) {
+    i = ilist[itr_i];
+    jlist = firstneigh[i];
+    Set_Start_Index( i, num_nbrs_offset[i], far_nbrs );
+
+    // atoms with index below inum use the non-bonded cutoff,
+    // all others the (shorter-named) bond cutoff
+    if (i < inum)
+      cutoff_sqr = control->nonb_cut*control->nonb_cut;
+    else
+      cutoff_sqr = control->bond_cut*control->bond_cut;
+
+    num_mynbrs = 0;
+
+    for (itr_j = 0; itr_j < numneigh[i]; ++itr_j) {
+      j = jlist[itr_j];
+      j &= NEIGHMASK;
+      get_distance( x[j], x[i], &d_sqr, &dvec );
+
+      if (d_sqr <= cutoff_sqr) {
+        dist = sqrt( d_sqr );
+        set_far_nbr( &far_list[num_nbrs_offset[i] + num_mynbrs], j, dist, dvec );
+        ++num_mynbrs;
+      }
+    }
+    Set_End_Index( i, num_nbrs_offset[i] + num_mynbrs, far_nbrs );
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEWLINDEX] += (endTimeBase-startTimeBase);
+#endif
+
+  return num_nbrs;
+}
+
+/* ----------------------------------------------------------------------
+   Copy workspace->f into system->my_atoms[].f and store its negation in
+   atom->f for all system->N atoms. atom->f is assigned, not accumulated.
+   The vflag argument is not used in this function.
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::read_reax_forces(int vflag)
+{
+#if defined(_OPENMP)
+#pragma omp parallel for schedule(static) default(shared)
+#endif
+  for( int i = 0; i < system->N; ++i ) {
+    system->my_atoms[i].f[0] = workspace->f[i][0];
+    system->my_atoms[i].f[1] = workspace->f[i][1];
+    system->my_atoms[i].f[2] = workspace->f[i][2];
+
+    atom->f[i][0] = -workspace->f[i][0];
+    atom->f[i][1] = -workspace->f[i][1];
+    atom->f[i][2] = -workspace->f[i][2];
+  }
+}
+
+/* ----------------------------------------------------------------------
+   For each of the system->n local atoms i, record in tmpid[i][] and
+   tmpbo[i][] the neighbor index and bond order of every bond-list entry
+   with j >= i and bond order >= bo_cut (0.10).
+   Each row holds at most MAXSPECBOND entries; finding more is an error.
+------------------------------------------------------------------------- */
+
+void PairReaxCOMP::FindBond()
+{
+  int i, j, pj, nj;
+  double bo_tmp, bo_cut;
+
+  bond_data *bo_ij;
+  bo_cut = 0.10;
+
+  // NOTE(review): lists is used without an offset here, i.e. the first
+  // reax list; presumably that is the BONDS list - confirm.
+  // NOTE(review): error->all() is called from inside the OpenMP parallel
+  // loop - confirm this is acceptable.
+#if defined(_OPENMP)
+#pragma omp parallel for schedule(static) default(shared) \
+  private(i, nj, pj, bo_ij, j, bo_tmp)
+#endif
+  for (i = 0; i < system->n; i++) {
+    nj = 0;
+    for( pj = Start_Index(i, lists); pj < End_Index(i, lists); ++pj ) {
+      bo_ij = &( lists->select.bond_list[pj] );
+      j = bo_ij->nbr;
+      if (j < i) continue;
+
+      bo_tmp = bo_ij->bo_data.BO;
+
+      if (bo_tmp >= bo_cut ) {
+        // valid columns are 0..MAXSPECBOND-1: check BEFORE storing so the
+        // (MAXSPECBOND+1)-th bond can never be written past the row end
+        if (nj >= MAXSPECBOND) error->all(FLERR,"Increase MAXSPECBOND in fix_reaxc_species.h");
+        tmpid[i][nj] = j;
+        tmpbo[i][nj] = bo_tmp;
+        nj ++;
+      }
+    }
+  }
+}
+
diff --git a/src/USER-OMP/pair_reaxc_omp.h b/src/USER-OMP/pair_reaxc_omp.h
new file mode 100644
index 0000000000000000000000000000000000000000..a5e077c309601a59bd71f9b659eb5af34b1e6468
--- /dev/null
+++ b/src/USER-OMP/pair_reaxc_omp.h
@@ -0,0 +1,113 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Hasan Metin Aktulga, Purdue University
+   (now at Lawrence Berkeley National Laboratory, hmaktulga@lbl.gov)
+
+   Please cite the related publication:
+   H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+   "Parallel Reactive Molecular Dynamics: Numerical Methods and
+   Algorithmic Techniques", Parallel Computing, in press.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(reax/c/omp,PairReaxCOMP)
+
+#else
+
+#ifndef LMP_PAIR_REAXC_OMP_H
+#define LMP_PAIR_REAXC_OMP_H
+
+#include "pair_reaxc.h"
+#include "thr_omp.h"
+#include "suffix.h"
+
+namespace LAMMPS_NS {
+
+// Pair style reax/c/omp: derives from PairReaxC and ThrOMP.
+// The public *_proxy members below only forward their arguments to the
+// same-named *_thr functions (presumably inherited from ThrOMP - confirm),
+// so that code holding a PairReaxCOMP pointer can call them.
+class PairReaxCOMP : public PairReaxC, public ThrOMP {
+ public:
+  PairReaxCOMP(class LAMMPS *);
+  ~PairReaxCOMP();
+  virtual void compute(int, int);
+  virtual void init_style();
+
+  // return the FixOMP pointer stored in member 'fix'
+  inline FixOMP *getFixOMP() {
+    return fix;
+  };
+
+  // forward to ev_setup_thr() with the arguments unchanged
+  inline void ev_setup_thr_proxy(int eflagparm, int vflagparm, int nallparm,
+                                 double *eatomparm, double **vatomparm, ThrData *thrparm) {
+    ev_setup_thr(eflagparm, vflagparm, nallparm, eatomparm, vatomparm, thrparm);
+  };
+
+  // reduce per thread data as needed
+  inline void reduce_thr_proxy(void * const styleparm, const int eflagparm,
+                               const int vflagparm, ThrData * const thrparm) {
+    reduce_thr(styleparm, eflagparm, vflagparm, thrparm);
+  }
+
+  // forward to ev_tally_thr() with the arguments unchanged
+  inline void ev_tally_thr_proxy(Pair * const pairparm, const int iparm, const int jparm,
+                                 const int nlocalparm, const int newton_pairparm,
+                                 const double evdwlparm, const double ecoulparm,
+                                 const double fpairparm, const double delxparm,
+                                 const double delyparm, const double delzparm,
+                                 ThrData * const thrparm) {
+    ev_tally_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm,
+                 evdwlparm, ecoulparm, fpairparm, delxparm, delyparm, delzparm, thrparm);
+  }
+
+  // forward to ev_tally_xyz_thr() with the arguments unchanged
+  inline void ev_tally_xyz_thr_proxy(Pair * const pairparm, const int iparm, const int jparm,
+                                     const int nlocalparm, const int newton_pairparm,
+                                     const double evdwlparm, const double ecoulparm,
+                                     const double fxparm, const double fyparm, const double fzparm,
+                                     const double delxparm, const double delyparm,
+                                     const double delzparm, ThrData * const thrparm) {
+    ev_tally_xyz_thr(pairparm, iparm, jparm, nlocalparm, newton_pairparm,
+                     evdwlparm, ecoulparm, fxparm, fyparm, fzparm,
+                     delxparm, delyparm, delzparm, thrparm);
+  }
+
+  // forward to ev_tally3_thr() with the arguments unchanged
+  inline void ev_tally3_thr_proxy(Pair * const pairparm,int i, int j, int k,
+                                  double evdwl, double ecoul, double *fj, double *fk,
+                                  double *drji, double *drki, ThrData * const thrparm) {
+    ev_tally3_thr(pairparm, i, j, k, evdwl, ecoul, fj, fk, drji, drki, thrparm);
+  }
+
+ protected:
+  // declared virtual here; presumably these override the PairReaxC
+  // versions of the same name - confirm against pair_reaxc.h
+  virtual void setup();
+  virtual void write_reax_atoms();
+  virtual int estimate_reax_lists();
+  virtual int write_reax_lists();
+  virtual void read_reax_forces(int);
+  virtual void FindBond();
+
+  // work array used in write_reax_lists()
+  int * num_nbrs_offset;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Too many ghost atoms
+
+Number of ghost atoms has increased too much during simulation and has exceeded
+the size of reax/c arrays. Increase safe_zone and min_cap in pair_style reax/c
+command
+
+*/
diff --git a/src/USER-OMP/reaxc_bond_orders_omp.cpp b/src/USER-OMP/reaxc_bond_orders_omp.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..222c00980ec53e04e8140ab68f13b924480c81c0
--- /dev/null
+++ b/src/USER-OMP/reaxc_bond_orders_omp.cpp
@@ -0,0 +1,736 @@
+/*----------------------------------------------------------------------
+  PuReMD - Purdue ReaxFF Molecular Dynamics Program
+
+  Copyright (2010) Purdue University
+  Hasan Metin Aktulga, hmaktulga@lbl.gov
+  Joseph Fogarty, jcfogart@mail.usf.edu
+  Sagar Pandit, pandit@usf.edu
+  Ananth Y Grama, ayg@cs.purdue.edu
+
+  Please cite the related publication:
+  H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama,
+  "Parallel Reactive Molecular Dynamics: Numerical Methods and
+  Algorithmic Techniques", Parallel Computing, in press.
+
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of
+  the License, or (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "reaxc_types.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +void Add_dBond_to_ForcesOMP( reax_system *system, int i, int pj, + storage *workspace, reax_list **lists ) { + reax_list *bonds = (*lists) + BONDS; + bond_data *nbr_j, *nbr_k; + bond_order_data *bo_ij, *bo_ji; + dbond_coefficients coef; + int pk, k, j; + + PairReaxCOMP *pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + long reductionOffset = (system->N * tid); + + /* Virial Tallying variables */ + double f_scaler; + rvec fi_tmp, fj_tmp, fk_tmp, delij, delji, delki, delkj, temp; + + /* Initializations */ + nbr_j = &(bonds->select.bond_list[pj]); + j = nbr_j->nbr; + bo_ij = &(nbr_j->bo_data); + bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data); + + double c = bo_ij->Cdbo + bo_ji->Cdbo; + coef.C1dbo = bo_ij->C1dbo * c; + coef.C2dbo = bo_ij->C2dbo * c; + coef.C3dbo = bo_ij->C3dbo * c; + + c = bo_ij->Cdbopi + bo_ji->Cdbopi; + coef.C1dbopi = bo_ij->C1dbopi * c; + coef.C2dbopi = bo_ij->C2dbopi * c; + coef.C3dbopi = bo_ij->C3dbopi * c; + coef.C4dbopi = bo_ij->C4dbopi * c; + + c = bo_ij->Cdbopi2 + bo_ji->Cdbopi2; + coef.C1dbopi2 = bo_ij->C1dbopi2 * c; + coef.C2dbopi2 = bo_ij->C2dbopi2 * c; + coef.C3dbopi2 = bo_ij->C3dbopi2 * c; + coef.C4dbopi2 = bo_ij->C4dbopi2 * c; + + c = workspace->CdDelta[i] + workspace->CdDelta[j]; + 
coef.C1dDelta = bo_ij->C1dbo * c; + coef.C2dDelta = bo_ij->C2dbo * c; + coef.C3dDelta = bo_ij->C3dbo * c; + + // The same "c" refactoring here can be replicated below in Add_dBond_to_Forces_NPTOMP(), but + // I'd prefer to wait for a test to verify changes before doing so (just to be safe). + + // forces on i + // rvec_Scale( temp, coef.C1dbo, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C2dbo, workspace->dDeltap_self[i] ); + // rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] ); + // rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); + // rvec_ScaledAdd( temp, coef.C2dbopi, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbopi, workspace->dDeltap_self[i]); + // rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + // rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] ); + + c = (coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2); + rvec_Scale( temp, c, bo_ij->dBOp ); + + c = (coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2); + rvec_ScaledAdd( temp, c, workspace->dDeltap_self[i] ); + + rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); + rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + + rvec_Add(workspace->forceReduction[reductionOffset+i],temp ); + + if( system->pair_ptr->vflag_atom) { + rvec_Scale(fi_tmp, -1.0, temp); + rvec_ScaledSum( delij, 1., system->my_atoms[i].x,-1., system->my_atoms[j].x ); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,i,j,system->N,0,0,0, + fi_tmp[0],fi_tmp[1],fi_tmp[2], + delij[0],delij[1],delij[2],thr); + } + + // forces on j + // rvec_Scale( temp, -coef.C1dbo, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dbo, workspace->dDeltap_self[j] ); + // rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C3dDelta, workspace->dDeltap_self[j]); + // rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi ); 
+ // rvec_ScaledAdd( temp, -coef.C2dbopi, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C4dbopi, workspace->dDeltap_self[j]); + // rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + // rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp ); + // rvec_ScaledAdd( temp, coef.C4dbopi2, workspace->dDeltap_self[j]); + + + c = -(coef.C1dbo + coef.C1dDelta + coef.C2dbopi + coef.C2dbopi2); + rvec_Scale( temp, c, bo_ij->dBOp ); + + c = (coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2); + rvec_ScaledAdd( temp, c, workspace->dDeltap_self[j] ); + + rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi ); + rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); + + + rvec_Add(workspace->forceReduction[reductionOffset+j],temp ); + + if( system->pair_ptr->vflag_atom) { + rvec_Scale(fj_tmp, -1.0, temp); + rvec_ScaledSum( delji, 1., system->my_atoms[j].x,-1., system->my_atoms[i].x ); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,j,i,system->N,0,0,0, + fj_tmp[0],fj_tmp[1],fj_tmp[2], + delji[0],delji[1],delji[2],thr); + } + + // forces on k: i neighbor + for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + // rvec_Scale( temp, -coef.C2dbo, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C2dDelta, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp); + + const double c = -(coef.C2dbo + coef.C2dDelta + coef.C3dbopi + coef.C3dbopi2); + rvec_Scale(temp, c, nbr_k->bo_data.dBOp); + + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + if( system->pair_ptr->vflag_atom ) { + rvec_Scale(fk_tmp, -1.0, temp); + rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delki[0],delki[1],delki[2],thr); + 
rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delkj[0],delkj[1],delkj[2],thr); + } + } + + // forces on k: j neighbor + for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + // rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp ); + // rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); + // rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp); + + const double c = -(coef.C3dbo + coef.C3dDelta + coef.C4dbopi + coef.C4dbopi2); + rvec_Scale(temp, c, nbr_k->bo_data.dBOp); + + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + if( system->pair_ptr->vflag_atom ) { + rvec_Scale(fk_tmp, -1.0, temp); + rvec_ScaledSum(delki,1.,system->my_atoms[k].x,-1.,system->my_atoms[i].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,i,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delki[0],delki[1],delki[2],thr); + + rvec_ScaledSum(delkj,1.,system->my_atoms[k].x,-1.,system->my_atoms[j].x); + + pair_reax_ptr->ev_tally_xyz_thr_proxy(system->pair_ptr,k,j,system->N,0,0,0, + fk_tmp[0],fk_tmp[1],fk_tmp[2], + delkj[0],delkj[1],delkj[2],thr); + } + } +} + +/* ---------------------------------------------------------------------- */ + +void Add_dBond_to_Forces_NPTOMP( reax_system *system, int i, int pj, simulation_data *data, + storage *workspace, reax_list **lists ) { + reax_list *bonds = (*lists) + BONDS; + bond_data *nbr_j, *nbr_k; + bond_order_data *bo_ij, *bo_ji; + dbond_coefficients coef; + rvec temp, ext_press; + ivec rel_box; + int pk, k, j; + + PairReaxCOMP *pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + ThrData *thr = 
pair_reax_ptr->getFixOMP()->get_thr(tid); + long reductionOffset = (system->N * tid); + + /* Initializations */ + nbr_j = &(bonds->select.bond_list[pj]); + j = nbr_j->nbr; + bo_ij = &(nbr_j->bo_data); + bo_ji = &(bonds->select.bond_list[ nbr_j->sym_index ].bo_data); + + coef.C1dbo = bo_ij->C1dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + coef.C2dbo = bo_ij->C2dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + coef.C3dbo = bo_ij->C3dbo * (bo_ij->Cdbo + bo_ji->Cdbo); + + coef.C1dbopi = bo_ij->C1dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C2dbopi = bo_ij->C2dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C3dbopi = bo_ij->C3dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + coef.C4dbopi = bo_ij->C4dbopi * (bo_ij->Cdbopi + bo_ji->Cdbopi); + + coef.C1dbopi2 = bo_ij->C1dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C2dbopi2 = bo_ij->C2dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C3dbopi2 = bo_ij->C3dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + coef.C4dbopi2 = bo_ij->C4dbopi2 * (bo_ij->Cdbopi2 + bo_ji->Cdbopi2); + + coef.C1dDelta = bo_ij->C1dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + coef.C2dDelta = bo_ij->C2dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + coef.C3dDelta = bo_ij->C3dbo * (workspace->CdDelta[i]+workspace->CdDelta[j]); + + + /************************************ + * forces related to atom i * + * first neighbors of atom i * + ************************************/ + for( pk = Start_Index(i, bonds); pk < End_Index(i, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + rvec_Scale(temp, -coef.C2dbo, nbr_k->bo_data.dBOp); /*2nd, dBO*/ + rvec_ScaledAdd(temp, -coef.C2dDelta, nbr_k->bo_data.dBOp);/*dDelta*/ + rvec_ScaledAdd(temp, -coef.C3dbopi, nbr_k->bo_data.dBOp); /*3rd, dBOpi*/ + rvec_ScaledAdd(temp, -coef.C3dbopi2, nbr_k->bo_data.dBOp);/*3rd, dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + /* pressure */ + rvec_iMultiply( ext_press, nbr_k->rel_box, temp ); + rvec_Add( 
workspace->my_ext_pressReduction[tid], ext_press ); + } + + /* then atom i itself */ + rvec_Scale( temp, coef.C1dbo, bo_ij->dBOp ); /*1st,dBO*/ + rvec_ScaledAdd( temp, coef.C2dbo, workspace->dDeltap_self[i] ); /*2nd,dBO*/ + rvec_ScaledAdd( temp, coef.C1dDelta, bo_ij->dBOp ); /*1st,dBO*/ + rvec_ScaledAdd( temp, coef.C2dDelta, workspace->dDeltap_self[i] );/*2nd,dBO*/ + rvec_ScaledAdd( temp, coef.C1dbopi, bo_ij->dln_BOp_pi ); /*1st,dBOpi*/ + rvec_ScaledAdd( temp, coef.C2dbopi, bo_ij->dBOp ); /*2nd,dBOpi*/ + rvec_ScaledAdd( temp, coef.C3dbopi, workspace->dDeltap_self[i]);/*3rd,dBOpi*/ + + rvec_ScaledAdd( temp, coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); /*1st,dBO_pi2*/ + rvec_ScaledAdd( temp, coef.C2dbopi2, bo_ij->dBOp ); /*2nd,dBO_pi2*/ + rvec_ScaledAdd( temp, coef.C3dbopi2, workspace->dDeltap_self[i] );/*3rd*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+i],temp ); + + for( pk = Start_Index(j, bonds); pk < End_Index(j, bonds); ++pk ) { + nbr_k = &(bonds->select.bond_list[pk]); + k = nbr_k->nbr; + + rvec_Scale( temp, -coef.C3dbo, nbr_k->bo_data.dBOp ); /*3rd,dBO*/ + rvec_ScaledAdd( temp, -coef.C3dDelta, nbr_k->bo_data.dBOp);/*dDelta*/ + rvec_ScaledAdd( temp, -coef.C4dbopi, nbr_k->bo_data.dBOp); /*4th,dBOpi*/ + rvec_ScaledAdd( temp, -coef.C4dbopi2, nbr_k->bo_data.dBOp);/*4th,dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+k],temp ); + + /* pressure */ + if( k != i ) { + ivec_Sum( rel_box, nbr_k->rel_box, nbr_j->rel_box ); //rel_box(k, i) + rvec_iMultiply( ext_press, rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + + /* then atom j itself */ + rvec_Scale( temp, -coef.C1dbo, bo_ij->dBOp ); /*1st, dBO*/ + rvec_ScaledAdd( temp, coef.C3dbo, workspace->dDeltap_self[j] ); /*2nd, dBO*/ + rvec_ScaledAdd( temp, -coef.C1dDelta, bo_ij->dBOp ); /*1st, dBO*/ + rvec_ScaledAdd( temp, coef.C3dDelta, workspace->dDeltap_self[j]);/*2nd, dBO*/ + + rvec_ScaledAdd( temp, -coef.C1dbopi, bo_ij->dln_BOp_pi 
); /*1st,dBOpi*/ + rvec_ScaledAdd( temp, -coef.C2dbopi, bo_ij->dBOp ); /*2nd,dBOpi*/ + rvec_ScaledAdd( temp, coef.C4dbopi, workspace->dDeltap_self[j]);/*3rd,dBOpi*/ + + rvec_ScaledAdd( temp, -coef.C1dbopi2, bo_ij->dln_BOp_pi2 ); /*1st,dBOpi2*/ + rvec_ScaledAdd( temp, -coef.C2dbopi2, bo_ij->dBOp ); /*2nd,dBOpi2*/ + rvec_ScaledAdd( temp,coef.C4dbopi2,workspace->dDeltap_self[j]);/*3rd,dBOpi2*/ + + /* force */ + rvec_Add(workspace->forceReduction[reductionOffset+j],temp ); + + /* pressure */ + rvec_iMultiply( ext_press, nbr_j->rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); +} + +/* ---------------------------------------------------------------------- */ + +int BOp_OMP( storage *workspace, reax_list *bonds, double bo_cut, + int i, int btop_i, far_neighbor_data *nbr_pj, + single_body_parameters *sbp_i, single_body_parameters *sbp_j, + two_body_parameters *twbp, + int btop_j, double C12, double C34, double C56, double BO, double BO_s, double BO_pi, double BO_pi2) { + int j; + double rr2; + double Cln_BOp_s, Cln_BOp_pi, Cln_BOp_pi2; + bond_data *ibond, *jbond; + bond_order_data *bo_ij, *bo_ji; + + j = nbr_pj->nbr; + rr2 = 1.0 / SQR(nbr_pj->d); + + // Top portion of BOp() moved to reaxc_forces_omp.cpp::Init_Forces_noQEq_OMP() + + /* Initially BO values are the uncorrected ones, page 1 */ + + /****** bonds i-j and j-i ******/ + ibond = &( bonds->select.bond_list[btop_i] ); + jbond = &( bonds->select.bond_list[btop_j] ); + + ibond->nbr = j; + jbond->nbr = i; + ibond->d = nbr_pj->d; + jbond->d = nbr_pj->d; + rvec_Copy( ibond->dvec, nbr_pj->dvec ); + rvec_Scale( jbond->dvec, -1, nbr_pj->dvec ); + ivec_Copy( ibond->rel_box, nbr_pj->rel_box ); + ivec_Scale( jbond->rel_box, -1, nbr_pj->rel_box ); + ibond->dbond_index = btop_i; + jbond->dbond_index = btop_i; + ibond->sym_index = btop_j; + jbond->sym_index = btop_i; + + bo_ij = &( ibond->bo_data ); + bo_ji = &( jbond->bo_data ); + bo_ji->BO = bo_ij->BO = BO; + bo_ji->BO_s = bo_ij->BO_s = BO_s; + 
bo_ji->BO_pi = bo_ij->BO_pi = BO_pi; + bo_ji->BO_pi2 = bo_ij->BO_pi2 = BO_pi2; + + /* Bond Order page2-3, derivative of total bond order prime */ + Cln_BOp_s = twbp->p_bo2 * C12 * rr2; + Cln_BOp_pi = twbp->p_bo4 * C34 * rr2; + Cln_BOp_pi2 = twbp->p_bo6 * C56 * rr2; + + /* Only dln_BOp_xx wrt. dr_i is stored here, note that + dln_BOp_xx/dr_i = -dln_BOp_xx/dr_j and all others are 0 */ + rvec_Scale(bo_ij->dln_BOp_s,-bo_ij->BO_s*Cln_BOp_s,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi,-bo_ij->BO_pi*Cln_BOp_pi,ibond->dvec); + rvec_Scale(bo_ij->dln_BOp_pi2, + -bo_ij->BO_pi2*Cln_BOp_pi2,ibond->dvec); + rvec_Scale(bo_ji->dln_BOp_s, -1., bo_ij->dln_BOp_s); + rvec_Scale(bo_ji->dln_BOp_pi, -1., bo_ij->dln_BOp_pi ); + rvec_Scale(bo_ji->dln_BOp_pi2, -1., bo_ij->dln_BOp_pi2 ); + + rvec_Scale( bo_ij->dBOp, + -(bo_ij->BO_s * Cln_BOp_s + + bo_ij->BO_pi * Cln_BOp_pi + + bo_ij->BO_pi2 * Cln_BOp_pi2), ibond->dvec ); + rvec_Scale( bo_ji->dBOp, -1., bo_ij->dBOp ); + + bo_ij->BO_s -= bo_cut; + bo_ij->BO -= bo_cut; + bo_ji->BO_s -= bo_cut; + bo_ji->BO -= bo_cut; + + bo_ij->Cdbo = bo_ij->Cdbopi = bo_ij->Cdbopi2 = 0.0; + bo_ji->Cdbo = bo_ji->Cdbopi = bo_ji->Cdbopi2 = 0.0; + + return 1; +} + +/* ---------------------------------------------------------------------- */ + +void BOOMP( reax_system *system, control_params *control, simulation_data *data, + storage *workspace, reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + double p_lp1 = system->reax_param.gp.l[15]; + int num_bonds = 0; + double p_boc1 = system->reax_param.gp.l[0]; + double p_boc2 = system->reax_param.gp.l[1]; + reax_list *bonds = (*lists) + BONDS; + int natoms = system->N; + int nthreads = control->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) +#endif + { + int i, j, pj, type_i, type_j; + int start_i, end_i, sym_index; + double val_i, Deltap_i, Deltap_boc_i; + double val_j, Deltap_j, Deltap_boc_j; + 
double f1, f2, f3, f4, f5, f4f5, exp_f4, exp_f5; + double exp_p1i, exp_p2i, exp_p1j, exp_p2j, explp1; + double temp, u1_ij, u1_ji, Cf1A_ij, Cf1B_ij, Cf1_ij, Cf1_ji; + double Cf45_ij, Cf45_ji; //u_ij, u_ji + double A0_ij, A1_ij, A2_ij, A2_ji, A3_ij, A3_ji; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_order_data *bo_ij, *bo_ji; + +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + /* Calculate Deltaprime, Deltaprime_boc values */ +#if defined(_OPENMP) +#pragma omp for schedule(static) +#endif + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[type_i]); + workspace->Deltap[i] = workspace->total_bond_order[i] - sbp_i->valency; + workspace->Deltap_boc[i] = + workspace->total_bond_order[i] - sbp_i->valency_val; + + workspace->total_bond_order[i] = 0; + } + + // Wait till initialization complete +#if defined(_OPENMP) +#pragma omp barrier +#endif + + /* Corrected Bond Order calculations */ +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[type_i]); + val_i = sbp_i->valency; + Deltap_i = workspace->Deltap[i]; + Deltap_boc_i = workspace->Deltap_boc[i]; + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + + if( i < j || workspace->bond_mark[j] > 3) { + twbp = &( system->reax_param.tbp[type_i][type_j] ); + + if( twbp->ovc < 0.001 && twbp->v13cor < 0.001 ) { + bo_ij->C1dbo = 1.000000; + bo_ij->C2dbo = 0.000000; + bo_ij->C3dbo = 0.000000; + + bo_ij->C1dbopi = bo_ij->BO_pi; + bo_ij->C2dbopi = 0.000000; + bo_ij->C3dbopi = 0.000000; + bo_ij->C4dbopi = 0.000000; + + 
bo_ij->C1dbopi2 = bo_ij->BO_pi2; + bo_ij->C2dbopi2 = 0.000000; + bo_ij->C3dbopi2 = 0.000000; + bo_ij->C4dbopi2 = 0.000000; + + } + else { + val_j = system->reax_param.sbp[type_j].valency; + Deltap_j = workspace->Deltap[j]; + Deltap_boc_j = workspace->Deltap_boc[j]; + + /* on page 1 */ + if( twbp->ovc >= 0.001 ) { + /* Correction for overcoordination */ + exp_p1i = exp( -p_boc1 * Deltap_i ); + exp_p2i = exp( -p_boc2 * Deltap_i ); + exp_p1j = exp( -p_boc1 * Deltap_j ); + exp_p2j = exp( -p_boc2 * Deltap_j ); + + f2 = exp_p1i + exp_p1j; + f3 = -1.0 / p_boc2 * log( 0.5 * ( exp_p2i + exp_p2j ) ); + f1 = 0.5 * ( ( val_i + f2 )/( val_i + f2 + f3 ) + + ( val_j + f2 )/( val_j + f2 + f3 ) ); + + /* Now come the derivates */ + /* Bond Order pages 5-7, derivative of f1 */ + temp = f2 + f3; + u1_ij = val_i + temp; + u1_ji = val_j + temp; + Cf1A_ij = 0.5 * f3 * (1.0 / SQR( u1_ij ) + + 1.0 / SQR( u1_ji )); + Cf1B_ij = -0.5 * (( u1_ij - f3 ) / SQR( u1_ij ) + + ( u1_ji - f3 ) / SQR( u1_ji )); + + Cf1_ij = 0.50 * ( -p_boc1 * exp_p1i / u1_ij - + ((val_i+f2) / SQR(u1_ij)) * + ( -p_boc1 * exp_p1i + + exp_p2i / ( exp_p2i + exp_p2j ) ) + + -p_boc1 * exp_p1i / u1_ji - + ((val_j+f2) / SQR(u1_ji)) * + ( -p_boc1 * exp_p1i + + exp_p2i / ( exp_p2i + exp_p2j ) )); + + + Cf1_ji = -Cf1A_ij * p_boc1 * exp_p1j + + Cf1B_ij * exp_p2j / ( exp_p2i + exp_p2j ); + } + else { + /* No overcoordination correction! */ + f1 = 1.0; + Cf1_ij = Cf1_ji = 0.0; + } + + if( twbp->v13cor >= 0.001 ) { + /* Correction for 1-3 bond orders */ + exp_f4 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) - + Deltap_boc_i) * twbp->p_boc3 + twbp->p_boc5); + exp_f5 =exp(-(twbp->p_boc4 * SQR( bo_ij->BO ) - + Deltap_boc_j) * twbp->p_boc3 + twbp->p_boc5); + + f4 = 1. / (1. + exp_f4); + f5 = 1. / (1. 
+ exp_f5); + f4f5 = f4 * f5; + + /* Bond Order pages 8-9, derivative of f4 and f5 */ + Cf45_ij = -f4 * exp_f4; + Cf45_ji = -f5 * exp_f5; + } + else { + f4 = f5 = f4f5 = 1.0; + Cf45_ij = Cf45_ji = 0.0; + } + + /* Bond Order page 10, derivative of total bond order */ + A0_ij = f1 * f4f5; + A1_ij = -2 * twbp->p_boc3 * twbp->p_boc4 * bo_ij->BO * + (Cf45_ij + Cf45_ji); + A2_ij = Cf1_ij / f1 + twbp->p_boc3 * Cf45_ij; + A2_ji = Cf1_ji / f1 + twbp->p_boc3 * Cf45_ji; + A3_ij = A2_ij + Cf1_ij / f1; + A3_ji = A2_ji + Cf1_ji / f1; + + /* find corrected bond orders and their derivative coef */ + bo_ij->BO = bo_ij->BO * A0_ij; + bo_ij->BO_pi = bo_ij->BO_pi * A0_ij *f1; + bo_ij->BO_pi2= bo_ij->BO_pi2* A0_ij *f1; + bo_ij->BO_s = bo_ij->BO - ( bo_ij->BO_pi + bo_ij->BO_pi2 ); + + bo_ij->C1dbo = A0_ij + bo_ij->BO * A1_ij; + bo_ij->C2dbo = bo_ij->BO * A2_ij; + bo_ij->C3dbo = bo_ij->BO * A2_ji; + + bo_ij->C1dbopi = f1*f1*f4*f5; + bo_ij->C2dbopi = bo_ij->BO_pi * A1_ij; + bo_ij->C3dbopi = bo_ij->BO_pi * A3_ij; + bo_ij->C4dbopi = bo_ij->BO_pi * A3_ji; + + bo_ij->C1dbopi2 = f1*f1*f4*f5; + bo_ij->C2dbopi2 = bo_ij->BO_pi2 * A1_ij; + bo_ij->C3dbopi2 = bo_ij->BO_pi2 * A3_ij; + bo_ij->C4dbopi2 = bo_ij->BO_pi2 * A3_ji; + } + + /* neglect bonds that are < 1e-10 */ + if( bo_ij->BO < 1e-10 ) + bo_ij->BO = 0.0; + if( bo_ij->BO_s < 1e-10 ) + bo_ij->BO_s = 0.0; + if( bo_ij->BO_pi < 1e-10 ) + bo_ij->BO_pi = 0.0; + if( bo_ij->BO_pi2 < 1e-10 ) + bo_ij->BO_pi2 = 0.0; + + workspace->total_bond_order[i] += bo_ij->BO; //now keeps total_BO + } + // else { + // /* We only need to update bond orders from bo_ji + // everything else is set in uncorrected_bo calculations */ + // sym_index = bonds->select.bond_list[pj].sym_index; + // bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data); + // bo_ij->BO = bo_ji->BO; + // bo_ij->BO_s = bo_ji->BO_s; + // bo_ij->BO_pi = bo_ji->BO_pi; + // bo_ij->BO_pi2 = bo_ji->BO_pi2; + + // workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO + // } + } + + } + + // 
Wait for bo_ij to be updated +#if defined(_OPENMP) +#pragma omp barrier +#endif + // Try to combine the following for-loop back into the for-loop above + /*-------------------------*/ +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < system->N; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + + if( i < j || workspace->bond_mark[j] > 3) { + // Computed in previous for-loop + } else { + /* We only need to update bond orders from bo_ji + everything else is set in uncorrected_bo calculations */ + sym_index = bonds->select.bond_list[pj].sym_index; + + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + bo_ji = &(bonds->select.bond_list[ sym_index ].bo_data); + bo_ij->BO = bo_ji->BO; + bo_ij->BO_s = bo_ji->BO_s; + bo_ij->BO_pi = bo_ji->BO_pi; + bo_ij->BO_pi2 = bo_ji->BO_pi2; + + workspace->total_bond_order[i] += bo_ij->BO;// now keeps total_BO + } + } + + } + + /*-------------------------*/ + + // Need to wait for total_bond_order to be accumulated. 
+#if defined(_OPENMP) +#pragma omp barrier +#endif + /* Calculate some helper variables that are used at many places + throughout force calculations */ +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for(j = 0; j < system->N; ++j ) { + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + sbp_j = &(system->reax_param.sbp[ type_j ]); + + workspace->Delta[j] = workspace->total_bond_order[j] - sbp_j->valency; + workspace->Delta_e[j] = workspace->total_bond_order[j] - sbp_j->valency_e; + workspace->Delta_boc[j] = workspace->total_bond_order[j] - + sbp_j->valency_boc; + workspace->Delta_val[j] = workspace->total_bond_order[j] - + sbp_j->valency_val; + + workspace->vlpex[j] = workspace->Delta_e[j] - + 2.0 * (int)(workspace->Delta_e[j]/2.0); + explp1 = exp(-p_lp1 * SQR(2.0 + workspace->vlpex[j])); + workspace->nlp[j] = explp1 - (int)(workspace->Delta_e[j] / 2.0); + workspace->Delta_lp[j] = sbp_j->nlp_opt - workspace->nlp[j]; + workspace->Clp[j] = 2.0 * p_lp1 * explp1 * (2.0 + workspace->vlpex[j]); + workspace->dDelta_lp[j] = workspace->Clp[j]; + + if( sbp_j->mass > 21.0 ) { + workspace->nlp_temp[j] = 0.5 * (sbp_j->valency_e - sbp_j->valency); + workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j]; + workspace->dDelta_lp_temp[j] = 0.; + } + else { + workspace->nlp_temp[j] = workspace->nlp[j]; + workspace->Delta_lp_temp[j] = sbp_j->nlp_opt - workspace->nlp_temp[j]; + workspace->dDelta_lp_temp[j] = workspace->Clp[j]; + } + } + + } // parallel region + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBOINDEX] += (endTimeBase-startTimeBase); + +#endif +} diff --git a/src/USER-OMP/reaxc_bond_orders_omp.h b/src/USER-OMP/reaxc_bond_orders_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..272309caddc50dd2428017a8980c9eb94fd60858 --- /dev/null +++ b/src/USER-OMP/reaxc_bond_orders_omp.h @@ -0,0 +1,43 @@ +/*---------------------------------------------------------------------- + PuReMD - 
Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __BOND_ORDERS_OMP_H_ +#define __BOND_ORDERS_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_bond_orders.h" + +void Add_dBond_to_ForcesOMP( reax_system*, int, int, storage*, reax_list** ); +void Add_dBond_to_Forces_NPTOMP( reax_system *system, int, int, simulation_data*, + storage*, reax_list** ); + +int BOp_OMP(storage*, reax_list*, double, int, int, far_neighbor_data*, + single_body_parameters*, single_body_parameters*, two_body_parameters*, + int, double, double, double, double, double, double, double); + +void BOOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); +#endif diff --git a/src/USER-OMP/reaxc_bonds_omp.cpp b/src/USER-OMP/reaxc_bonds_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dcf788a79c228d8b9dacfba56058262933841a2f --- /dev/null +++ b/src/USER-OMP/reaxc_bonds_omp.cpp @@ -0,0 +1,186 @@ 
+/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" + +#include "reaxc_bonds_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void BondsOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->n; + int nthreads = control->nthreads; + reax_list *bonds = (*lists) + BONDS; + double gp3 = system->reax_param.gp.l[3]; + double gp4 = system->reax_param.gp.l[4]; + double gp7 = system->reax_param.gp.l[7]; + double gp10 = system->reax_param.gp.l[10]; + double gp37 = (int) system->reax_param.gp.l[37]; + double total_Ebond = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) reduction(+: total_Ebond) +#endif + { + int i, j, pj; + int start_i, end_i; + int type_i, type_j; + double ebond, ebond_thr=0.0, pow_BOs_be2, exp_be12, CEbo; + double gp3, gp4, gp7, gp10, gp37; + double exphu, exphua1, exphub1, exphuov, hulpov, estriph, estriph_thr=0.0; + double decobdbo, decobdboua, decobdboub; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_order_data *bo_ij; + +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, + system->pair_ptr->eatom, system->pair_ptr->vatom, thr); + +#if 
defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < natoms; ++i) { + start_i = Start_Index(i, bonds); + end_i = End_Index(i, bonds); + + for (pj = start_i; pj < end_i; ++pj) { + j = bonds->select.bond_list[pj].nbr; + + if( system->my_atoms[i].orig_id > system->my_atoms[j].orig_id ) continue; + + if( system->my_atoms[i].orig_id == system->my_atoms[j].orig_id ) { + if (system->my_atoms[j].x[2] < system->my_atoms[i].x[2]) continue; + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + system->my_atoms[j].x[1] < system->my_atoms[i].x[1]) continue; + if (system->my_atoms[j].x[2] == system->my_atoms[i].x[2] && + system->my_atoms[j].x[1] == system->my_atoms[i].x[1] && + system->my_atoms[j].x[0] < system->my_atoms[i].x[0]) continue; + } + + /* set the pointers */ + type_i = system->my_atoms[i].type; + type_j = system->my_atoms[j].type; + sbp_i = &( system->reax_param.sbp[type_i] ); + sbp_j = &( system->reax_param.sbp[type_j] ); + twbp = &( system->reax_param.tbp[type_i][type_j] ); + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + + /* calculate the constants */ + pow_BOs_be2 = pow( bo_ij->BO_s, twbp->p_be2 ); + exp_be12 = exp( twbp->p_be1 * ( 1.0 - pow_BOs_be2 ) ); + CEbo = -twbp->De_s * exp_be12 * + ( 1.0 - twbp->p_be1 * twbp->p_be2 * pow_BOs_be2 ); + + /* calculate the Bond Energy */ + total_Ebond += ebond = + -twbp->De_s * bo_ij->BO_s * exp_be12 + -twbp->De_p * bo_ij->BO_pi + -twbp->De_pp * bo_ij->BO_pi2; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + ebond, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + /* calculate derivatives of Bond Orders */ + bo_ij->Cdbo += CEbo; + bo_ij->Cdbopi -= (CEbo + twbp->De_p); + bo_ij->Cdbopi2 -= (CEbo + twbp->De_pp); + + /* Stabilisation terminal triple bond */ + if (bo_ij->BO >= 1.00) { + if (gp37 == 2 || + (sbp_i->mass == 12.0000 && sbp_j->mass == 15.9990) || + (sbp_j->mass == 12.0000 && sbp_i->mass == 15.9990)) { + 
exphu = exp( -gp7 * SQR(bo_ij->BO - 2.50) ); + exphua1 = exp(-gp3 * (workspace->total_bond_order[i]-bo_ij->BO)); + exphub1 = exp(-gp3 * (workspace->total_bond_order[j]-bo_ij->BO)); + exphuov = exp(gp4 * (workspace->Delta[i] + workspace->Delta[j])); + hulpov = 1.0 / (1.0 + 25.0 * exphuov); + + estriph = gp10 * exphu * hulpov * (exphua1 + exphub1); + total_Ebond += estriph; + + decobdbo = gp10 * exphu * hulpov * (exphua1 + exphub1) * + ( gp3 - 2.0 * gp7 * (bo_ij->BO-2.50) ); + decobdboua = -gp10 * exphu * hulpov * + (gp3*exphua1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); + decobdboub = -gp10 * exphu * hulpov * + (gp3*exphub1 + 25.0*gp4*exphuov*hulpov*(exphua1+exphub1)); + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, + estriph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + bo_ij->Cdbo += decobdbo; + workspace->CdDelta[i] += decobdboua; + workspace->CdDeltaReduction[reductionOffset+j] += decobdboub; + } + } + } + } // for(i) + + } // omp + + data->my_en.e_bond += total_Ebond; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBONDSINDEX] += (endTimeBase-startTimeBase); +#endif + +} diff --git a/src/USER-OMP/reaxc_bonds_omp.h b/src/USER-OMP/reaxc_bonds_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..8c07fd8957e3e03a4a9a74cc4bfd0568ef81e3b1 --- /dev/null +++ b/src/USER-OMP/reaxc_bonds_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __BONDS_OMP_H_ +#define __BONDS_OMP_H_ + +#include "reaxc_types.h" + +void BondsOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4e37dac38dfb319b1ca8d85b542713a36f2044cf --- /dev/null +++ b/src/USER-OMP/reaxc_forces_omp.cpp @@ -0,0 +1,649 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "thr_data.h" + +#include "reaxc_forces_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_bonds_omp.h" +#include "reaxc_hydrogen_bonds_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_multi_body_omp.h" +#include "reaxc_nonbonded_omp.h" +#include "reaxc_tool_box.h" +#include "reaxc_torsion_angles_omp.h" +#include "reaxc_valence_angles_omp.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +// Functions defined in reaxc_forces.cpp +extern interaction_function Interaction_Functions[]; +extern double Compute_H(double, double, double*); +extern double Compute_tabH(double, int, int); +extern void Dummy_Interaction(reax_system*, control_params*, simulation_data*, storage*, reax_list**, output_controls*); + +/* ---------------------------------------------------------------------- */ + +void Init_Force_FunctionsOMP( control_params *control ) +{ + Interaction_Functions[0] = BOOMP; + Interaction_Functions[1] = BondsOMP; //Dummy_Interaction; + Interaction_Functions[2] = Atom_EnergyOMP; //Dummy_Interaction; + Interaction_Functions[3] = Valence_AnglesOMP; //Dummy_Interaction; + Interaction_Functions[4] = Torsion_AnglesOMP; //Dummy_Interaction; + if( control->hbond_cut > 0 ) + Interaction_Functions[5] = Hydrogen_BondsOMP; + else Interaction_Functions[5] = Dummy_Interaction; + Interaction_Functions[6] = Dummy_Interaction; //empty + Interaction_Functions[7] = Dummy_Interaction; //empty + Interaction_Functions[8] = Dummy_Interaction; //empty + Interaction_Functions[9] = 
Dummy_Interaction; //empty +} + +/* ---------------------------------------------------------------------- */ + +// Only difference with MPI-only version is inclusion of OMP_TIMING statements +void Compute_Bonded_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) +{ + int i; + +#ifdef OMP_TIMING + double startTimeBase, endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* Implement all force calls as function pointers */ + for( i = 0; i < NUM_INTRS; i++ ) { + (Interaction_Functions[i])( system, control, data, workspace, + lists, out_control ); + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEBFINDEX] += (endTimeBase-startTimeBase); +#endif + +} + +// Only difference with MPI-only version is inclusion of OMP_TIMING statements +void Compute_NonBonded_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) +{ + /* van der Waals and Coulomb interactions */ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + if( control->tabulate == 0 ) + vdW_Coulomb_Energy_OMP( system, control, data, workspace, + lists, out_control ); + else + Tabulated_vdW_Coulomb_Energy_OMP( system, control, data, workspace, + lists, out_control ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTENBFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +/* this version of Compute_Total_Force computes forces from + coefficients accumulated by all interaction functions. + Saves enormous time & space! 
*/ +void Compute_Total_ForceOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, mpi_datatypes *mpi_data ) +{ +#ifdef OMP_TIMING + double startTimeBase,endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->N; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + reax_list *bonds = (*lists) + BONDS; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) //default(none) +#endif + { + int i, j, k, pj, pk, start_j, end_j; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + bond_order_data *bo_jk; + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(0, 1, natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < system->N; ++i) { + for (j = 0; j < nthreads; ++j) + workspace->CdDelta[i] += workspace->CdDeltaReduction[system->N*j+i]; + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (j = 0; j < system->N; ++j) { + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + bo_jk = &( bonds->select.bond_list[pk].bo_data ); + for (k = 0; k < nthreads; ++k) + bo_jk->Cdbo += bo_jk->CdboReduction[k]; + } + } + +// #pragma omp for schedule(guided) //(dynamic,50) +// for (i = 0; i < system->N; ++i) +// for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) +// if (i < bonds->select.bond_list[pj].nbr) { +// if (control->virial == 0) +// Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists ); +// else +// Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists ); +// } + + if(control->virial == 0) { + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for 
(i = 0; i < system->N; ++i) { + const int startj = Start_Index(i, bonds); + const int endj = End_Index(i, bonds); + for (pj = startj; pj < endj; ++pj) + if (i < bonds->select.bond_list[pj].nbr) + Add_dBond_to_ForcesOMP( system, i, pj, workspace, lists ); + } + + } else { + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (i = 0; i < system->N; ++i) { + const int startj = Start_Index(i, bonds); + const int endj = End_Index(i, bonds); + for (pj = startj; pj < endj; ++pj) + if (i < bonds->select.bond_list[pj].nbr) + Add_dBond_to_Forces_NPTOMP(system, i, pj, data, workspace, lists ); + } + + } // if(virial == 0) + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, 0, 1, thr); + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < system->N; ++i) { + for (j = 0; j < nthreads; ++j) + rvec_Add( workspace->f[i], workspace->forceReduction[system->N*j+i] ); + } + + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < totalReductionSize; i++) { + workspace->forceReduction[i][0] = 0; + workspace->forceReduction[i][1] = 0; + workspace->forceReduction[i][2] = 0; + workspace->CdDeltaReduction[i] = 0; + } + } // parallel region + + if (control->virial) + for (int i=0; i < nthreads; ++i) { + rvec_Add(data->my_ext_press, workspace->my_ext_pressReduction[i]); + workspace->my_ext_pressReduction[i][0] = 0; + workspace->my_ext_pressReduction[i][1] = 0; + workspace->my_ext_pressReduction[i][2] = 0; + } + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTETFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void Validate_ListsOMP( reax_system *system, storage *workspace, reax_list **lists, + int step, int n, int N, int numH, MPI_Comm comm ) +{ + int i, comp, Hindex; + reax_list *bonds, *hbonds; + reallocate_data *realloc = &(workspace->realloc); + double saferzone = system->saferzone; + +#if 
defined(_OPENMP) +#pragma omp parallel default(shared) private(i, comp, Hindex) +#endif + { + + /* bond list */ + if( N > 0 ) { + bonds = *lists + BONDS; + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for( i = 0; i < N; ++i ) { + system->my_atoms[i].num_bonds = MAX(Num_Entries(i,bonds)*2, MIN_BONDS); + + if( i < N-1 ) + comp = Start_Index(i+1, bonds); + else comp = bonds->num_intrs; + + if( End_Index(i, bonds) > comp ) { + fprintf( stderr, "step%d-bondchk failed: i=%d end(i)=%d str(i+1)=%d\n", + step, i, End_Index(i,bonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + } + + + /* hbonds list */ + if( numH > 0 ) { + hbonds = *lists + HBONDS; + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for( i = 0; i < n; ++i ) { + Hindex = system->my_atoms[i].Hindex; + if( Hindex > -1 ) { + system->my_atoms[i].num_hbonds = + (int)(MAX( Num_Entries(Hindex, hbonds)*saferzone, MIN_HBONDS )); + + if( Hindex < numH-1 ) + comp = Start_Index(Hindex+1, hbonds); + else comp = hbonds->num_intrs; + + if( End_Index(Hindex, hbonds) > comp ) { + fprintf(stderr,"step%d-hbondchk failed: H=%d end(H)=%d str(H+1)=%d\n", + step, Hindex, End_Index(Hindex,hbonds), comp ); + MPI_Abort( comm, INSUFFICIENT_MEMORY ); + } + } + } + } + + } // omp parallel +} + + +void Init_Forces_noQEq_OMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + MPI_Comm comm ) { +#ifdef OMP_TIMING + double startTimeBase, endTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int i, j, pi, pj; + int start_i, end_i, start_j, end_j; + int type_i, type_j; + int ihb, jhb, ihb_top, jhb_top; + int local, flag; + double r_ij, cutoff; + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + far_neighbor_data *nbr_pj; + reax_atom *atom_i, *atom_j; + bond_data *ibond, *jbond; + reax_list *far_nbrs = *lists + FAR_NBRS; + reax_list *bonds = *lists + BONDS; + reax_list *hbonds = 
*lists + HBONDS; + int num_bonds = 0; + int num_hbonds = 0; + int btop_i = 0; + int btop_j = 0; + int renbr = (data->step-data->prev_steps) % control->reneighbor == 0; + + // We will use CdDeltaReduction as a temporary (double) buffer to accumulate total_bond_order + // This is safe because CdDeltaReduction is currently zeroed and its accumulation doesn't start until BondsOMP() + double * tmp_bond_order = workspace->CdDeltaReduction; + + // We do the same with forceReduction as a temporary (rvec) buffer to accumulate dDeltap_self + // This is safe because forceReduction is currently zeroed and its accumulation does start until Hydrogen_BondsOMP() + rvec * tmp_ddelta = workspace->forceReduction; + + /* uncorrected bond orders */ + cutoff = control->bond_cut; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) \ + private(i, atom_i, type_i, pi, start_i, end_i, sbp_i, btop_i, ibond, ihb, ihb_top, \ + j, atom_j, type_j, pj, start_j, end_j, sbp_j, nbr_pj, jbond, jhb, twbp) +#endif + { + + int nthreads = control->nthreads; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + long reductionOffset = system->N * tid; + long totalReductionSize = system->N * nthreads; + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) reduction(+ : num_bonds) +#endif + for (i = 0; i < system->N; ++i) { + atom_i = &(system->my_atoms[i]); + type_i = atom_i->type; + sbp_i = &(system->reax_param.sbp[type_i]); + + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); + if (nbr_pj->d <= cutoff) { + j = nbr_pj->nbr; + atom_j = &(system->my_atoms[j]); + type_j = atom_j->type; + sbp_j = &(system->reax_param.sbp[type_j]); + twbp = &(system->reax_param.tbp[type_i][type_j]); + +// #pragma omp critical +// { +// btop_i = End_Index(i, bonds); +// if( BOp(workspace, bonds, control->bo_cut, i, btop_i, nbr_pj, sbp_i, sbp_j, twbp) ) { +// 
num_bonds++; +// btop_i++; +// Set_End_Index(i, btop_i, bonds); +// } + +// } + + // Trying to minimize time spent in critical section by moving initial part of BOp() + // outside of critical section. + + // Start top portion of BOp() + int jj = nbr_pj->nbr; + double C12, C34, C56; + double BO, BO_s, BO_pi, BO_pi2; + double bo_cut = control->bo_cut; + + if( sbp_i->r_s > 0.0 && sbp_j->r_s > 0.0 ) { + C12 = twbp->p_bo1 * pow( nbr_pj->d / twbp->r_s, twbp->p_bo2 ); + BO_s = (1.0 + bo_cut) * exp( C12 ); + } + else BO_s = C12 = 0.0; + + if( sbp_i->r_pi > 0.0 && sbp_j->r_pi > 0.0 ) { + C34 = twbp->p_bo3 * pow( nbr_pj->d / twbp->r_p, twbp->p_bo4 ); + BO_pi = exp( C34 ); + } + else BO_pi = C34 = 0.0; + + if( sbp_i->r_pi_pi > 0.0 && sbp_j->r_pi_pi > 0.0 ) { + C56 = twbp->p_bo5 * pow( nbr_pj->d / twbp->r_pp, twbp->p_bo6 ); + BO_pi2= exp( C56 ); + } + else BO_pi2 = C56 = 0.0; + + /* Initially BO values are the uncorrected ones, page 1 */ + BO = BO_s + BO_pi + BO_pi2; + // End top portion of BOp() + + if(BO >= bo_cut) { + int btop_j; + + // Update indices in critical section +#if defined(_OPENMP) +#pragma omp critical +#endif + { + btop_i = End_Index( i, bonds ); + btop_j = End_Index( j, bonds ); + Set_End_Index( j, btop_j+1, bonds ); + Set_End_Index( i, btop_i+1, bonds ); + } // omp critical + + // Finish remaining BOp() work + BOp_OMP(workspace, bonds, bo_cut, + i , btop_i, nbr_pj, sbp_i, sbp_j, twbp, btop_j, + C12, C34, C56, BO, BO_s, BO_pi, BO_pi2); + + bond_data * ibond = &(bonds->select.bond_list[btop_i]); + bond_order_data * bo_ij = &(ibond->bo_data); + + bond_data * jbond = &(bonds->select.bond_list[btop_j]); + bond_order_data * bo_ji = &(jbond->bo_data); + + workspace->total_bond_order[i] += bo_ij->BO; + tmp_bond_order[reductionOffset + j] += bo_ji->BO; + + rvec_Add(workspace->dDeltap_self[i], bo_ij->dBOp); + rvec_Add(tmp_ddelta[reductionOffset + j], bo_ji->dBOp); + + btop_i++; + num_bonds++; + } // if(BO>=bo_cut) + + } // if(cutoff) + + } // for(pj) + } // for(i) + + 
// Need to wait for all indices and tmp arrays accumulated. +#if defined(_OPENMP) +#pragma omp barrier +#endif + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for(i=0; i<system->N; i++) + for(int t=0; t<nthreads; t++) { + const int indx = t*system->N + i; + workspace->dDeltap_self[i][0] += tmp_ddelta[indx][0]; + workspace->dDeltap_self[i][1] += tmp_ddelta[indx][1]; + workspace->dDeltap_self[i][2] += tmp_ddelta[indx][2]; + workspace->total_bond_order[i] += tmp_bond_order[indx]; + } + + /* hydrogen bond list */ + if (control->hbond_cut > 0) { + cutoff = control->hbond_cut; + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) reduction(+ : num_hbonds) +#endif + for (i = 0; i < system->n; ++i) { + atom_i = &(system->my_atoms[i]); + type_i = atom_i->type; + sbp_i = &(system->reax_param.sbp[type_i]); + ihb = sbp_i->p_hbond; + +#if defined(_OPENMP) +#pragma omp critical +#endif + { + + if (ihb == 1 || ihb == 2) { + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + + for (pj = start_i; pj < end_i; ++pj) { + nbr_pj = &( far_nbrs->select.far_nbr_list[pj] ); + j = nbr_pj->nbr; + atom_j = &(system->my_atoms[j]); + type_j = atom_j->type; + if(type_j < 0) continue; + sbp_j = &(system->reax_param.sbp[type_j]); + jhb = sbp_j->p_hbond; + + if (nbr_pj->d <= control->hbond_cut) { + int iflag = 0; + int jflag = 0; + + if(ihb==1 && jhb==2) iflag = 1; + else if(j<system->n && ihb == 2 && jhb == 1) jflag = 1; + + if(iflag || jflag) { + if(iflag) { + ihb_top = End_Index(atom_i->Hindex, hbonds); + Set_End_Index(atom_i->Hindex, ihb_top+1, hbonds); + } else if(jflag) { + jhb_top = End_Index(atom_j->Hindex, hbonds); + Set_End_Index(atom_j->Hindex, jhb_top+1, hbonds); + } + + if(iflag) { + hbonds->select.hbond_list[ihb_top].nbr = j; + hbonds->select.hbond_list[ihb_top].scl = 1; + hbonds->select.hbond_list[ihb_top].ptr = nbr_pj; + } else if(jflag) { + hbonds->select.hbond_list[jhb_top].nbr = i; + hbonds->select.hbond_list[jhb_top].scl = -1; 
+ hbonds->select.hbond_list[jhb_top].ptr = nbr_pj; + } + + num_hbonds++; + } // if(iflag || jflag) + + } + } + } + + } // omp critical + } + + } // if(control->hbond > 0) + + // Zero buffers for others to use as intended. +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for(i=0; i<totalReductionSize; i++) { + tmp_ddelta[i][0] = 0.0; + tmp_ddelta[i][1] = 0.0; + tmp_ddelta[i][2] = 0.0; + tmp_bond_order[i] = 0.0; + } + + } // omp + + workspace->realloc.num_bonds = num_bonds; + workspace->realloc.num_hbonds = num_hbonds; + + Validate_ListsOMP( system, workspace, lists, data->step, + system->n, system->N, system->numH, comm ); + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEIFINDEX] += (endTimeBase-startTimeBase); +#endif +} + +/* ---------------------------------------------------------------------- */ + +void Compute_ForcesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data ) +{ + int qeq_flag; + MPI_Comm comm = mpi_data->world; + + // Init Forces +#if defined(LOG_PERFORMANCE) + double t_start = 0; + if( system->my_rank == MASTER_NODE ) + t_start = Get_Time( ); +#endif + + Init_Forces_noQEq_OMP( system, control, data, workspace, + lists, out_control, comm ); + +#if defined(LOG_PERFORMANCE) + //MPI_Barrier( comm ); + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.init_forces) ); +#endif + + // Bonded Interactions + Compute_Bonded_ForcesOMP( system, control, data, workspace, + lists, out_control, mpi_data->world ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.bonded) ); +#endif + + // Nonbonded Interactions + Compute_NonBonded_ForcesOMP( system, control, data, workspace, + lists, out_control, mpi_data->world ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( 
&t_start, &(data->timing.nonb) ); +#endif + + // Total Force + Compute_Total_ForceOMP( system, control, data, workspace, lists, mpi_data ); + +#if defined(LOG_PERFORMANCE) + if( system->my_rank == MASTER_NODE ) + Update_Timing_Info( &t_start, &(data->timing.bonded) ); +#endif +} diff --git a/src/USER-OMP/reaxc_forces_omp.h b/src/USER-OMP/reaxc_forces_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..f4941fc7fa5b6e79fe958c68814d68ab8b643f2d --- /dev/null +++ b/src/USER-OMP/reaxc_forces_omp.h @@ -0,0 +1,36 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __FORCES_OMP_H_ +#define __FORCES_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_defs.h" + +void Init_Force_FunctionsOMP( control_params* ); +void Compute_ForcesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls*, mpi_datatypes* ); +#endif diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c446151150cafef4e5831666293bf964f2c5c468 --- /dev/null +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.cpp @@ -0,0 +1,252 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" + +#include "reaxc_hydrogen_bonds_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_valence_angles.h" // To access Calculate_Theta() +#include "reaxc_valence_angles_omp.h" // To access Calculate_dCos_ThetaOMP() +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Hydrogen_BondsOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + const int nthreads = control->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) //default(none) +#endif + { + int i, j, k, pi, pk; + int type_i, type_j, type_k; + int start_j, end_j, hb_start_j, hb_end_j; + int hblist[MAX_BONDS]; + int itr, top; + int num_hb_intrs = 0; + ivec rel_jk; + double r_jk, theta, cos_theta, sin_xhz4, cos_xhz1, sin_theta2; + double e_hb, e_hb_thr = 0.0, exp_hb2, exp_hb3, CEhb1, CEhb2, CEhb3; + rvec dcos_theta_di, dcos_theta_dj, dcos_theta_dk; + rvec dvec_jk, force, ext_press; + hbond_parameters *hbp; + bond_order_data *bo_ij; + bond_data *pbond_ij; + far_neighbor_data *nbr_jk; + reax_list *bonds, *hbonds; + bond_data *bond_list; + hbond_data *hbond_list; + + // tally variables + double fi_tmp[3], fk_tmp[3], delij[3], delkj[3]; + + bonds = (*lists) + BONDS; + bond_list = bonds->select.bond_list; + hbonds = (*lists) + HBONDS; + hbond_list = hbonds->select.hbond_list; + + int natoms = system->n; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + const int idelta = 1 + natoms/nthreads; + int ifrom = tid*idelta; + int ito = ((ifrom + idelta) > natoms) ? 
natoms : ifrom + idelta; + + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + + /* loops below discover the Hydrogen bonds between i-j-k triplets. + here j is H atom and there has to be some bond between i and j. + Hydrogen bond is between j and k. + so in this function i->X, j->H, k->Z when we map + variables onto the ones in the handout.*/ + // for( j = 0; j < system->n; ++j ) + for( j = ifrom; j < ito; ++j ) { + /* j has to be of type H */ + if( system->reax_param.sbp[system->my_atoms[j].type].p_hbond == 1 ) { + /*set j's variables */ + type_j = system->my_atoms[j].type; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + hb_start_j = Start_Index( system->my_atoms[j].Hindex, hbonds ); + hb_end_j = End_Index( system->my_atoms[j].Hindex, hbonds ); + if(type_j < 0) continue; + + top = 0; + for( pi = start_j; pi < end_j; ++pi ) { + pbond_ij = &( bond_list[pi] ); + i = pbond_ij->nbr; + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + bo_ij = &(pbond_ij->bo_data); + + if( system->reax_param.sbp[type_i].p_hbond == 2 && + bo_ij->BO >= HB_THRESHOLD ) + hblist[top++] = pi; + } + + for( pk = hb_start_j; pk < hb_end_j; ++pk ) { + /* set k's varibles */ + k = hbond_list[pk].nbr; + type_k = system->my_atoms[k].type; + if(type_k < 0) continue; + nbr_jk = hbond_list[pk].ptr; + r_jk = nbr_jk->d; + rvec_Scale( dvec_jk, hbond_list[pk].scl, nbr_jk->dvec ); + + for( itr = 0; itr < top; ++itr ) { + pi = hblist[itr]; + pbond_ij = &( bonds->select.bond_list[pi] ); + i = pbond_ij->nbr; + + if( system->my_atoms[i].orig_id != system->my_atoms[k].orig_id ) { + bo_ij = &(pbond_ij->bo_data); + type_i = 
system->my_atoms[i].type; + if(type_i < 0) continue; + hbp = &(system->reax_param.hbp[ type_i ][ type_j ][ type_k ]); + ++num_hb_intrs; + + Calculate_Theta( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &theta, &cos_theta ); + /* the derivative of cos(theta) */ + Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d, dvec_jk, r_jk, + &dcos_theta_di, &dcos_theta_dj, + &dcos_theta_dk ); + + /* hydrogen bond energy*/ + sin_theta2 = sin( theta/2.0 ); + sin_xhz4 = SQR(sin_theta2); + sin_xhz4 *= sin_xhz4; + cos_xhz1 = ( 1.0 - cos_theta ); + exp_hb2 = exp( -hbp->p_hb2 * bo_ij->BO ); + exp_hb3 = exp( -hbp->p_hb3 * ( hbp->r0_hb / r_jk + + r_jk / hbp->r0_hb - 2.0 ) ); + + e_hb_thr += e_hb = hbp->p_hb1 * (1.0 - exp_hb2) * exp_hb3 * sin_xhz4; + + CEhb1 = hbp->p_hb1 * hbp->p_hb2 * exp_hb2 * exp_hb3 * sin_xhz4; + CEhb2 = -hbp->p_hb1/2.0 * (1.0 - exp_hb2) * exp_hb3 * cos_xhz1; + CEhb3 = -hbp->p_hb3 * + (-hbp->r0_hb / SQR(r_jk) + 1.0 / hbp->r0_hb) * e_hb; + + /* hydrogen bond forces */ + bo_ij->Cdbo += CEhb1; // dbo term + + if( control->virial == 0 ) { + // dcos terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+i], +CEhb2, dcos_theta_di ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, dcos_theta_dj ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb2, dcos_theta_dk ); + // dr terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], -CEhb3/r_jk, dvec_jk ); + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+k], +CEhb3/r_jk, dvec_jk ); + } + else { + /* for pressure coupling, terms that are not related to bond order + derivatives are added directly into pressure vector/tensor */ + rvec_Scale( force, +CEhb2, dcos_theta_di ); // dcos terms + rvec_Add(workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j], +CEhb2, 
dcos_theta_dj ); + + ivec_Scale( rel_jk, hbond_list[pk].scl, nbr_jk->rel_box ); + rvec_Scale( force, +CEhb2, dcos_theta_dk ); + rvec_Add(workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + // dr terms + rvec_ScaledAdd(workspace->forceReduction[reductionOffset+j],-CEhb3/r_jk, dvec_jk ); + + rvec_Scale( force, CEhb3/r_jk, dvec_jk ); + rvec_Add(workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, rel_jk, force ); + rvec_ScaledAdd( workspace->my_ext_pressReduction[tid],1.0, ext_press ); + } + + /* tally into per-atom virials */ + if (system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { + rvec_ScaledSum( delij, 1., system->my_atoms[j].x, + -1., system->my_atoms[i].x ); + rvec_ScaledSum( delkj, 1., system->my_atoms[j].x, + -1., system->my_atoms[k].x ); + + rvec_Scale(fi_tmp, CEhb2, dcos_theta_di); + rvec_Scale(fk_tmp, CEhb2, dcos_theta_dk); + rvec_ScaledAdd(fk_tmp, CEhb3/r_jk, dvec_jk); + + pair_reax_ptr->ev_tally3_thr_proxy(system->pair_ptr,i,j,k,e_hb,0.0,fi_tmp,fk_tmp,delij,delkj,thr); + } + } + } + } + + } + } +#if defined(_OPENMP) +#pragma omp critical +#endif + { + data->my_en.e_hb += e_hb_thr; + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); +} + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEHBONDSINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_hydrogen_bonds_omp.h b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..b4a2d78dbb22fe49317a73e5594d2531d28c3837 --- /dev/null +++ b/src/USER-OMP/reaxc_hydrogen_bonds_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin 
Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __HBONDS_OMP_H_ +#define __HBONDS_OMP_H_ + +#include "reaxc_types.h" + +void Hydrogen_BondsOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_init_md_omp.cpp b/src/USER-OMP/reaxc_init_md_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6cce08a041d19f95132644ff87612b1715838342 --- /dev/null +++ b/src/USER-OMP/reaxc_init_md_omp.cpp @@ -0,0 +1,186 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "reaxc_init_md_omp.h" +#include "reaxc_allocate.h" +#include "reaxc_forces.h" +#include "reaxc_forces_omp.h" +#include "reaxc_io_tools.h" +#include "reaxc_list.h" +#include "reaxc_lookup.h" +#include "reaxc_reset_tools.h" +#include "reaxc_system_props.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" + +// Functions defined in reaxc_init_md.cpp +extern int Init_MPI_Datatypes(reax_system*, storage*, mpi_datatypes*, MPI_Comm, char*); +extern int Init_System(reax_system*, control_params*, char*); +extern int Init_Simulation_Data(reax_system*, control_params*, simulation_data*, char*); +extern int Init_Workspace(reax_system*, control_params*, storage*, MPI_Comm, char*); + +/* ---------------------------------------------------------------------- */
+
+/* Size and create the HBONDS, BONDS and THREE_BODIES lists.
+
+   Per-atom counts come from Estimate_Storages().  Each list capacity is the
+   larger of (estimated total * safety factor) and a minimum:
+     - hbonds : total*saferzone vs mincap*MIN_HBONDS (only if hbond_cut > 0)
+     - bonds  : total*safezone  vs mincap*MIN_BONDS
+     - 3-body : num_3body*safezone vs MIN_3BODIES
+   The counts are also stored in my_atoms[i].num_hbonds / num_bonds.
+
+   After the bond list exists, every bond slot (bonds->num_intrs of them)
+   gets a CdboReduction array of control->nthreads doubles.
+
+   Returns SUCCESS.  Allocation or list-creation failure ends the run through
+   MPI_Abort( comm, INSUFFICIENT_MEMORY ).  data, workspace and msg are not
+   used. */
+int Init_ListsOMP( reax_system *system, control_params *control,
+                   simulation_data *data, storage *workspace, reax_list **lists,
+                   mpi_datatypes *mpi_data, char *msg )
+{
+  int i, total_hbonds, total_bonds, bond_cap, num_3body, cap_3body, Htop;
+  int *hb_top, *bond_top;
+  MPI_Comm comm;
+
+  int mincap = system->mincap;
+  double safezone = system->safezone;
+  double saferzone = system->saferzone;
+
+  comm = mpi_data->world;
+  bond_top = (int*) calloc( system->total_cap, sizeof(int) );
+  hb_top = (int*) calloc( system->local_cap, sizeof(int) );
+
+  /* calloc() may legitimately return NULL for a zero-sized request, so only
+     a NULL result for a non-empty request is treated as out-of-memory. */
+  if( (bond_top == NULL && system->total_cap > 0) ||
+      (hb_top == NULL && system->local_cap > 0) ) {
+    fprintf( stderr, "not enough space for bond/hbond counters. terminating!\n" );
+    MPI_Abort( comm, INSUFFICIENT_MEMORY );
+  }
+
+  Estimate_Storages( system, control, lists,
+                     &Htop, hb_top, bond_top, &num_3body, comm );
+
+  if( control->hbond_cut > 0 ) {
+    /* init H indexes */
+    total_hbonds = 0;
+    for( i = 0; i < system->n; ++i ) {
+      system->my_atoms[i].num_hbonds = hb_top[i];
+      total_hbonds += hb_top[i];
+    }
+    total_hbonds = (int)(MAX( total_hbonds*saferzone, mincap*MIN_HBONDS ));
+
+    if( !Make_List( system->Hcap, total_hbonds, TYP_HBOND,
+                    *lists+HBONDS, comm ) ) {
+      fprintf( stderr, "not enough space for hbonds list. terminating!\n" );
+      MPI_Abort( comm, INSUFFICIENT_MEMORY );
+    }
+  }
+
+  total_bonds = 0;
+  for( i = 0; i < system->N; ++i ) {
+    system->my_atoms[i].num_bonds = bond_top[i];
+    total_bonds += bond_top[i];
+  }
+  bond_cap = (int)(MAX( total_bonds*safezone, mincap*MIN_BONDS ));
+
+  if( !Make_List( system->total_cap, bond_cap, TYP_BOND,
+                  *lists+BONDS, comm ) ) {
+    fprintf( stderr, "not enough space for bonds list. terminating!\n" );
+    MPI_Abort( comm, INSUFFICIENT_MEMORY );
+  }
+
+  /* one per-thread accumulator array for every bond slot */
+  int nthreads = control->nthreads;
+  reax_list *bonds = (*lists)+BONDS;
+
+  for (i = 0; i < bonds->num_intrs; ++i)
+    bonds->select.bond_list[i].bo_data.CdboReduction =
+      (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm);
+
+  /* 3bodies list */
+  cap_3body = (int)(MAX( num_3body*safezone, MIN_3BODIES ));
+  if( !Make_List( bond_cap, cap_3body, TYP_THREE_BODY,
+                  *lists+THREE_BODIES, comm ) ){
+
+    fprintf( stderr, "Problem in initializing angles list. Terminating!\n" );
+    MPI_Abort( comm, INSUFFICIENT_MEMORY );
+  }
+
+  free( hb_top );
+  free( bond_top );
+
+  return SUCCESS;
+}
+ +/* ---------------------------------------------------------------------- */ + +// The only difference with the MPI-only function is calls to Init_ListsOMP and Init_Force_FunctionsOMP(). 
+void InitializeOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control, + mpi_datatypes *mpi_data, MPI_Comm comm ) +{ + char msg[MAX_STR]; + + + if( Init_MPI_Datatypes(system, workspace, mpi_data, comm, msg) == FAILURE ) { + fprintf( stderr, "p%d: init_mpi_datatypes: could not create datatypes\n", + system->my_rank ); + fprintf( stderr, "p%d: mpi_data couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_System(system, control, msg) == FAILURE ){ + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: system could not be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Simulation_Data( system, control, data, msg ) == FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: sim_data couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Workspace( system, control, workspace, mpi_data->world, msg ) == + FAILURE ) { + fprintf( stderr, "p%d:init_workspace: not enough memory\n", + system->my_rank ); + fprintf( stderr, "p%d:workspace couldn't be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_ListsOMP( system, control, data, workspace, lists, mpi_data, msg ) == + FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: system could not be initialized! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( Init_Output_Files(system,control,out_control,mpi_data,msg)== FAILURE) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: could not open output files! 
terminating...\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + + if( control->tabulate ) { + if( Init_Lookup_Tables( system, control, workspace, mpi_data, msg ) == FAILURE ) { + fprintf( stderr, "p%d: %s\n", system->my_rank, msg ); + fprintf( stderr, "p%d: couldn't create lookup table! terminating.\n", + system->my_rank ); + MPI_Abort( mpi_data->world, CANNOT_INITIALIZE ); + } + } + + + Init_Force_FunctionsOMP( control ); +} + diff --git a/src/USER-OMP/reaxc_init_md_omp.h b/src/USER-OMP/reaxc_init_md_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..bb4e9c7061bb7b2dd048656408c54180c04dc341 --- /dev/null +++ b/src/USER-OMP/reaxc_init_md_omp.h @@ -0,0 +1,34 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __INIT_MD_OMP_H_ +#define __INIT_MD_OMP_H_ + +#include "reaxc_types.h" + +void InitializeOMP( reax_system*, control_params*, simulation_data*, storage*, + reax_list**, output_controls*, mpi_datatypes*, MPI_Comm ); +#endif diff --git a/src/USER-OMP/reaxc_multi_body_omp.cpp b/src/USER-OMP/reaxc_multi_body_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..acbe4ec268e74f7af165c082ad4c4299cccad96f --- /dev/null +++ b/src/USER-OMP/reaxc_multi_body_omp.cpp @@ -0,0 +1,297 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "thr_data.h" + +#include "reaxc_multi_body_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void Atom_EnergyOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, reax_list **lists, + output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + /* Initialize parameters */ + double p_lp1 = system->reax_param.gp.l[15]; + double p_lp3 = system->reax_param.gp.l[5]; + double p_ovun3 = system->reax_param.gp.l[32]; + double p_ovun4 = system->reax_param.gp.l[31]; + double p_ovun6 = system->reax_param.gp.l[6]; + double p_ovun7 = system->reax_param.gp.l[8]; + double p_ovun8 = system->reax_param.gp.l[9]; + + int natoms = system->n; + int nthreads = control->nthreads; + reax_list *bonds = (*lists) + BONDS; + + double total_Elp = 0.0; + double total_Eun = 0.0; + double total_Eov = 0.0; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) reduction(+:total_Elp, total_Eun, total_Eov) +#endif +{ + int i, j, pj, type_i, type_j; + double Delta_lpcorr, dfvl; + double e_lp, expvd2, inv_expvd2, dElp, CElp, DlpVi; + double e_lph, Di, vov3, deahu2dbo, deahu2dsbo; + double e_ov, CEover1, CEover2, CEover3, CEover4; + double exp_ovun1, exp_ovun2, sum_ovun1, sum_ovun2; + double exp_ovun2n, exp_ovun6, exp_ovun8; + double inv_exp_ovun1, inv_exp_ovun2, inv_exp_ovun2n, inv_exp_ovun8; + double e_un, CEunder1, CEunder2, CEunder3, CEunder4; + double eng_tmp, f_tmp; + double p_lp2, p_ovun2, p_ovun5; + int numbonds; + + single_body_parameters *sbp_i, *sbp_j; + two_body_parameters *twbp; + bond_data *pbond; + bond_order_data *bo_ij; + +#if defined(_OPENMP) + int 
tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + + long reductionOffset = (system->N * tid); + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, natoms, + system->pair_ptr->eatom, system->pair_ptr->vatom, thr); + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for ( i = 0; i < system->n; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[ type_i ]); + + /* lone-pair Energy */ + p_lp2 = sbp_i->p_lp2; + expvd2 = exp( -75 * workspace->Delta_lp[i] ); + inv_expvd2 = 1. / (1. + expvd2 ); + + numbonds = 0; + e_lp = 0.0; + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + numbonds ++; + + /* calculate the energy */ + if(numbonds > 0) + total_Elp += e_lp = + p_lp2 * workspace->Delta_lp[i] * inv_expvd2; + + dElp = p_lp2 * inv_expvd2 + + 75 * p_lp2 * workspace->Delta_lp[i] * expvd2 * SQR(inv_expvd2); + CElp = dElp * workspace->dDelta_lp[i]; + + if(numbonds > 0) workspace->CdDelta[i] += CElp; // lp - 1st term + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + e_lp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + /* correction for C2 */ + if( p_lp3 > 0.001 && !strcmp(system->reax_param.sbp[type_i].name, "C") ) + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + + if( !strcmp( system->reax_param.sbp[type_j].name, "C" ) ) { + twbp = &( system->reax_param.tbp[type_i][type_j]); + bo_ij = &( bonds->select.bond_list[pj].bo_data ); + Di = workspace->Delta[i]; + vov3 = bo_ij->BO - Di - 0.040*pow(Di, 4.); + + if( vov3 > 3. 
) { + total_Elp += e_lph = p_lp3 * SQR(vov3-3.0); + + deahu2dbo = 2.*p_lp3*(vov3 - 3.); + deahu2dsbo = 2.*p_lp3*(vov3 - 3.)*(-1. - 0.16*pow(Di, 3.)); + + bo_ij->Cdbo += deahu2dbo; + workspace->CdDelta[i] += deahu2dsbo; + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, system->n, 1, + e_lph, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + } + } + } + } +#if defined(_OPENMP) +#pragma omp barrier +#pragma omp for schedule(guided) +#endif + for (i = 0; i < system->n; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + sbp_i = &(system->reax_param.sbp[ type_i ]); + + /* over-coordination energy */ + if( sbp_i->mass > 21.0 ) + dfvl = 0.0; + else dfvl = 1.0; // only for 1st-row elements + + p_ovun2 = sbp_i->p_ovun2; + sum_ovun1 = sum_ovun2 = 0; + for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) { + j = bonds->select.bond_list[pj].nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + bo_ij = &(bonds->select.bond_list[pj].bo_data); + twbp = &(system->reax_param.tbp[ type_i ][ type_j ]); + + sum_ovun1 += twbp->p_ovun1 * twbp->De_s * bo_ij->BO; + sum_ovun2 += (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j])* + ( bo_ij->BO_pi + bo_ij->BO_pi2 ); + } + + exp_ovun1 = p_ovun3 * exp( p_ovun4 * sum_ovun2 ); + inv_exp_ovun1 = 1.0 / (1 + exp_ovun1); + Delta_lpcorr = workspace->Delta[i] - + (dfvl * workspace->Delta_lp_temp[i]) * inv_exp_ovun1; + + exp_ovun2 = exp( p_ovun2 * Delta_lpcorr ); + inv_exp_ovun2 = 1.0 / (1.0 + exp_ovun2); + + DlpVi = 1.0 / (Delta_lpcorr + sbp_i->valency + 1e-8); + CEover1 = Delta_lpcorr * DlpVi * inv_exp_ovun2; + + total_Eov += e_ov = sum_ovun1 * CEover1; + + CEover2 = sum_ovun1 * DlpVi * inv_exp_ovun2 * + (1.0 - Delta_lpcorr * ( DlpVi + p_ovun2 * exp_ovun2 * inv_exp_ovun2 )); + + CEover3 = CEover2 * (1.0 - dfvl * workspace->dDelta_lp[i] * inv_exp_ovun1 ); + + CEover4 = CEover2 * (dfvl * workspace->Delta_lp_temp[i]) * + p_ovun4 * 
exp_ovun1 * SQR(inv_exp_ovun1); + + + /* under-coordination potential */ + p_ovun2 = sbp_i->p_ovun2; + p_ovun5 = sbp_i->p_ovun5; + + exp_ovun2n = 1.0 / exp_ovun2; + exp_ovun6 = exp( p_ovun6 * Delta_lpcorr ); + exp_ovun8 = p_ovun7 * exp(p_ovun8 * sum_ovun2); + inv_exp_ovun2n = 1.0 / (1.0 + exp_ovun2n); + inv_exp_ovun8 = 1.0 / (1.0 + exp_ovun8); + + numbonds = 0; + e_un = 0.0; + for( pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj ) + numbonds ++; + + if(numbonds > 0) total_Eun += e_un = + -p_ovun5 * (1.0 - exp_ovun6) * inv_exp_ovun2n * inv_exp_ovun8; + + CEunder1 = inv_exp_ovun2n * + ( p_ovun5 * p_ovun6 * exp_ovun6 * inv_exp_ovun8 + + p_ovun2 * e_un * exp_ovun2n ); + CEunder2 = -e_un * p_ovun8 * exp_ovun8 * inv_exp_ovun8; + CEunder3 = CEunder1 * (1.0 - dfvl*workspace->dDelta_lp[i]*inv_exp_ovun1); + CEunder4 = CEunder1 * (dfvl*workspace->Delta_lp_temp[i]) * + p_ovun4 * exp_ovun1 * SQR(inv_exp_ovun1) + CEunder2; + + /* tally into per-atom energy */ + if (system->pair_ptr->evflag) { + eng_tmp = e_ov; + if(numbonds > 0) eng_tmp+= e_un; + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, i, system->n, 1, + eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + } + + /* forces */ + workspace->CdDelta[i] += CEover3; // OvCoor - 2nd term + if(numbonds > 0) workspace->CdDelta[i] += CEunder3; // UnCoor - 1st term + + for (pj = Start_Index(i, bonds); pj < End_Index(i, bonds); ++pj) { + pbond = &(bonds->select.bond_list[pj]); + j = pbond->nbr; + bo_ij = &(pbond->bo_data); + twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] + [system->my_atoms[pbond->nbr].type]); + + bo_ij->Cdbo += CEover1 * twbp->p_ovun1 * twbp->De_s; // OvCoor-1st + workspace->CdDeltaReduction[reductionOffset+j] += + CEover4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // OvCoor-3a + + bo_ij->Cdbopi += CEover4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b + bo_ij->Cdbopi2 += CEover4 * + (workspace->Delta[j] - 
dfvl*workspace->Delta_lp_temp[j]); // OvCoor-3b + + workspace->CdDeltaReduction[reductionOffset+j] += + CEunder4 * (1.0 - dfvl*workspace->dDelta_lp[j]) * (bo_ij->BO_pi + bo_ij->BO_pi2); // UnCoor - 2a + + bo_ij->Cdbopi += CEunder4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b + bo_ij->Cdbopi2 += CEunder4 * + (workspace->Delta[j] - dfvl*workspace->Delta_lp_temp[j]); // UnCoor-2b + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + + } + + data->my_en.e_lp += total_Elp; + data->my_en.e_ov += total_Eov; + data->my_en.e_un += total_Eun; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTEATOMENERGYINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_multi_body_omp.h b/src/USER-OMP/reaxc_multi_body_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..b0746569cafffcc22f4bedb847f0779b3bd494ff --- /dev/null +++ b/src/USER-OMP/reaxc_multi_body_omp.h @@ -0,0 +1,35 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __MULTI_BODY_OMP_H_ +#define __MULTI_BODY_OMP_H_ + +#include "reaxc_types.h" + +void Atom_EnergyOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_nonbonded_omp.cpp b/src/USER-OMP/reaxc_nonbonded_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..38a6d9e860178602cc4da1cc94d6f3d43663b2b1 --- /dev/null +++ b/src/USER-OMP/reaxc_nonbonded_omp.cpp @@ -0,0 +1,400 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "thr_data.h" + +#include "reaxc_types.h" + +#include "reaxc_nonbonded.h" +#include "reaxc_nonbonded_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +void vdW_Coulomb_Energy_OMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) { + + int natoms = system->n; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + reax_list *far_nbrs = (*lists) + FAR_NBRS; + double p_vdW1 = system->reax_param.gp.l[28]; + double p_vdW1i = 1.0 / p_vdW1; + double total_EvdW = 0.; + double total_Eele = 0.; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) reduction(+: total_EvdW, total_Eele) +#endif + { +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + int i, j, pj; + int start_i, end_i, orig_i, orig_j, flag; + double powr_vdW1, powgi_vdW1; + double tmp, r_ij, fn13, exp1, exp2; + double Tap, dTap, dfn13, CEvd, CEclmb, de_core; + double dr3gamij_1, dr3gamij_3; + double e_ele, e_ele_thr, e_vdW, e_vdW_thr, e_core, SMALL = 0.0001; + double e_lg, de_lg, r_ij5, r_ij6, re6; + rvec temp, ext_press; + two_body_parameters *twbp; + far_neighbor_data *nbr_pj; + + // Tallying variables: + double pe_vdw, f_tmp, delij[3]; + + long reductionOffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + 
e_core = 0; + e_vdW = 0; + e_vdW_thr = 0; + e_lg = 0; + de_lg = 0.0; + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for( i = 0; i < natoms; ++i ) { + if(system->my_atoms[i].type < 0) continue; + start_i = Start_Index(i, far_nbrs); + end_i = End_Index(i, far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for( pj = start_i; pj < end_i; ++pj ) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + if(nbr_pj->d <= control->nonb_cut) { + if(j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + } + + if (flag) { + + r_ij = nbr_pj->d; + twbp = &(system->reax_param.tbp[ system->my_atoms[i].type ] + [ system->my_atoms[j].type ]); + + /* Calculate Taper and its derivative */ + // Tap = nbr_pj->Tap; -- precomputed during compte_H + Tap = workspace->Tap[7] * r_ij + workspace->Tap[6]; + Tap = Tap * r_ij + workspace->Tap[5]; + Tap = Tap * r_ij + workspace->Tap[4]; + Tap = Tap * r_ij + workspace->Tap[3]; + Tap = Tap * r_ij + workspace->Tap[2]; + Tap = Tap * r_ij + workspace->Tap[1]; + Tap = Tap * r_ij + workspace->Tap[0]; + + dTap = 7*workspace->Tap[7] * r_ij + 6*workspace->Tap[6]; + dTap = dTap * r_ij + 5*workspace->Tap[5]; + dTap = dTap * r_ij + 4*workspace->Tap[4]; + dTap = dTap * r_ij + 3*workspace->Tap[3]; + dTap = dTap * r_ij + 2*workspace->Tap[2]; + dTap += workspace->Tap[1]/r_ij; + + /*vdWaals Calculations*/ + if(system->reax_param.gp.vdw_type==1 || system->reax_param.gp.vdw_type==3) + { // shielding + powr_vdW1 = pow(r_ij, p_vdW1); + powgi_vdW1 = pow( 1.0 / twbp->gamma_w, p_vdW1); + + fn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i ); + exp1 = exp( twbp->alpha * (1.0 - fn13 / twbp->r_vdW) ); + exp2 = exp( 0.5 * twbp->alpha * (1.0 - 
fn13 / twbp->r_vdW) ); + + e_vdW = twbp->D * (exp1 - 2.0 * exp2); + total_EvdW += Tap * e_vdW; + + dfn13 = pow( powr_vdW1 + powgi_vdW1, p_vdW1i - 1.0) * + pow(r_ij, p_vdW1 - 2.0); + + CEvd = dTap * e_vdW - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) * dfn13; + } + else{ // no shielding + exp1 = exp( twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); + exp2 = exp( 0.5 * twbp->alpha * (1.0 - r_ij / twbp->r_vdW) ); + + e_vdW = twbp->D * (exp1 - 2.0 * exp2); + total_EvdW += Tap * e_vdW; + + CEvd = dTap * e_vdW - + Tap * twbp->D * (twbp->alpha / twbp->r_vdW) * (exp1 - exp2) / r_ij; + } + + if(system->reax_param.gp.vdw_type==2 || system->reax_param.gp.vdw_type==3) + { // innner wall + e_core = twbp->ecore * exp(twbp->acore * (1.0-(r_ij/twbp->rcore))); + total_EvdW += Tap * e_core; + + de_core = -(twbp->acore/twbp->rcore) * e_core; + CEvd += dTap * e_core + Tap * de_core / r_ij; + + // lg correction, only if lgvdw is yes + if (control->lgflag) { + r_ij5 = pow( r_ij, 5.0 ); + r_ij6 = pow( r_ij, 6.0 ); + re6 = pow( twbp->lgre, 6.0 ); + + e_lg = -(twbp->lgcij/( r_ij6 + re6 )); + total_EvdW += Tap * e_lg; + + de_lg = -6.0 * e_lg * r_ij5 / ( r_ij6 + re6 ) ; + CEvd += dTap * e_lg + Tap * de_lg / r_ij; + } + + } + + /*Coulomb Calculations*/ + dr3gamij_1 = ( r_ij * r_ij * r_ij + twbp->gamma ); + dr3gamij_3 = pow( dr3gamij_1 , 0.33333333333333 ); + + tmp = Tap / dr3gamij_3; + total_Eele += e_ele = + C_ele * system->my_atoms[i].q * system->my_atoms[j].q * tmp; + + CEclmb = C_ele * system->my_atoms[i].q * system->my_atoms[j].q * + ( dTap - Tap * r_ij / dr3gamij_1 ) / dr3gamij_3; + + /* tally into per-atom energy */ + if( system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + pe_vdw = Tap * (e_vdW + e_core + e_lg); + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -(CEvd + CEclmb); + pair_reax_ptr->ev_tally_thr_proxy( system->pair_ptr, i, j, natoms, + 1, pe_vdw, e_ele, f_tmp, + delij[0], delij[1], delij[2], thr); + } + + 
if( control->virial == 0 ) { + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+j], + +(CEvd + CEclmb), nbr_pj->dvec ); + } + else { /* NPT, iNPT or sNPT */ + /* for pressure coupling, terms not related to bond order + derivatives are added directly into pressure vector/tensor */ + + rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + rvec_ScaledAdd( workspace->f[reductionOffset+i], -1., temp ); + rvec_Add( workspace->forceReduction[reductionOffset+j], temp); + + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + } // parallel region + + data->my_en.e_vdW = total_EvdW; + data->my_en.e_ele = total_Eele; + + Compute_Polarization_Energy( system, data ); +} + +/* ---------------------------------------------------------------------- */ + +void Tabulated_vdW_Coulomb_Energy_OMP(reax_system *system,control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, + output_controls *out_control ) { + + double SMALL = 0.0001; + int natoms = system->n; + reax_list *far_nbrs = (*lists) + FAR_NBRS; + int nthreads = control->nthreads; + long totalReductionSize = system->N * nthreads; + double total_EvdW = 0.; + double total_Eele = 0.; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) reduction(+:total_EvdW, total_Eele) +#endif + { + int i, j, pj, r; + int type_i, type_j, tmin, tmax; + int start_i, end_i, orig_i, orig_j, flag; + double r_ij, base, dif; + double e_vdW, e_ele; + double CEvd, CEclmb; + double f_tmp, delij[3]; + rvec temp, ext_press; + far_neighbor_data *nbr_pj; + LR_lookup_table *t; +#if defined(_OPENMP) + int tid = omp_get_thread_num(); + #else + int tid = 0; +#endif + long froffset = (system->N * tid); + + class PairReaxCOMP *pair_reax_ptr; + 
pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + natoms, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +#if defined(_OPENMP) +#pragma omp for schedule(guided) +#endif + for (i = 0; i < natoms; ++i) { + type_i = system->my_atoms[i].type; + if(type_i < 0) continue; + start_i = Start_Index(i,far_nbrs); + end_i = End_Index(i,far_nbrs); + orig_i = system->my_atoms[i].orig_id; + + for (pj = start_i; pj < end_i; ++pj) { + nbr_pj = &(far_nbrs->select.far_nbr_list[pj]); + j = nbr_pj->nbr; + type_j = system->my_atoms[j].type; + if(type_j < 0) continue; + orig_j = system->my_atoms[j].orig_id; + + flag = 0; + if(nbr_pj->d <= control->nonb_cut) { + if(j < natoms) flag = 1; + else if (orig_i < orig_j) flag = 1; + else if (orig_i == orig_j) { + if (nbr_pj->dvec[2] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[2]) < SMALL) { + if (nbr_pj->dvec[1] > SMALL) flag = 1; + else if (fabs(nbr_pj->dvec[1]) < SMALL && nbr_pj->dvec[0] > SMALL) + flag = 1; + } + } + + } + + if (flag) { + + r_ij = nbr_pj->d; + tmin = MIN( type_i, type_j ); + tmax = MAX( type_i, type_j ); + t = &( LR[tmin][tmax] ); + + /* Cubic Spline Interpolation */ + r = (int)(r_ij * t->inv_dx); + if( r == 0 ) ++r; + base = (double)(r+1) * t->dx; + dif = r_ij - base; + + e_vdW = ((t->vdW[r].d*dif + t->vdW[r].c)*dif + t->vdW[r].b)*dif + + t->vdW[r].a; + + e_ele = ((t->ele[r].d*dif + t->ele[r].c)*dif + t->ele[r].b)*dif + + t->ele[r].a; + e_ele *= system->my_atoms[i].q * system->my_atoms[j].q; + + total_EvdW += e_vdW; + total_Eele += e_ele; + + CEvd = ((t->CEvd[r].d*dif + t->CEvd[r].c)*dif + t->CEvd[r].b)*dif + + t->CEvd[r].a; + + CEclmb = ((t->CEclmb[r].d*dif+t->CEclmb[r].c)*dif+t->CEclmb[r].b)*dif + + t->CEclmb[r].a; + CEclmb *= system->my_atoms[i].q * system->my_atoms[j].q; + + /* tally into per-atom energy */ + if( 
system->pair_ptr->evflag || system->pair_ptr->vflag_atom) { + rvec_ScaledSum( delij, 1., system->my_atoms[i].x, + -1., system->my_atoms[j].x ); + f_tmp = -(CEvd + CEclmb); + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, i, j, natoms, 1, e_vdW, e_ele, + f_tmp, delij[0], delij[1], delij[2], thr); + } + + if( control->virial == 0 ) { + rvec_ScaledAdd( workspace->f[i], -(CEvd + CEclmb), nbr_pj->dvec ); + rvec_ScaledAdd( workspace->forceReduction[froffset+j], + +(CEvd + CEclmb), nbr_pj->dvec ); + } + else { // NPT, iNPT or sNPT + /* for pressure coupling, terms not related to bond order derivatives + are added directly into pressure vector/tensor */ + rvec_Scale( temp, CEvd + CEclmb, nbr_pj->dvec ); + + rvec_ScaledAdd( workspace->f[i], -1., temp ); + rvec_Add( workspace->forceReduction[froffset+j], temp ); + + rvec_iMultiply( ext_press, nbr_pj->rel_box, temp ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + } + } + } + + pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, thr); + } // end omp parallel + + data->my_en.e_vdW = total_EvdW; + data->my_en.e_ele = total_Eele; + + Compute_Polarization_Energy( system, data ); +} diff --git a/src/USER-OMP/reaxc_nonbonded_omp.h b/src/USER-OMP/reaxc_nonbonded_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..1ea51257cf361536f572f6052cae867b398d45c2 --- /dev/null +++ b/src/USER-OMP/reaxc_nonbonded_omp.h @@ -0,0 +1,38 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. 
Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#ifndef __NONBONDED_OMP_H_ +#define __NONBONDED_OMP_H_ + +#include "reaxc_types.h" + +void vdW_Coulomb_Energy_OMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +void Tabulated_vdW_Coulomb_Energy_OMP( reax_system*, control_params*, + simulation_data*, storage*, + reax_list**, output_controls* ); +#endif diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.cpp b/src/USER-OMP/reaxc_torsion_angles_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4ede439ed472404c29c4584e39ac0ec9d0845256 --- /dev/null +++ b/src/USER-OMP/reaxc_torsion_angles_omp.cpp @@ -0,0 +1,477 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. 
+ + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. + ----------------------------------------------------------------------*/ + +#include "pair_reaxc_omp.h" +#include "thr_data.h" + +#include "reaxc_types.h" +#include "reaxc_torsion_angles_omp.h" +#include "reaxc_bond_orders_omp.h" +#include "reaxc_list.h" +#include "reaxc_tool_box.h" +#include "reaxc_vector.h" + +#if defined(_OPENMP) +#include <omp.h> +#endif + +#define MIN_SINE 1e-10 + +using namespace LAMMPS_NS; + +// Functions defined in reaxc_torsion_angles.cpp +extern double Calculate_Omega(rvec, double, rvec, double, rvec, double, rvec, double, + three_body_interaction_data*, three_body_interaction_data*, + rvec, rvec, rvec, rvec, output_controls*); + +/* ---------------------------------------------------------------------- */ + +void Torsion_AnglesOMP( reax_system *system, control_params *control, + simulation_data *data, storage *workspace, + reax_list **lists, output_controls *out_control ) +{ +#ifdef OMP_TIMING + double endTimeBase, startTimeBase; + startTimeBase = MPI_Wtime(); +#endif + + int natoms = system->n; + reax_list *bonds = (*lists) + BONDS; + reax_list *thb_intrs = (*lists) + THREE_BODIES; + double p_tor2 = system->reax_param.gp.l[23]; + double p_tor3 = system->reax_param.gp.l[24]; + double p_tor4 = system->reax_param.gp.l[25]; + double p_cot2 = system->reax_param.gp.l[27]; + double total_Etor = 0; + double total_Econ = 0; + int nthreads = control->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(shared) 
reduction(+: total_Etor, total_Econ) +#endif + { + int i, j, k, l, pi, pj, pk, pl, pij, plk; + int type_i, type_j, type_k, type_l; + int start_j, end_j; + int start_pj, end_pj, start_pk, end_pk; + int num_frb_intrs = 0; + + double Delta_j, Delta_k; + double r_ij, r_jk, r_kl, r_li; + double BOA_ij, BOA_jk, BOA_kl; + + double exp_tor2_ij, exp_tor2_jk, exp_tor2_kl; + double exp_tor1, exp_tor3_DjDk, exp_tor4_DjDk, exp_tor34_inv; + double exp_cot2_jk, exp_cot2_ij, exp_cot2_kl; + double fn10, f11_DjDk, dfn11, fn12; + double theta_ijk, theta_jkl; + double sin_ijk, sin_jkl; + double cos_ijk, cos_jkl; + double tan_ijk_i, tan_jkl_i; + double omega, cos_omega, cos2omega, cos3omega; + rvec dcos_omega_di, dcos_omega_dj, dcos_omega_dk, dcos_omega_dl; + double CV, cmn, CEtors1, CEtors2, CEtors3, CEtors4; + double CEtors5, CEtors6, CEtors7, CEtors8, CEtors9; + double Cconj, CEconj1, CEconj2, CEconj3; + double CEconj4, CEconj5, CEconj6; + double e_tor, e_con; + rvec dvec_li; + rvec force, ext_press; + ivec rel_box_jl; + // rtensor total_rtensor, temp_rtensor; + four_body_header *fbh; + four_body_parameters *fbp; + bond_data *pbond_ij, *pbond_jk, *pbond_kl; + bond_order_data *bo_ij, *bo_jk, *bo_kl; + three_body_interaction_data *p_ijk, *p_jkl; + + // Virial tallying variables + double delil[3], deljl[3], delkl[3]; + double eng_tmp, fi_tmp[3], fj_tmp[3], fk_tmp[3]; + +#if defined(_OPENMP) + int tid = omp_get_thread_num(); +#else + int tid = 0; +#endif + long reductionOffset = (system->N * tid); + class PairReaxCOMP *pair_reax_ptr; + pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr); + class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid); + + pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either, + system->pair_ptr->vflag_either, + system->N, system->pair_ptr->eatom, + system->pair_ptr->vatom, thr); + +#if defined(_OPENMP) +#pragma omp for schedule(static) +#endif + for (j = 0; j < system->N; ++j) { + start_j = Start_Index(j, bonds); + end_j = 
End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + bo_jk = &( bonds->select.bond_list[pk].bo_data ); + for (k = 0; k < nthreads; ++k) + bo_jk->CdboReduction[k] = 0.; + } + } + +#if defined(_OPENMP) +#pragma omp for schedule(dynamic,50) +#endif + for (j = 0; j < natoms; ++j) { + type_j = system->my_atoms[j].type; + Delta_j = workspace->Delta_boc[j]; + start_j = Start_Index(j, bonds); + end_j = End_Index(j, bonds); + + for (pk = start_j; pk < end_j; ++pk) { + pbond_jk = &( bonds->select.bond_list[pk] ); + k = pbond_jk->nbr; + bo_jk = &( pbond_jk->bo_data ); + BOA_jk = bo_jk->BO - control->thb_cut; + + /* see if there are any 3-body interactions involving j&k + where j is the central atom. Otherwise there is no point in + trying to form a 4-body interaction out of this neighborhood */ + if (system->my_atoms[j].orig_id < system->my_atoms[k].orig_id && + bo_jk->BO > control->thb_cut/*0*/ && Num_Entries(pk, thb_intrs)) { + pj = pbond_jk->sym_index; // pj points to j on k's list + + /* do the same check as above: + are there any 3-body interactions involving k&j + where k is the central atom */ + if (Num_Entries(pj, thb_intrs)) { + type_k = system->my_atoms[k].type; + Delta_k = workspace->Delta_boc[k]; + r_jk = pbond_jk->d; + + start_pk = Start_Index(pk, thb_intrs ); + end_pk = End_Index(pk, thb_intrs ); + start_pj = Start_Index(pj, thb_intrs ); + end_pj = End_Index(pj, thb_intrs ); + + exp_tor2_jk = exp( -p_tor2 * BOA_jk ); + exp_cot2_jk = exp( -p_cot2 * SQR(BOA_jk - 1.5) ); + exp_tor3_DjDk = exp( -p_tor3 * (Delta_j + Delta_k) ); + exp_tor4_DjDk = exp( p_tor4 * (Delta_j + Delta_k) ); + exp_tor34_inv = 1.0 / (1.0 + exp_tor3_DjDk + exp_tor4_DjDk); + f11_DjDk = (2.0 + exp_tor3_DjDk) * exp_tor34_inv; + + + /* pick i up from j-k interaction where j is the central atom */ + for (pi = start_pk; pi < end_pk; ++pi) { + p_ijk = &( thb_intrs->select.three_body_list[pi] ); + pij = p_ijk->pthb; // pij is pointer to i on j's bond_list + pbond_ij = &( 
bonds->select.bond_list[pij] ); + bo_ij = &( pbond_ij->bo_data ); + + if (bo_ij->BO > control->thb_cut/*0*/) { + i = p_ijk->thb; + type_i = system->my_atoms[i].type; + r_ij = pbond_ij->d; + BOA_ij = bo_ij->BO - control->thb_cut; + + theta_ijk = p_ijk->theta; + sin_ijk = sin( theta_ijk ); + cos_ijk = cos( theta_ijk ); + //tan_ijk_i = 1. / tan( theta_ijk ); + if( sin_ijk >= 0 && sin_ijk <= MIN_SINE ) + tan_ijk_i = cos_ijk / MIN_SINE; + else if( sin_ijk <= 0 && sin_ijk >= -MIN_SINE ) + tan_ijk_i = cos_ijk / -MIN_SINE; + else tan_ijk_i = cos_ijk / sin_ijk; + + exp_tor2_ij = exp( -p_tor2 * BOA_ij ); + exp_cot2_ij = exp( -p_cot2 * SQR(BOA_ij -1.5) ); + + + /* pick l up from j-k interaction where k is the central atom */ + for (pl = start_pj; pl < end_pj; ++pl) { + p_jkl = &( thb_intrs->select.three_body_list[pl] ); + l = p_jkl->thb; + plk = p_jkl->pthb; //pointer to l on k's bond_list! + pbond_kl = &( bonds->select.bond_list[plk] ); + bo_kl = &( pbond_kl->bo_data ); + type_l = system->my_atoms[l].type; + fbh = &(system->reax_param.fbp[type_i][type_j] + [type_k][type_l]); + fbp = &(system->reax_param.fbp[type_i][type_j] + [type_k][type_l].prm[0]); + + if (i != l && fbh->cnt && + bo_kl->BO > control->thb_cut/*0*/ && + bo_ij->BO * bo_jk->BO * bo_kl->BO > control->thb_cut/*0*/) { + ++num_frb_intrs; + //fprintf(stderr, + // "%5d: %6d %6d %6d %6d\n", num_frb_intrs, + // system->my_atoms[i].orig_id,system->my_atoms[j].orig_id, + // system->my_atoms[k].orig_id,system->my_atoms[l].orig_id); + + r_kl = pbond_kl->d; + BOA_kl = bo_kl->BO - control->thb_cut; + + theta_jkl = p_jkl->theta; + sin_jkl = sin( theta_jkl ); + cos_jkl = cos( theta_jkl ); + //tan_jkl_i = 1. 
/ tan( theta_jkl ); + if( sin_jkl >= 0 && sin_jkl <= MIN_SINE ) + tan_jkl_i = cos_jkl / MIN_SINE; + else if( sin_jkl <= 0 && sin_jkl >= -MIN_SINE ) + tan_jkl_i = cos_jkl / -MIN_SINE; + else tan_jkl_i = cos_jkl /sin_jkl; + + rvec_ScaledSum( dvec_li, 1., system->my_atoms[i].x, + -1., system->my_atoms[l].x ); + r_li = rvec_Norm( dvec_li ); + + + /* omega and its derivative */ + omega = Calculate_Omega( pbond_ij->dvec, r_ij, + pbond_jk->dvec, r_jk, + pbond_kl->dvec, r_kl, + dvec_li, r_li, + p_ijk, p_jkl, + dcos_omega_di, dcos_omega_dj, + dcos_omega_dk, dcos_omega_dl, + out_control ); + + cos_omega = cos( omega ); + cos2omega = cos( 2. * omega ); + cos3omega = cos( 3. * omega ); + /* end omega calculations */ + + /* torsion energy */ + exp_tor1 = exp( fbp->p_tor1 * + SQR(2.0 - bo_jk->BO_pi - f11_DjDk) ); + exp_tor2_kl = exp( -p_tor2 * BOA_kl ); + exp_cot2_kl = exp( -p_cot2 * SQR(BOA_kl - 1.5) ); + fn10 = (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * + (1.0 - exp_tor2_kl); + + CV = 0.5 * ( fbp->V1 * (1.0 + cos_omega) + + fbp->V2 * exp_tor1 * (1.0 - cos2omega) + + fbp->V3 * (1.0 + cos3omega) ); + + total_Etor += e_tor = fn10 * sin_ijk * sin_jkl * CV; + + dfn11 = (-p_tor3 * exp_tor3_DjDk + + (p_tor3 * exp_tor3_DjDk - p_tor4 * exp_tor4_DjDk) * + (2.0 + exp_tor3_DjDk) * exp_tor34_inv) * + exp_tor34_inv; + + CEtors1 = sin_ijk * sin_jkl * CV; + + CEtors2 = -fn10 * 2.0 * fbp->p_tor1 * fbp->V2 * exp_tor1 * + (2.0 - bo_jk->BO_pi - f11_DjDk) * (1.0 - SQR(cos_omega)) * + sin_ijk * sin_jkl; + CEtors3 = CEtors2 * dfn11; + + CEtors4 = CEtors1 * p_tor2 * exp_tor2_ij * + (1.0 - exp_tor2_jk) * (1.0 - exp_tor2_kl); + CEtors5 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * exp_tor2_jk * (1.0 - exp_tor2_kl); + CEtors6 = CEtors1 * p_tor2 * + (1.0 - exp_tor2_ij) * (1.0 - exp_tor2_jk) * exp_tor2_kl; + + cmn = -fn10 * CV; + CEtors7 = cmn * sin_jkl * tan_ijk_i; + CEtors8 = cmn * sin_ijk * tan_jkl_i; + + CEtors9 = fn10 * sin_ijk * sin_jkl * + (0.5 * fbp->V1 - 2.0 * fbp->V2 * exp_tor1 * cos_omega + + 
1.5 * fbp->V3 * (cos2omega + 2.0 * SQR(cos_omega))); + /* end of torsion energy */ + + + /* 4-body conjugation energy */ + fn12 = exp_cot2_ij * exp_cot2_jk * exp_cot2_kl; + //data->my_en.e_con += e_con = + total_Econ += e_con = + fbp->p_cot1 * fn12 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + Cconj = -2.0 * fn12 * fbp->p_cot1 * p_cot2 * + (1.0 + (SQR(cos_omega) - 1.0) * sin_ijk * sin_jkl); + + CEconj1 = Cconj * (BOA_ij - 1.5e0); + CEconj2 = Cconj * (BOA_jk - 1.5e0); + CEconj3 = Cconj * (BOA_kl - 1.5e0); + + CEconj4 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_jkl * tan_ijk_i; + CEconj5 = -fbp->p_cot1 * fn12 * + (SQR(cos_omega) - 1.0) * sin_ijk * tan_jkl_i; + CEconj6 = 2.0 * fbp->p_cot1 * fn12 * + cos_omega * sin_ijk * sin_jkl; + /* end 4-body conjugation energy */ + + /* FORCES */ + bo_jk->Cdbopi += CEtors2; + workspace->CdDelta[j] += CEtors3; + //workspace->CdDelta[k] += CEtors3; + workspace->CdDeltaReduction[reductionOffset+k] += CEtors3; + bo_ij->Cdbo += (CEtors4 + CEconj1); + bo_jk->Cdbo += (CEtors5 + CEconj2); + //bo_kl->Cdbo += (CEtors6 + CEconj3); + bo_kl->CdboReduction[tid] += (CEtors6 + CEconj3); + + if( control->virial == 0 ) { + /* dcos_theta_ijk */ + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors7 + CEconj4, p_ijk->dcos_di ); + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l], + CEtors8 + CEconj5, p_jkl->dcos_dk ); + + /* dcos_omega */ + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i], + CEtors9 + CEconj6, dcos_omega_di ); + 
rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k], + CEtors9 + CEconj6, dcos_omega_dk ); + rvec_ScaledAdd( workspace->forceReduction[reductionOffset+l], + CEtors9 + CEconj6, dcos_omega_dl ); + } + else { + ivec_Sum(rel_box_jl, pbond_jk->rel_box, pbond_kl->rel_box); + + /* dcos_theta_ijk */ + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors7 + CEconj4, p_ijk->dcos_dj ); + + rvec_Scale( force, CEtors7 + CEconj4, p_ijk->dcos_di ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + /* dcos_theta_jkl */ + rvec_ScaledAdd( workspace->f[j], + CEtors8 + CEconj5, p_jkl->dcos_di ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dj ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_Scale( force, CEtors8 + CEconj5, p_jkl->dcos_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+l], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + /* dcos_omega */ + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_di ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, pbond_ij->rel_box, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_ScaledAdd( workspace->f[j], + CEtors9 + CEconj6, dcos_omega_dj ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dk ); + rvec_Add( workspace->forceReduction[reductionOffset+k], force ); + rvec_iMultiply( ext_press, pbond_jk->rel_box, force ); + rvec_Add( 
workspace->my_ext_pressReduction[tid], ext_press ); + + rvec_Scale( force, CEtors9 + CEconj6, dcos_omega_dl ); + rvec_Add( workspace->forceReduction[reductionOffset+i], force ); + rvec_iMultiply( ext_press, rel_box_jl, force ); + rvec_Add( workspace->my_ext_pressReduction[tid], ext_press ); + } + + /* tally into per-atom virials */ + if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) { + + // acquire vectors + rvec_ScaledSum( delil, 1., system->my_atoms[l].x, + -1., system->my_atoms[i].x ); + rvec_ScaledSum( deljl, 1., system->my_atoms[l].x, + -1., system->my_atoms[j].x ); + rvec_ScaledSum( delkl, 1., system->my_atoms[l].x, + -1., system->my_atoms[k].x ); + // dcos_theta_ijk + rvec_Scale( fi_tmp, CEtors7 + CEconj4, p_ijk->dcos_dk ); + rvec_Scale( fj_tmp, CEtors7 + CEconj4, p_ijk->dcos_dj ); + rvec_Scale( fk_tmp, CEtors7 + CEconj4, p_ijk->dcos_di ); + + // dcos_theta_jkl + rvec_ScaledAdd( fj_tmp, CEtors8 + CEconj5, p_jkl->dcos_di ); + rvec_ScaledAdd( fk_tmp, CEtors8 + CEconj5, p_jkl->dcos_dj ); + + // dcos_omega + rvec_ScaledAdd( fi_tmp, CEtors9 + CEconj6, dcos_omega_di ); + rvec_ScaledAdd( fj_tmp, CEtors9 + CEconj6, dcos_omega_dj ); + rvec_ScaledAdd( fk_tmp, CEtors9 + CEconj6, dcos_omega_dk ); + + // tally + eng_tmp = e_tor + e_con; + + if (system->pair_ptr->evflag) + pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, k, system->n, 1, + eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr); + + // NEED TO MAKE AN OMP VERSION OF THIS CALL! 
+ if (system->pair_ptr->vflag_atom) + system->pair_ptr->v_tally4(i, j, k, l, fi_tmp, fj_tmp, fk_tmp, + delil, deljl, delkl ); + } + + } // pl check ends + } // pl loop ends + } // pi check ends + } // pi loop ends + } // k-j neighbor check ends + } // j<k && j-k neighbor check ends + } // pk loop ends + } // j loop + + } // end omp parallel + + data->my_en.e_tor = total_Etor; + data->my_en.e_con = total_Econ; + +#ifdef OMP_TIMING + endTimeBase = MPI_Wtime(); + ompTimingData[COMPUTETORSIONANGLESBOINDEX] += (endTimeBase-startTimeBase); +#endif +} diff --git a/src/USER-OMP/reaxc_torsion_angles_omp.h b/src/USER-OMP/reaxc_torsion_angles_omp.h new file mode 100644 index 0000000000000000000000000000000000000000..51b05f7ae1e0038a990221ec46ab4ad8934dac97 --- /dev/null +++ b/src/USER-OMP/reaxc_torsion_angles_omp.h @@ -0,0 +1,36 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __TORSION_ANGLES_OMP_H_ +#define __TORSION_ANGLES_OMP_H_ + +#include "reaxc_types.h" +#include "reaxc_torsion_angles.h" + +void Torsion_AnglesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +#endif diff --git a/src/USER-OMP/reaxc_valence_angles_omp.cpp b/src/USER-OMP/reaxc_valence_angles_omp.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d6f0962020164d38a334bc53e2d290dfaaa5bad5 --- /dev/null +++ b/src/USER-OMP/reaxc_valence_angles_omp.cpp @@ -0,0 +1,623 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/
+
+#include "pair_reaxc_omp.h"
+#include "thr_data.h"
+
+#include "reaxc_types.h"
+#include "reaxc_valence_angles.h"
+#include "reaxc_valence_angles_omp.h"
+#include "reaxc_bond_orders_omp.h"
+#include "reaxc_list.h"
+#include "reaxc_vector.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+/* Writes the gradient of cos(theta) = (dvec_ji . dvec_jk) / (d_ji * d_jk):
+     *dcos_theta_di = d cos(theta) / d dvec_ji
+     *dcos_theta_dk = d cos(theta) / d dvec_jk
+     *dcos_theta_dj = -(*dcos_theta_di + *dcos_theta_dk)
+   The three vector components are written out by hand instead of looping.
+   NOTE(review): the formulas only equal this gradient if d_ji and d_jk are
+   the lengths of dvec_ji and dvec_jk -- confirm at the call sites.
+   There is no guard against d_ji * d_jk == 0. */
+void Calculate_dCos_ThetaOMP( rvec dvec_ji, double d_ji, rvec dvec_jk, double d_jk,
+                              rvec* dcos_theta_di,
+                              rvec* dcos_theta_dj,
+                              rvec* dcos_theta_dk )
+{
+  double sqr_d_ji = SQR(d_ji);
+  double sqr_d_jk = SQR(d_jk);
+  double inv_dists = 1.0 / (d_ji * d_jk);
+  double inv_dists3 = inv_dists * inv_dists * inv_dists;
+  double dot_dvecs = dvec_ji[0]*dvec_jk[0] + dvec_ji[1]*dvec_jk[1] + dvec_ji[2]*dvec_jk[2];
+  double Cdot_inv3 = dot_dvecs * inv_dists3;
+
+  // csqr_jk = dot / (d_ji^3 * d_jk), csqr_ji = dot / (d_ji * d_jk^3)
+  double csqr_jk = Cdot_inv3 * sqr_d_jk;
+  double csqr_ji = Cdot_inv3 * sqr_d_ji;
+
+  // Try to help compiler out by unrolling
+  // x-component
+  double dinv_jk = dvec_jk[0] * inv_dists;
+  double dinv_ji = dvec_ji[0] * inv_dists;
+
+  double cdev_ji = csqr_jk * dvec_ji[0];
+  double cdev_jk = csqr_ji * dvec_jk[0];
+
+  (*dcos_theta_di)[0] = dinv_jk - cdev_ji;
+  (*dcos_theta_dj)[0] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[0] = dinv_ji - cdev_jk;
+
+  // y-component
+  dinv_jk = dvec_jk[1] * inv_dists;
+  dinv_ji = dvec_ji[1] * inv_dists;
+
+  cdev_ji = csqr_jk * dvec_ji[1];
+  cdev_jk = csqr_ji * dvec_jk[1];
+
+  (*dcos_theta_di)[1] = dinv_jk - cdev_ji;
+  (*dcos_theta_dj)[1] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[1] = dinv_ji - cdev_jk;
+
+  // z-component
+  dinv_jk = dvec_jk[2] * inv_dists;
+  dinv_ji = dvec_ji[2] * inv_dists;
+
+  cdev_ji = csqr_jk * dvec_ji[2];
+  cdev_jk = csqr_ji * dvec_jk[2];
+
+  (*dcos_theta_di)[2] = dinv_jk - cdev_ji;
+  (*dcos_theta_dj)[2] = -(dinv_jk + dinv_ji) + cdev_ji + cdev_jk;
+  (*dcos_theta_dk)[2] = dinv_ji - cdev_jk;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* this is a 3-body interaction in which the main role is
+   played by j which sits in the middle of the other two.
+
+   OpenMP version. Inside a single parallel region it
+     1. counts, for every atom j, how many three-body entries its bonds
+        produce; each thread uses its own slice of the three-body list as
+        scratch space while counting,
+     2. lets the master thread convert the per-atom counts stored in
+        workspace->valence_angle_atom_myoffset into starting offsets,
+     3. rebuilds the three-body list at those offsets and accumulates the
+        angle, penalty and coalition energies and their force terms.
+   Terms for the central atom j are written directly (workspace->f[j],
+   workspace->CdDelta[j]); terms for the end atoms i and k go into the
+   per-thread slots forceReduction / CdDeltaReduction.
+   The summed energies are stored in data->my_en.e_ang / e_pen / e_coa.
+   Calls MPI_Abort with INSUFFICIENT_MEMORY if the three-body list is too
+   small. out_control is not referenced in this function.
+   NOTE(review): loops run over system->N while energies are only evaluated
+   for j < system->n; presumably n = owned atoms and N = owned + ghost atoms
+   -- confirm against reax_system. */
+void Valence_AnglesOMP( reax_system *system, control_params *control,
+                        simulation_data *data, storage *workspace,
+                        reax_list **lists, output_controls *out_control )
+{
+
+#ifdef OMP_TIMING
+  double endTimeBase, startTimeBase;
+  startTimeBase = MPI_Wtime();
+#endif
+
+  reax_list *bonds = (*lists) + BONDS;
+  reax_list *thb_intrs = (*lists) + THREE_BODIES;
+
+  // Precompute and store valence_angle offsets for OpenMP code.
+  // NOTE(review): index system->N is written below, so this array must hold
+  // at least system->N + 1 ints -- confirm where it is allocated.
+  int * _my_offset = workspace->valence_angle_atom_myoffset;
+
+  /* global parameters used in these calculations */
+  double p_val6 = system->reax_param.gp.l[14];
+  double p_val8 = system->reax_param.gp.l[33];
+  double p_val9 = system->reax_param.gp.l[16];
+  double p_val10 = system->reax_param.gp.l[17];
+  double total_Eang = 0;
+  double total_Epen = 0;
+  double total_Ecoa = 0;
+
+  int nthreads = control->nthreads;
+  int num_thb_intrs = 0;
+  int TWICE = 2;
+#if defined(_OPENMP)
+#pragma omp parallel default(shared) reduction(+:total_Eang, total_Epen, total_Ecoa, num_thb_intrs)
+#endif
+  {
+    int i, j, pi, k, pk, t;
+    int type_i, type_j, type_k;
+    int start_j, end_j, start_pk, end_pk;
+    int cnt, my_offset;
+
+    double temp, temp_bo_jt, pBOjt7;
+    double p_val1, p_val2, p_val3, p_val4, p_val5, p_val7;
+    double p_pen1, p_pen2, p_pen3, p_pen4;
+    double p_coa1, p_coa2, p_coa3, p_coa4;
+    double trm8, expval6, expval7, expval2theta, expval12theta, exp3ij, exp3jk;
+    double exp_pen2ij, exp_pen2jk, exp_pen3, exp_pen4, trm_pen34, exp_coa2;
+    double dSBO1, dSBO2, SBO, SBO2, CSBO2, SBOp, prod_SBO, vlpadj;
+    double CEval1, CEval2, CEval3, CEval4, CEval5, CEval6, CEval7, CEval8;
+    double CEpen1, CEpen2, CEpen3;
+    double e_ang, e_coa, e_pen;
+    double CEcoa1, CEcoa2, CEcoa3, CEcoa4, CEcoa5;
+    double Cf7ij, Cf7jk, Cf8j, Cf9j;
+    double f7_ij, f7_jk, f8_Dj, f9_Dj;
+    double Ctheta_0, theta_0, theta_00, theta, cos_theta, sin_theta;
+    double BOA_ij, BOA_jk;
+    rvec force, ext_press;
+    // rtensor temp_rtensor, total_rtensor;
+
+    // Tallying variables
+    double eng_tmp, fi_tmp[3], fj_tmp[3], fk_tmp[3];
+    double delij[3], delkj[3];
+
+    three_body_header *thbh;
+    three_body_parameters *thbp;
+    three_body_interaction_data *p_ijk, *p_kji;
+    bond_data *pbond_ij, *pbond_jk, *pbond_jt;
+    bond_order_data *bo_ij, *bo_jk, *bo_jt;
+
+#if defined(_OPENMP)
+    int tid = omp_get_thread_num();
+#else
+    int tid = 0;
+#endif
+    // Start of this thread's slots in the per-thread reduction arrays;
+    // they are indexed below as reductionOffset + atom index.
+    long reductionOffset = (system->N * tid);
+    class PairReaxCOMP *pair_reax_ptr;
+    pair_reax_ptr = static_cast<class PairReaxCOMP*>(system->pair_ptr);
+    class ThrData *thr = pair_reax_ptr->getFixOMP()->get_thr(tid);
+
+    pair_reax_ptr->ev_setup_thr_proxy(system->pair_ptr->eflag_either,
+                                      system->pair_ptr->vflag_either,
+                                      system->N, system->pair_ptr->eatom,
+                                      system->pair_ptr->vatom, thr);
+
+
+    // Run through a minimal for(j<N) loop once to precompute offsets with safe number of threads
+
+    // Each thread counts inside its own per_thread-sized slice of the
+    // three-body list, starting at tid * per_thread.
+    const int per_thread = thb_intrs->num_intrs / nthreads;
+
+#if defined(_OPENMP)
+#pragma omp for schedule(dynamic,50)
+#endif
+    for (j = 0; j < system->N; ++j) {
+      type_j = system->my_atoms[j].type;
+      _my_offset[j] = 0;
+      if(type_j < 0) continue;
+
+      start_j = Start_Index(j, bonds);
+      end_j = End_Index(j, bonds);
+
+      // Always point to start of workspace to count angles
+      my_offset = tid * per_thread;
+
+      for (pi = start_j; pi < end_j; ++pi) {
+        Set_Start_Index( pi, my_offset, thb_intrs );
+        pbond_ij = &(bonds->select.bond_list[pi]);
+        bo_ij = &(pbond_ij->bo_data);
+        BOA_ij = bo_ij->BO - control->thb_cut;
+
+        if (BOA_ij > 0.0) {
+          i = pbond_ij->nbr;
+
+          /* first copy 3-body intrs from previously computed ones where i>k.
+             in the second for-loop below,
+             we compute only new 3-body intrs where i < k */
+          for (pk = start_j; pk < pi; ++pk) {
+            start_pk = Start_Index( pk, thb_intrs );
+            end_pk = End_Index( pk, thb_intrs );
+
+            for (t = start_pk; t < end_pk; ++t)
+              if (thb_intrs->select.three_body_list[t].thb == i) {
+
+                p_ijk = &(thb_intrs->select.three_body_list[my_offset] );
+                p_ijk->thb = bonds->select.bond_list[pk].nbr;
+
+                ++my_offset;
+                break;
+              }
+          } // for(pk)
+
+          /* and this is the second for loop mentioned above */
+          for (pk = pi+1; pk < end_j; ++pk) {
+            pbond_jk = &(bonds->select.bond_list[pk]);
+            k = pbond_jk->nbr;
+
+            if (j >= system->n && i >= system->n && k >= system->n) continue;
+
+            p_ijk = &( thb_intrs->select.three_body_list[my_offset] );
+            p_ijk->thb = k;
+
+            ++my_offset; // add this to the list of 3-body interactions
+          } // for(pk)
+        } // if()
+
+        Set_End_Index(pi, my_offset, thb_intrs );
+      } // for(pi)
+
+      // Confirm that thb_intrs->num_intrs / nthreads is enough to hold all angles from a single atom
+      // NOTE(review): this test runs after the entries above have already been
+      // written, so an overflow is detected only after the fact.
+      if(my_offset >= (tid+1)*per_thread) {
+        int me;
+        MPI_Comm_rank(MPI_COMM_WORLD,&me);
+        fprintf( stderr, "step%d-ran out of space on angle_list on proc %i for atom %i:", data->step, me, j);
+        fprintf( stderr, " nthreads= %d, tid=%d, my_offset=%d, per_thread=%d\n", nthreads, tid, my_offset, per_thread);
+        fprintf( stderr, " num_intrs= %i N= %i\n",thb_intrs->num_intrs , system->N);
+        MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
+      }
+
+      // Number of angles owned by this atom
+      _my_offset[j] = my_offset - tid * per_thread;
+    } // for(j)
+
+    // Wait for all threads to finish counting angles
+#if defined(_OPENMP) && !defined(__NVCC__)
+#pragma omp barrier
+#endif
+    // Master thread uses angle counts to compute offsets
+    // This can be threaded
+    // Turns the counts into running totals: afterwards _my_offset[j] is the
+    // sum of the counts of atoms 0..j-1 and _my_offset[system->N] is the
+    // sum over all atoms.
+#if defined(_OPENMP) && !defined(__NVCC__)
+#pragma omp master
+#endif
+    {
+      int current_count = 0;
+      int m = _my_offset[0];
+      _my_offset[0] = current_count;
+      for(j=1; j<system->N; j++) {
+        current_count+= m;
+        m = _my_offset[j];
+        _my_offset[j] = current_count;
+      }
+      _my_offset[system->N] = current_count + m; // Used to test if last particle has any angles
+    }
+
+    // All threads wait till master thread finished computing offsets
+#if defined(_OPENMP) && !defined(__NVCC__)
+#pragma omp barrier
+#endif
+    // Original loop, but now using precomputed offsets
+    // Safe to use all threads available, regardless of threads tasked above
+    // We also now skip over atoms that have no angles assigned
+#if defined(_OPENMP)
+#pragma omp for schedule(dynamic,50)//(dynamic,chunksize)//(guided)
+#endif
+    for (j = 0; j < system->N; ++j) { // Ray: the first one with system->N
+      type_j = system->my_atoms[j].type;
+      if(type_j < 0) continue;
+
+      // Skip if no angles for this atom
+      if(_my_offset[j] == _my_offset[j+1]) continue;
+
+      start_j = Start_Index(j, bonds);
+      end_j = End_Index(j, bonds);
+
+      type_j = system->my_atoms[j].type;
+
+      my_offset = _my_offset[j];
+
+      p_val3 = system->reax_param.sbp[ type_j ].p_val3;
+      p_val5 = system->reax_param.sbp[ type_j ].p_val5;
+
+      SBOp = 0, prod_SBO = 1;
+      for (t = start_j; t < end_j; ++t) {
+        bo_jt = &(bonds->select.bond_list[t].bo_data);
+        SBOp += (bo_jt->BO_pi + bo_jt->BO_pi2);
+        // two successive squarings of BO^2 give temp = BO^8
+        temp = SQR( bo_jt->BO );
+        temp *= temp;
+        temp *= temp;
+        prod_SBO *= exp( -temp );
+      }
+
+      // modifications to match Adri's code - 09/01/09
+      if( workspace->vlpex[j] >= 0 ){
+        vlpadj = 0;
+        dSBO2 = prod_SBO - 1;
+      }
+      else{
+        vlpadj = workspace->nlp[j];
+        dSBO2 = (prod_SBO - 1) * (1 - p_val8 * workspace->dDelta_lp[j]);
+      }
+
+      SBO = SBOp + (1 - prod_SBO) * (-workspace->Delta_boc[j] - p_val8 * vlpadj);
+      dSBO1 = -8 * prod_SBO * ( workspace->Delta_boc[j] + p_val8 * vlpadj );
+
+      // SBO2 is a piecewise function of SBO confined to [0, 2];
+      // CSBO2 is set alongside it in each branch.
+      if( SBO <= 0 )
+        SBO2 = 0, CSBO2 = 0;
+      else if( SBO > 0 && SBO <= 1 ) {
+        SBO2 = pow( SBO, p_val9 );
+        CSBO2 = p_val9 * pow( SBO, p_val9 - 1 );
+      }
+      else if( SBO > 1 && SBO < 2 ) {
+        SBO2 = 2 - pow( 2-SBO, p_val9 );
+        CSBO2 = p_val9 * pow( 2 - SBO, p_val9 - 1 );
+      }
+      else
+        SBO2 = 2, CSBO2 = 0;
+
+      expval6 = exp( p_val6 * workspace->Delta_boc[j] );
+
+      for (pi = start_j; pi < end_j; ++pi) {
+        Set_Start_Index( pi, my_offset, thb_intrs );
+        pbond_ij = &(bonds->select.bond_list[pi]);
+        bo_ij = &(pbond_ij->bo_data);
+        BOA_ij = bo_ij->BO - control->thb_cut;
+
+
+        if (BOA_ij > 0.0) {
+          i = pbond_ij->nbr;
+          type_i = system->my_atoms[i].type;
+
+
+          /* first copy 3-body intrs from previously computed ones where i>k.
+             in the second for-loop below,
+             we compute only new 3-body intrs where i < k */
+          for (pk = start_j; pk < pi; ++pk) {
+            start_pk = Start_Index( pk, thb_intrs );
+            end_pk = End_Index( pk, thb_intrs );
+
+            for (t = start_pk; t < end_pk; ++t)
+              if (thb_intrs->select.three_body_list[t].thb == i) {
+                p_ijk = &(thb_intrs->select.three_body_list[my_offset] );
+                p_kji = &(thb_intrs->select.three_body_list[t]);
+
+                // mirror the stored k-j-i entry: di and dk swap roles
+                p_ijk->thb = bonds->select.bond_list[pk].nbr;
+                p_ijk->pthb = pk;
+                p_ijk->theta = p_kji->theta;
+                rvec_Copy( p_ijk->dcos_di, p_kji->dcos_dk );
+                rvec_Copy( p_ijk->dcos_dj, p_kji->dcos_dj );
+                rvec_Copy( p_ijk->dcos_dk, p_kji->dcos_di );
+
+                ++my_offset;
+                ++num_thb_intrs;
+                break;
+              }
+          } // for(pk)
+
+
+          /* and this is the second for loop mentioned above */
+          for (pk = pi+1; pk < end_j; ++pk) {
+            pbond_jk = &(bonds->select.bond_list[pk]);
+            bo_jk = &(pbond_jk->bo_data);
+            BOA_jk = bo_jk->BO - control->thb_cut;
+            k = pbond_jk->nbr;
+            type_k = system->my_atoms[k].type;
+            p_ijk = &( thb_intrs->select.three_body_list[my_offset] );
+
+            // Fix by Sudhir
+            // if (BOA_jk <= 0) continue;
+            if (j >= system->n && i >= system->n && k >= system->n) continue;
+
+            Calculate_Theta( pbond_ij->dvec, pbond_ij->d,
+                             pbond_jk->dvec, pbond_jk->d,
+                             &theta, &cos_theta );
+
+            Calculate_dCos_ThetaOMP( pbond_ij->dvec, pbond_ij->d,
+                                     pbond_jk->dvec, pbond_jk->d,
+                                     &(p_ijk->dcos_di), &(p_ijk->dcos_dj),
+                                     &(p_ijk->dcos_dk) );
+            p_ijk->thb = k;
+            p_ijk->pthb = pk;
+            p_ijk->theta = theta;
+
+            // lower-bound sin_theta; it is the divisor in CEval8 below
+            sin_theta = sin( theta );
+            if( sin_theta < 1.0e-5 )
+              sin_theta = 1.0e-5;
+
+            ++my_offset; // add this to the list of 3-body interactions
+            ++num_thb_intrs;
+
+            if ((j < system->n) && (BOA_jk > 0.0) &&
+                (bo_ij->BO > control->thb_cut) &&
+                (bo_jk->BO > control->thb_cut) &&
+                (bo_ij->BO * bo_jk->BO > control->thb_cutsq)) {
+              thbh = &( system->reax_param.thbp[ type_i ][ type_j ][ type_k ] );
+
+              for (cnt = 0; cnt < thbh->cnt; ++cnt) {
+
+                if( fabs(thbh->prm[cnt].p_val1) > 0.001 ) {
+                  thbp = &( thbh->prm[cnt] );
+
+                  /* ANGLE ENERGY */
+                  p_val1 = thbp->p_val1;
+                  p_val2 = thbp->p_val2;
+                  p_val4 = thbp->p_val4;
+                  p_val7 = thbp->p_val7;
+                  theta_00 = thbp->theta_00;
+
+                  exp3ij = exp( -p_val3 * pow( BOA_ij, p_val4 ) );
+                  f7_ij = 1.0 - exp3ij;
+                  Cf7ij = p_val3 * p_val4 * pow( BOA_ij, p_val4 - 1.0 ) * exp3ij;
+
+                  exp3jk = exp( -p_val3 * pow( BOA_jk, p_val4 ) );
+                  f7_jk = 1.0 - exp3jk;
+                  Cf7jk = p_val3 * p_val4 * pow( BOA_jk, p_val4 - 1.0 ) * exp3jk;
+
+                  expval7 = exp( -p_val7 * workspace->Delta_boc[j] );
+                  trm8 = 1.0 + expval6 + expval7;
+                  f8_Dj = p_val5 - ( (p_val5 - 1.0) * (2.0 + expval6) / trm8 );
+                  Cf8j = ( (1.0 - p_val5) / SQR(trm8) ) *
+                    ( p_val6 * expval6 * trm8 -
+                      (2.0 + expval6) * ( p_val6*expval6 - p_val7*expval7 ) );
+
+                  // theta_0 is built in degrees, then converted to radians
+                  theta_0 = 180.0 - theta_00 * (1.0 -
+                                                exp(-p_val10 * (2.0 - SBO2)));
+                  theta_0 = DEG2RAD( theta_0 );
+
+                  expval2theta = exp( -p_val2 * SQR(theta_0 - theta) );
+                  if (p_val1 >= 0)
+                    expval12theta = p_val1 * (1.0 - expval2theta);
+                  else // To avoid linear Me-H-Me angles (6/6/06)
+                    expval12theta = p_val1 * -expval2theta;
+
+                  CEval1 = Cf7ij * f7_jk * f8_Dj * expval12theta;
+                  CEval2 = Cf7jk * f7_ij * f8_Dj * expval12theta;
+                  CEval3 = Cf8j * f7_ij * f7_jk * expval12theta;
+                  CEval4 = -2.0 * p_val1 * p_val2 * f7_ij * f7_jk * f8_Dj *
+                    expval2theta * (theta_0 - theta);
+
+                  Ctheta_0 = p_val10 * DEG2RAD(theta_00) *
+                    exp( -p_val10 * (2.0 - SBO2) );
+
+                  CEval5 = -CEval4 * Ctheta_0 * CSBO2;
+                  CEval6 = CEval5 * dSBO1;
+                  CEval7 = CEval5 * dSBO2;
+                  CEval8 = -CEval4 / sin_theta;
+
+                  total_Eang += e_ang =
+                    f7_ij * f7_jk * f8_Dj * expval12theta;
+                  /* END ANGLE ENERGY*/
+
+
+                  /* PENALTY ENERGY */
+                  p_pen1 = thbp->p_pen1;
+                  p_pen2 = system->reax_param.gp.l[19];
+                  p_pen3 = system->reax_param.gp.l[20];
+                  p_pen4 = system->reax_param.gp.l[21];
+
+                  exp_pen2ij = exp( -p_pen2 * SQR( BOA_ij - 2.0 ) );
+                  exp_pen2jk = exp( -p_pen2 * SQR( BOA_jk - 2.0 ) );
+                  exp_pen3 = exp( -p_pen3 * workspace->Delta[j] );
+                  exp_pen4 = exp( p_pen4 * workspace->Delta[j] );
+                  trm_pen34 = 1.0 + exp_pen3 + exp_pen4;
+                  f9_Dj = ( 2.0 + exp_pen3 ) / trm_pen34;
+                  Cf9j = ( -p_pen3 * exp_pen3 * trm_pen34 -
+                           (2.0 + exp_pen3) * ( -p_pen3 * exp_pen3 +
+                                                p_pen4 * exp_pen4 ) ) /
+                    SQR( trm_pen34 );
+
+                  total_Epen += e_pen =
+                    p_pen1 * f9_Dj * exp_pen2ij * exp_pen2jk;
+
+                  CEpen1 = e_pen * Cf9j / f9_Dj;
+                  temp = -2.0 * p_pen2 * e_pen;
+                  CEpen2 = temp * (BOA_ij - 2.0);
+                  CEpen3 = temp * (BOA_jk - 2.0);
+                  /* END PENALTY ENERGY */
+
+
+                  /* COALITION ENERGY */
+                  p_coa1 = thbp->p_coa1;
+                  p_coa2 = system->reax_param.gp.l[2];
+                  p_coa3 = system->reax_param.gp.l[38];
+                  p_coa4 = system->reax_param.gp.l[30];
+
+                  exp_coa2 = exp( p_coa2 * workspace->Delta_val[j] );
+                  total_Ecoa += e_coa =
+                    p_coa1 / (1. + exp_coa2) *
+                    exp( -p_coa3 * SQR(workspace->total_bond_order[i]-BOA_ij) ) *
+                    exp( -p_coa3 * SQR(workspace->total_bond_order[k]-BOA_jk) ) *
+                    exp( -p_coa4 * SQR(BOA_ij - 1.5) ) *
+                    exp( -p_coa4 * SQR(BOA_jk - 1.5) );
+
+                  CEcoa1 = -2 * p_coa4 * (BOA_ij - 1.5) * e_coa;
+                  CEcoa2 = -2 * p_coa4 * (BOA_jk - 1.5) * e_coa;
+                  CEcoa3 = -p_coa2 * exp_coa2 * e_coa / (1 + exp_coa2);
+                  CEcoa4 = -2 * p_coa3 *
+                    (workspace->total_bond_order[i]-BOA_ij) * e_coa;
+                  CEcoa5 = -2 * p_coa3 *
+                    (workspace->total_bond_order[k]-BOA_jk) * e_coa;
+                  /* END COALITION ENERGY */
+
+
+                  /* FORCES */
+                  // Quantities of atom j and of j's own bonds are updated in
+                  // place; contributions to atoms i and k go to this thread's
+                  // reduction slots.
+                  bo_ij->Cdbo += (CEval1 + CEpen2 + (CEcoa1 - CEcoa4));
+                  bo_jk->Cdbo += (CEval2 + CEpen3 + (CEcoa2 - CEcoa5));
+                  workspace->CdDelta[j] += ((CEval3 + CEval7) + CEpen1 + CEcoa3);
+                  workspace->CdDeltaReduction[reductionOffset+i] += CEcoa4;
+                  workspace->CdDeltaReduction[reductionOffset+k] += CEcoa5;
+
+                  for (t = start_j; t < end_j; ++t) {
+                    pbond_jt = &( bonds->select.bond_list[t] );
+                    bo_jt = &(pbond_jt->bo_data);
+                    temp_bo_jt = bo_jt->BO;
+                    // with CUBE(x) = x^3 this yields pBOjt7 = BO^7
+                    temp = CUBE( temp_bo_jt );
+                    pBOjt7 = temp * temp * temp_bo_jt;
+
+                    bo_jt->Cdbo += (CEval6 * pBOjt7);
+                    bo_jt->Cdbopi += CEval5;
+                    bo_jt->Cdbopi2 += CEval5;
+                  }
+
+                  if( control->virial == 0 ) {
+                    rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+i],
+                                    CEval8, p_ijk->dcos_di );
+                    rvec_ScaledAdd( workspace->forceReduction[reductionOffset+k],
+                                    CEval8, p_ijk->dcos_dk );
+                  }
+                  else {
+                    /* terms not related to bond order derivatives are
+                       added directly into forces and pressure vector/tensor */
+                    rvec_Scale( force, CEval8, p_ijk->dcos_di );
+                    rvec_Add( workspace->forceReduction[reductionOffset+i], force );
+
+                    rvec_iMultiply( ext_press, pbond_ij->rel_box, force );
+                    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+
+                    rvec_ScaledAdd( workspace->f[j], CEval8, p_ijk->dcos_dj );
+
+                    rvec_Scale( force, CEval8, p_ijk->dcos_dk );
+                    rvec_Add( workspace->forceReduction[reductionOffset+k], force );
+
+                    rvec_iMultiply( ext_press, pbond_jk->rel_box, force );
+                    rvec_Add( workspace->my_ext_pressReduction[tid], ext_press );
+                  }
+
+                  /* tally into per-atom virials */
+                  if( system->pair_ptr->vflag_atom || system->pair_ptr->evflag) {
+
+                    /* Acquire vectors */
+                    rvec_ScaledSum( delij, 1., system->my_atoms[i].x,
+                                    -1., system->my_atoms[j].x );
+                    rvec_ScaledSum( delkj, 1., system->my_atoms[k].x,
+                                    -1., system->my_atoms[j].x );
+
+                    rvec_Scale( fi_tmp, -CEval8, p_ijk->dcos_di );
+                    rvec_Scale( fj_tmp, -CEval8, p_ijk->dcos_dj );
+                    rvec_Scale( fk_tmp, -CEval8, p_ijk->dcos_dk );
+
+                    eng_tmp = e_ang + e_pen + e_coa;
+
+                    // the energy tally passes atom j for both atom arguments
+                    if( system->pair_ptr->evflag)
+                      pair_reax_ptr->ev_tally_thr_proxy(system->pair_ptr, j, j, system->N, 1,
+                                                        eng_tmp, 0.0, 0.0, 0.0, 0.0, 0.0, thr);
+                    if( system->pair_ptr->vflag_atom)
+                      // NEED TO MAKE AN OMP VERSION OF THIS CALL!
+                      system->pair_ptr->v_tally3( i, j, k, fi_tmp, fk_tmp, delij, delkj);
+                  }
+
+                } // if(p_val1>0.001)
+              } // for(cnt)
+            } // if(j<n && BOA_jk>0)
+          } // for(pk)
+        } // if(BOA_ij>0)
+
+        Set_End_Index(pi, my_offset, thb_intrs );
+      } // for(pi)
+    } // for(j)
+
+    pair_reax_ptr->reduce_thr_proxy(system->pair_ptr, system->pair_ptr->eflag_either,
+                                    system->pair_ptr->vflag_either, thr);
+  } // end omp parallel
+
+  data->my_en.e_ang = total_Eang;
+  data->my_en.e_pen = total_Epen;
+  data->my_en.e_coa = total_Ecoa;
+
+  // Once usage reaches the DANGER_ZONE fraction of the list, record a doubled
+  // size in workspace->realloc.num_3body; abort if the list was overrun.
+  if( num_thb_intrs >= thb_intrs->num_intrs * DANGER_ZONE ) {
+    workspace->realloc.num_3body = num_thb_intrs * TWICE;
+    if( num_thb_intrs > thb_intrs->num_intrs ) {
+      fprintf( stderr, "step%d-ran out of space on angle_list: top=%d, max=%d",
+               data->step, num_thb_intrs, thb_intrs->num_intrs );
+      MPI_Abort( MPI_COMM_WORLD, INSUFFICIENT_MEMORY );
+    }
+  }
+
+#ifdef OMP_TIMING
+  endTimeBase = MPI_Wtime();
+  ompTimingData[COMPUTEVALENCEANGLESBOINDEX] += (endTimeBase-startTimeBase);
+#endif
+}
diff --git a/src/USER-OMP/reaxc_valence_angles_omp.h b/src/USER-OMP/reaxc_valence_angles_omp.h new file mode 100644 index 
0000000000000000000000000000000000000000..ce304acced78982d962263c623ad9f94365ec164 --- /dev/null +++ b/src/USER-OMP/reaxc_valence_angles_omp.h @@ -0,0 +1,37 @@ +/*---------------------------------------------------------------------- + PuReMD - Purdue ReaxFF Molecular Dynamics Program + + Copyright (2010) Purdue University + Hasan Metin Aktulga, hmaktulga@lbl.gov + Joseph Fogarty, jcfogart@mail.usf.edu + Sagar Pandit, pandit@usf.edu + Ananth Y Grama, ayg@cs.purdue.edu + + Please cite the related publication: + H. M. Aktulga, J. C. Fogarty, S. A. Pandit, A. Y. Grama, + "Parallel Reactive Molecular Dynamics: Numerical Methods and + Algorithmic Techniques", Parallel Computing, in press. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details: + <http://www.gnu.org/licenses/>. 
+ ----------------------------------------------------------------------*/ + +#ifndef __VALENCE_ANGLES_OMP_H_ +#define __VALENCE_ANGLES_OMP_H_ + +#include "reaxc_types.h" + +void Valence_AnglesOMP( reax_system*, control_params*, simulation_data*, + storage*, reax_list**, output_controls* ); + +void Calculate_dCos_ThetaOMP( rvec, double, rvec, double, rvec*, rvec*, rvec* ); + +#endif diff --git a/src/USER-REAXC/compute_spec_atom.cpp b/src/USER-REAXC/compute_spec_atom.cpp index 164ce87205a15854d0a2fe1079d41d3f6af9818e..e913225492a08bcf545f4ed43ba9861a33478022 100644 --- a/src/USER-REAXC/compute_spec_atom.cpp +++ b/src/USER-REAXC/compute_spec_atom.cpp @@ -43,9 +43,7 @@ ComputeSpecAtom::ComputeSpecAtom(LAMMPS *lmp, int narg, char **arg) : else size_peratom_cols = nvalues; // Initiate reaxc - reaxc = (PairReaxC *) force->pair_match("reax/c",1); - if (reaxc == NULL) - reaxc = (PairReaxC *) force->pair_match("reax/c/kk",1); + reaxc = (PairReaxC *) force->pair_match("reax/c",0); pack_choice = new FnPtrPack[nvalues]; diff --git a/src/USER-REAXC/fix_qeq_reax.cpp b/src/USER-REAXC/fix_qeq_reax.cpp index 96df03c668a260ce63b87221bd63bac42b88a12c..9d165f3fd3bbb7903cc1adaa2a592715d4d828f0 100644 --- a/src/USER-REAXC/fix_qeq_reax.cpp +++ b/src/USER-REAXC/fix_qeq_reax.cpp @@ -64,11 +64,11 @@ static const char cite_fix_qeq_reax[] = /* ---------------------------------------------------------------------- */ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : - Fix(lmp, narg, arg) + Fix(lmp, narg, arg), pertype_option(NULL) { if (lmp->citeme) lmp->citeme->add(cite_fix_qeq_reax); - if (narg != 8) error->all(FLERR,"Illegal fix qeq/reax command"); + if (narg<8 || narg>9) error->all(FLERR,"Illegal fix qeq/reax command"); nevery = force->inumeric(FLERR,arg[3]); if (nevery <= 0) error->all(FLERR,"Illegal fix qeq/reax command"); @@ -76,8 +76,17 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : swa = force->numeric(FLERR,arg[4]); swb = force->numeric(FLERR,arg[5]); 
tolerance = force->numeric(FLERR,arg[6]); - pertype_parameters(arg[7]); - + int len = strlen(arg[7]) + 1; + pertype_option = new char[len]; + strcpy(pertype_option,arg[7]); + + // dual CG support only available for USER-OMP variant + // check for compatibility is in Fix::post_constructor() + dual_enabled = 0; + if (narg == 9) { + if (strcmp(arg[8],"dual") == 0) dual_enabled = 1; + else error->all(FLERR,"Illegal fix qeq/reax command"); + } shld = NULL; n = n_cap = 0; @@ -106,31 +115,37 @@ FixQEqReax::FixQEqReax(LAMMPS *lmp, int narg, char **arg) : H.jlist = NULL; H.val = NULL; - comm_forward = comm_reverse = 1; + // dual CG support + // Update comm sizes for this fix + if (dual_enabled) comm_forward = comm_reverse = 2; + else comm_forward = comm_reverse = 1; // perform initial allocation of atom-based arrays // register with Atom class - s_hist = t_hist = NULL; - grow_arrays(atom->nmax); - atom->add_callback(0); - for( int i = 0; i < atom->nmax; i++ ) - for (int j = 0; j < nprev; ++j ) - s_hist[i][j] = t_hist[i][j] = 0; - reaxc = NULL; - reaxc = (PairReaxC *) force->pair_match("reax/c",1); + reaxc = (PairReaxC *) force->pair_match("reax/c",0); + if (reaxc) { + s_hist = t_hist = NULL; + grow_arrays(atom->nmax); + atom->add_callback(0); + for (int i = 0; i < atom->nmax; i++) + for (int j = 0; j < nprev; ++j) + s_hist[i][j] = t_hist[i][j] = 0; + } } /* ---------------------------------------------------------------------- */ FixQEqReax::~FixQEqReax() { - // unregister callbacks to this fix from Atom class - if (copymode) return; + delete[] pertype_option; + + // unregister callbacks to this fix from Atom class + atom->delete_callback(id,0); memory->destroy(s_hist); @@ -150,6 +165,15 @@ FixQEqReax::~FixQEqReax() /* ---------------------------------------------------------------------- */ +void FixQEqReax::post_constructor() +{ + pertype_parameters(pertype_option); + if (dual_enabled) + error->all(FLERR,"Dual keyword only supported with fix qeq/reax/omp"); +} + +/* 
---------------------------------------------------------------------- */ + int FixQEqReax::setmask() { int mask = 0; @@ -165,11 +189,9 @@ void FixQEqReax::pertype_parameters(char *arg) { if (strcmp(arg,"reax/c") == 0) { reaxflag = 1; - Pair *pair = force->pair_match("reax/c",1); - if (pair == NULL) - pair = force->pair_match("reax/c/kk",1); - + Pair *pair = force->pair_match("reax/c",0); if (pair == NULL) error->all(FLERR,"No pair reax/c for fix qeq/reax"); + int tmp; chi = (double *) pair->extract("chi",tmp); eta = (double *) pair->extract("eta",tmp); @@ -227,10 +249,14 @@ void FixQEqReax::allocate_storage() memory->create(b_prc,nmax,"qeq:b_prc"); memory->create(b_prm,nmax,"qeq:b_prm"); - memory->create(p,nmax,"qeq:p"); - memory->create(q,nmax,"qeq:q"); - memory->create(r,nmax,"qeq:r"); - memory->create(d,nmax,"qeq:d"); + // dual CG support + int size = nmax; + if (dual_enabled) size*= 2; + + memory->create(p,size,"qeq:p"); + memory->create(q,size,"qeq:q"); + memory->create(r,size,"qeq:r"); + memory->create(d,size,"qeq:d"); } /* ---------------------------------------------------------------------- */ @@ -271,7 +297,7 @@ void FixQEqReax::allocate_matrix() int mincap; double safezone; - if( reaxflag ) { + if (reaxflag) { mincap = reaxc->system->mincap; safezone = reaxc->system->safezone; } else { @@ -280,7 +306,7 @@ void FixQEqReax::allocate_matrix() } n = atom->nlocal; - n_cap = MAX( (int)(n * safezone), mincap ); + n_cap = MAX( (int)(n * safezone), mincap); // determine the total space for the H matrix @@ -295,11 +321,11 @@ void FixQEqReax::allocate_matrix() } m = 0; - for( ii = 0; ii < inum; ii++ ) { + for (ii = 0; ii < inum; ii++) { i = ilist[ii]; m += numneigh[i]; } - m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS ); + m_cap = MAX( (int)(m * safezone), mincap * MIN_NBRS); H.n = n_cap; H.m = m_cap; @@ -331,18 +357,12 @@ void FixQEqReax::reallocate_matrix() void FixQEqReax::init() { - if (!atom->q_flag) error->all(FLERR,"Fix qeq/reax requires atom 
attribute q"); + if (!atom->q_flag) + error->all(FLERR,"Fix qeq/reax requires atom attribute q"); ngroup = group->count(igroup); if (ngroup == 0) error->all(FLERR,"Fix qeq/reax group has no atoms"); - /* - if (reaxc) - if (ngroup != reaxc->ngroup) - error->all(FLERR,"Fix qeq/reax group and pair reax/c have " - "different numbers of atoms"); - */ - // need a half neighbor list w/ Newton off and ghost neighbors // built whenever re-neighboring occurs @@ -377,9 +397,9 @@ void FixQEqReax::init_shielding() if (shld == NULL) memory->create(shld,ntypes+1,ntypes+1,"qeq:shielding"); - for( i = 1; i <= ntypes; ++i ) - for( j = 1; j <= ntypes; ++j ) - shld[i][j] = pow( gamma[i] * gamma[j], -1.5 ); + for (i = 1; i <= ntypes; ++i) + for (j = 1; j <= ntypes; ++j) + shld[i][j] = pow( gamma[i] * gamma[j], -1.5); } /* ---------------------------------------------------------------------- */ @@ -395,21 +415,21 @@ void FixQEqReax::init_taper() else if (swb < 5 && comm->me == 0) error->warning(FLERR,"Fix qeq/reax has very low Taper radius cutoff"); - d7 = pow( swb - swa, 7 ); - swa2 = SQR( swa ); - swa3 = CUBE( swa ); - swb2 = SQR( swb ); - swb3 = CUBE( swb ); + d7 = pow( swb - swa, 7); + swa2 = SQR( swa); + swa3 = CUBE( swa); + swb2 = SQR( swb); + swb3 = CUBE( swb); Tap[7] = 20.0 / d7; Tap[6] = -70.0 * (swa + swb) / d7; Tap[5] = 84.0 * (swa2 + 3.0*swa*swb + swb2) / d7; - Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3 ) / d7; - Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3 ) / d7; + Tap[4] = -35.0 * (swa3 + 9.0*swa2*swb + 9.0*swa*swb2 + swb3) / d7; + Tap[3] = 140.0 * (swa3*swb + 3.0*swa2*swb2 + swa*swb3) / d7; Tap[2] =-210.0 * (swa3*swb2 + swa2*swb3) / d7; Tap[1] = 140.0 * swa3 * swb3 / d7; Tap[0] = (-35.0*swa3*swb2*swb2 + 21.0*swa2*swb3*swb2 + - 7.0*swa*swb3*swb3 + swb3*swb3*swb ) / d7; + 7.0*swa*swb3*swb3 + swb3*swb3*swb) / d7; } /* ---------------------------------------------------------------------- */ @@ -437,6 +457,13 @@ void 
FixQEqReax::setup_pre_force_respa(int vflag, int ilevel) /* ---------------------------------------------------------------------- */ +void FixQEqReax::min_setup_pre_force(int vflag) +{ + setup_pre_force(vflag); +} + +/* ---------------------------------------------------------------------- */ + void FixQEqReax::init_storage() { int NN; @@ -446,7 +473,7 @@ void FixQEqReax::init_storage() else NN = list->inum + list->gnum; - for( int i = 0; i < NN; i++ ) { + for (int i = 0; i < NN; i++) { Hdia_inv[i] = 1. / eta[atom->type[i]]; b_s[i] = -chi[atom->type[i]]; b_t[i] = -1.0; @@ -463,7 +490,7 @@ void FixQEqReax::pre_force(int vflag) double t_start, t_end; if (update->ntimestep % nevery) return; - if( comm->me == 0 ) t_start = MPI_Wtime(); + if (comm->me == 0) t_start = MPI_Wtime(); n = atom->nlocal; N = atom->nlocal + atom->nghost; @@ -471,16 +498,19 @@ void FixQEqReax::pre_force(int vflag) // grow arrays if necessary // need to be atom->nmax in length - if( atom->nmax > nmax ) reallocate_storage(); - if( n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE ) + if (atom->nmax > nmax) reallocate_storage(); + if (n > n_cap*DANGER_ZONE || m_fill > m_cap*DANGER_ZONE) reallocate_matrix(); init_matvec(); - matvecs = CG(b_s, s); // CG on s - parallel - matvecs += CG(b_t, t); // CG on t - parallel + + matvecs_s = CG(b_s, s); // CG on s - parallel + matvecs_t = CG(b_t, t); // CG on t - parallel + matvecs = matvecs_s + matvecs_t; + calculate_Q(); - if( comm->me == 0 ) { + if (comm->me == 0) { t_end = MPI_Wtime(); qeq_time = t_end - t_start; } @@ -518,7 +548,7 @@ void FixQEqReax::init_matvec() ilist = list->ilist; } - for( ii = 0; ii < nn; ++ii ) { + for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { @@ -533,7 +563,7 @@ void FixQEqReax::init_matvec() /* quadratic extrapolation for s & t from previous solutions */ //s[i] = s_hist[i][2] + 3 * ( s_hist[i][0] - s_hist[i][1] ); - t[i] = t_hist[i][2] + 3 * ( t_hist[i][0] - t_hist[i][1] ); + t[i] = t_hist[i][2] + 
3 * ( t_hist[i][0] - t_hist[i][1]); /* cubic extrapolation for s & t from previous solutions */ s[i] = 4*(s_hist[i][0]+s_hist[i][2])-(6*s_hist[i][1]+s_hist[i][3]); @@ -553,12 +583,12 @@ void FixQEqReax::compute_H() { int inum, jnum, *ilist, *jlist, *numneigh, **firstneigh; int i, j, ii, jj, flag; - double **x, SMALL = 0.0001; double dx, dy, dz, r_sqr; + const double SMALL = 0.0001; int *type = atom->type; tagint *tag = atom->tag; - x = atom->x; + double **x = atom->x; int *mask = atom->mask; if (reaxc) { @@ -576,14 +606,14 @@ void FixQEqReax::compute_H() // fill in the H matrix m_fill = 0; r_sqr = 0; - for( ii = 0; ii < inum; ii++ ) { + for (ii = 0; ii < inum; ii++) { i = ilist[ii]; if (mask[i] & groupbit) { jlist = firstneigh[i]; jnum = numneigh[i]; H.firstnbr[i] = m_fill; - for( jj = 0; jj < jnum; jj++ ) { + for (jj = 0; jj < jnum; jj++) { j = jlist[jj]; dx = x[j][0] - x[i][0]; @@ -605,9 +635,9 @@ void FixQEqReax::compute_H() } } - if( flag ) { + if (flag) { H.jlist[m_fill] = j; - H.val[m_fill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]] ); + H.val[m_fill] = calculate_H( sqrt(r_sqr), shld[type[i]][type[j]]); m_fill++; } } @@ -618,7 +648,7 @@ void FixQEqReax::compute_H() if (m_fill >= H.m) { char str[128]; sprintf(str,"H matrix size has been exceeded: m_fill=%d H.m=%d\n", - m_fill, H.m ); + m_fill, H.m); error->warning(FLERR,str); error->all(FLERR,"Fix qeq/reax has insufficient QEq matrix size"); } @@ -626,7 +656,7 @@ void FixQEqReax::compute_H() /* ---------------------------------------------------------------------- */ -double FixQEqReax::calculate_H( double r, double gamma ) +double FixQEqReax::calculate_H( double r, double gamma) { double Taper, denom; @@ -646,7 +676,7 @@ double FixQEqReax::calculate_H( double r, double gamma ) /* ---------------------------------------------------------------------- */ -int FixQEqReax::CG( double *b, double *x ) +int FixQEqReax::CG( double *b, double *x) { int i, j, imax; double tmp, alpha, beta, b_norm; @@ -665,21 
+695,21 @@ int FixQEqReax::CG( double *b, double *x ) imax = 200; pack_flag = 1; - sparse_matvec( &H, x, q ); - comm->reverse_comm_fix( this ); //Coll_Vector( q ); + sparse_matvec( &H, x, q); + comm->reverse_comm_fix(this); //Coll_Vector( q ); - vector_sum( r , 1., b, -1., q, nn ); + vector_sum( r , 1., b, -1., q, nn); - for( jj = 0; jj < nn; ++jj ) { + for (jj = 0; jj < nn; ++jj) { j = ilist[jj]; if (atom->mask[j] & groupbit) d[j] = r[j] * Hdia_inv[j]; //pre-condition } - b_norm = parallel_norm( b, nn ); + b_norm = parallel_norm( b, nn); sig_new = parallel_dot( r, d, nn); - for( i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i ) { + for (i = 1; i < imax && sqrt(sig_new) / b_norm > tolerance; ++i) { comm->forward_comm_fix(this); //Dist_vector( d ); sparse_matvec( &H, d, q ); comm->reverse_comm_fix(this); //Coll_vector( q ); @@ -691,7 +721,7 @@ int FixQEqReax::CG( double *b, double *x ) vector_add( r, -alpha, q, nn ); // pre-conditioning - for( jj = 0; jj < nn; ++jj ) { + for (jj = 0; jj < nn; ++jj) { j = ilist[jj]; if (atom->mask[j] & groupbit) p[j] = r[j] * Hdia_inv[j]; @@ -702,7 +732,6 @@ int FixQEqReax::CG( double *b, double *x ) beta = sig_new / sig_old; vector_sum( d, 1., p, beta, d, nn ); - } if (i >= imax && comm->me == 0) { @@ -718,7 +747,7 @@ int FixQEqReax::CG( double *b, double *x ) /* ---------------------------------------------------------------------- */ -void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b ) +void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b) { int i, j, itr_j; int nn, NN, ii; @@ -734,22 +763,22 @@ void FixQEqReax::sparse_matvec( sparse_matrix *A, double *x, double *b ) ilist = list->ilist; } - for( ii = 0; ii < nn; ++ii ) { + for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) b[i] = eta[ atom->type[i] ] * x[i]; } - for( ii = nn; ii < NN; ++ii ) { + for (ii = nn; ii < NN; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) b[i] = 0; } - for( ii = 0; ii < nn; 
++ii ) { + for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { - for( itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) { + for (itr_j=A->firstnbr[i]; itr_j<A->firstnbr[i]+A->numnbrs[i]; itr_j++) { j = A->jlist[itr_j]; b[i] += A->val[itr_j] * x[j]; b[j] += A->val[itr_j] * x[i]; @@ -778,17 +807,17 @@ void FixQEqReax::calculate_Q() ilist = list->ilist; } - s_sum = parallel_vector_acc( s, nn ); + s_sum = parallel_vector_acc( s, nn); t_sum = parallel_vector_acc( t, nn); u = s_sum / t_sum; - for( ii = 0; ii < nn; ++ii ) { + for (ii = 0; ii < nn; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) { q[i] = s[i] - u * t[i]; /* backup s & t */ - for( k = 4; k > 0; --k ) { + for (k = 4; k > 0; --k) { s_hist[i][k] = s_hist[i][k-1]; t_hist[i][k] = t_hist[i][k-1]; } @@ -798,7 +827,7 @@ void FixQEqReax::calculate_Q() } pack_flag = 4; - comm->forward_comm_fix( this ); //Dist_vector( atom->q ); + comm->forward_comm_fix(this); //Dist_vector( atom->q ); } /* ---------------------------------------------------------------------- */ @@ -808,15 +837,23 @@ int FixQEqReax::pack_forward_comm(int n, int *list, double *buf, { int m; - if( pack_flag == 1) + if (pack_flag == 1) for(m = 0; m < n; m++) buf[m] = d[list[m]]; - else if( pack_flag == 2 ) + else if (pack_flag == 2) for(m = 0; m < n; m++) buf[m] = s[list[m]]; - else if( pack_flag == 3 ) + else if (pack_flag == 3) for(m = 0; m < n; m++) buf[m] = t[list[m]]; - else if( pack_flag == 4 ) + else if (pack_flag == 4) for(m = 0; m < n; m++) buf[m] = atom->q[list[m]]; - + else if (pack_flag == 5) { + m = 0; + for(int i = 0; i < n; i++) { + int j = 2 * list[i]; + buf[m++] = d[j ]; + buf[m++] = d[j+1]; + } + return m; + } return n; } @@ -826,14 +863,23 @@ void FixQEqReax::unpack_forward_comm(int n, int first, double *buf) { int i, m; - if( pack_flag == 1) + if (pack_flag == 1) for(m = 0, i = first; m < n; m++, i++) d[i] = buf[m]; - else if( pack_flag == 2) + else if (pack_flag == 2) for(m = 0, i = 
first; m < n; m++, i++) s[i] = buf[m]; - else if( pack_flag == 3) + else if (pack_flag == 3) for(m = 0, i = first; m < n; m++, i++) t[i] = buf[m]; - else if( pack_flag == 4) + else if (pack_flag == 4) for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m]; + else if (pack_flag == 5) { + int last = first + n; + m = 0; + for(i = first; i < last; i++) { + int j = 2 * i; + d[j ] = buf[m++]; + d[j+1] = buf[m++]; + } + } } /* ---------------------------------------------------------------------- */ @@ -841,15 +887,35 @@ void FixQEqReax::unpack_forward_comm(int n, int first, double *buf) int FixQEqReax::pack_reverse_comm(int n, int first, double *buf) { int i, m; - for(m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; - return n; + if (pack_flag == 5) { + m = 0; + int last = first + n; + for(i = first; i < last; i++) { + int indxI = 2 * i; + buf[m++] = q[indxI ]; + buf[m++] = q[indxI+1]; + } + return m; + } else { + for (m = 0, i = first; m < n; m++, i++) buf[m] = q[i]; + return n; + } } /* ---------------------------------------------------------------------- */ void FixQEqReax::unpack_reverse_comm(int n, int *list, double *buf) { - for(int m = 0; m < n; m++) q[list[m]] += buf[m]; + if (pack_flag == 5) { + int m = 0; + for(int i = 0; i < n; i++) { + int indxI = 2 * list[i]; + q[indxI ] += buf[m++]; + q[indxI+1] += buf[m++]; + } + } else { + for (int m = 0; m < n; m++) q[list[m]] += buf[m]; + } } /* ---------------------------------------------------------------------- @@ -866,6 +932,9 @@ double FixQEqReax::memory_usage() bytes += m_cap * sizeof(int); bytes += m_cap * sizeof(double); + if (dual_enabled) + bytes += atom->nmax*4 * sizeof(double); // double size for q, d, r, and p + return bytes; } @@ -915,7 +984,7 @@ int FixQEqReax::unpack_exchange(int nlocal, double *buf) /* ---------------------------------------------------------------------- */ -double FixQEqReax::parallel_norm( double *v, int n ) +double FixQEqReax::parallel_norm( double *v, int n) { int i; double 
my_sum, norm_sqr; @@ -930,15 +999,15 @@ double FixQEqReax::parallel_norm( double *v, int n ) my_sum = 0.0; norm_sqr = 0.0; - for( ii = 0; ii < n; ++ii ) { + for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) - my_sum += SQR( v[i] ); + my_sum += SQR( v[i]); } - MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world ); + MPI_Allreduce( &my_sum, &norm_sqr, 1, MPI_DOUBLE, MPI_SUM, world); - return sqrt( norm_sqr ); + return sqrt( norm_sqr); } /* ---------------------------------------------------------------------- */ @@ -958,20 +1027,20 @@ double FixQEqReax::parallel_dot( double *v1, double *v2, int n) my_dot = 0.0; res = 0.0; - for( ii = 0; ii < n; ++ii ) { + for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) my_dot += v1[i] * v2[i]; } - MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world ); + MPI_Allreduce( &my_dot, &res, 1, MPI_DOUBLE, MPI_SUM, world); return res; } /* ---------------------------------------------------------------------- */ -double FixQEqReax::parallel_vector_acc( double *v, int n ) +double FixQEqReax::parallel_vector_acc( double *v, int n) { int i; double my_acc, res; @@ -986,13 +1055,13 @@ double FixQEqReax::parallel_vector_acc( double *v, int n ) my_acc = 0.0; res = 0.0; - for( ii = 0; ii < n; ++ii ) { + for (ii = 0; ii < n; ++ii) { i = ilist[ii]; if (atom->mask[i] & groupbit) my_acc += v[i]; } - MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world ); + MPI_Allreduce( &my_acc, &res, 1, MPI_DOUBLE, MPI_SUM, world); return res; } @@ -1000,7 +1069,7 @@ double FixQEqReax::parallel_vector_acc( double *v, int n ) /* ---------------------------------------------------------------------- */ void FixQEqReax::vector_sum( double* dest, double c, double* v, - double d, double* y, int k ) + double d, double* y, int k) { int kk; int *ilist; @@ -1010,7 +1079,7 @@ void FixQEqReax::vector_sum( double* dest, double c, double* v, else ilist = list->ilist; - for( --k; k>=0; --k ) { + for (--k; 
k>=0; --k) { kk = ilist[k]; if (atom->mask[kk] & groupbit) dest[kk] = c * v[kk] + d * y[kk]; @@ -1019,7 +1088,7 @@ void FixQEqReax::vector_sum( double* dest, double c, double* v, /* ---------------------------------------------------------------------- */ -void FixQEqReax::vector_add( double* dest, double c, double* v, int k ) +void FixQEqReax::vector_add( double* dest, double c, double* v, int k) { int kk; int *ilist; @@ -1029,10 +1098,9 @@ void FixQEqReax::vector_add( double* dest, double c, double* v, int k ) else ilist = list->ilist; - for( --k; k>=0; --k ) { + for (--k; k>=0; --k) { kk = ilist[k]; if (atom->mask[kk] & groupbit) dest[kk] += c * v[kk]; } - } diff --git a/src/USER-REAXC/fix_qeq_reax.h b/src/USER-REAXC/fix_qeq_reax.h index 7c3e8a8f96180e6354c0ac92bcbe14aaf696e431..19efcd2b03417a8a9ddc052c3f527939743a4a9b 100644 --- a/src/USER-REAXC/fix_qeq_reax.h +++ b/src/USER-REAXC/fix_qeq_reax.h @@ -39,15 +39,17 @@ class FixQEqReax : public Fix { FixQEqReax(class LAMMPS *, int, char **); ~FixQEqReax(); int setmask(); - void init(); + virtual void post_constructor(); + virtual void init(); void init_list(int,class NeighList *); - void init_storage(); + virtual void init_storage(); void setup_pre_force(int); - void pre_force(int); + virtual void pre_force(int); void setup_pre_force_respa(int, int); void pre_force_respa(int, int, int); + void min_setup_pre_force(int); void min_pre_force(int); int matvecs; @@ -99,25 +101,26 @@ class FixQEqReax : public Fix { //double **h; //double *hc, *hs; - void pertype_parameters(char*); + char *pertype_option; // argument to determine how per-type info is obtained + virtual void pertype_parameters(char*); void init_shielding(); void init_taper(); - void allocate_storage(); - void deallocate_storage(); + virtual void allocate_storage(); + virtual void deallocate_storage(); void reallocate_storage(); - void allocate_matrix(); + virtual void allocate_matrix(); void deallocate_matrix(); void reallocate_matrix(); - void 
init_matvec(); + virtual void init_matvec(); void init_H(); - void compute_H(); + virtual void compute_H(); double calculate_H(double,double); - void calculate_Q(); + virtual void calculate_Q(); - int CG(double*,double*); + virtual int CG(double*,double*); //int GMRES(double*,double*); - void sparse_matvec(sparse_matrix*,double*,double*); + virtual void sparse_matvec(sparse_matrix*,double*,double*); int pack_forward_comm(int, int *, double *, int, int *); void unpack_forward_comm(int, int, double *); @@ -129,12 +132,16 @@ class FixQEqReax : public Fix { int pack_exchange(int, double *); int unpack_exchange(int, double *); - double parallel_norm( double*, int ); - double parallel_dot( double*, double*, int ); - double parallel_vector_acc( double*, int ); + virtual double parallel_norm( double*, int ); + virtual double parallel_dot( double*, double*, int ); + virtual double parallel_vector_acc( double*, int ); - void vector_sum(double*,double,double*,double,double*,int); - void vector_add(double*, double, double*,int); + virtual void vector_sum(double*,double,double*,double,double*,int); + virtual void vector_add(double*, double, double*,int); + + // dual CG support + int dual_enabled; // 0: Original, separate s & t optimization; 1: dual optimization + int matvecs_s, matvecs_t; // Iteration count for each system }; } diff --git a/src/USER-REAXC/fix_reaxc.h b/src/USER-REAXC/fix_reaxc.h index e51a94e4a9512e9345f26e3bfa8bc32236197a41..0e173f5ecefdf221cbfe7038222e370a2db057f8 100644 --- a/src/USER-REAXC/fix_reaxc.h +++ b/src/USER-REAXC/fix_reaxc.h @@ -36,6 +36,7 @@ namespace LAMMPS_NS { class FixReaxC : public Fix { friend class PairReaxC; + friend class PairReaxCOMP; public: FixReaxC(class LAMMPS *,int, char **); diff --git a/src/USER-REAXC/fix_reaxc_bonds.cpp b/src/USER-REAXC/fix_reaxc_bonds.cpp index fe830b508e432c1f4b74228ad1757059eab083f3..09ca132635e83fde5bd95608a37db3fa71838d86 100644 --- a/src/USER-REAXC/fix_reaxc_bonds.cpp +++ 
b/src/USER-REAXC/fix_reaxc_bonds.cpp @@ -121,12 +121,9 @@ void FixReaxCBonds::setup(int vflag) void FixReaxCBonds::init() { - reaxc = (PairReaxC *) force->pair_match("reax/c",1); - if (reaxc == NULL) - reaxc = (PairReaxC *) force->pair_match("reax/c/kk",1); - + reaxc = (PairReaxC *) force->pair_match("reax/c",0); if (reaxc == NULL) error->all(FLERR,"Cannot use fix reax/c/bonds without " - "pair_style reax/c"); + "pair_style reax/c, reax/c/kk, or reax/c/omp"); } diff --git a/src/USER-REAXC/fix_reaxc_species.cpp b/src/USER-REAXC/fix_reaxc_species.cpp index 4e57dd1c4bdcf469db81500f9b4f53368c7a7777..df28a34fe884bbe28a02f5c6058cccf53e521fbf 100644 --- a/src/USER-REAXC/fix_reaxc_species.cpp +++ b/src/USER-REAXC/fix_reaxc_species.cpp @@ -13,7 +13,7 @@ /* ---------------------------------------------------------------------- Contributing authors: Ray Shan (Sandia, tnshan@sandia.gov) - Oleg Sergeev (VNIIA, sergeev@vniia.ru) + Oleg Sergeev (VNIIA, sergeev@vniia.ru) ------------------------------------------------------------------------- */ #include <stdlib.h> @@ -68,7 +68,7 @@ FixReaxCSpecies::FixReaxCSpecies(LAMMPS *lmp, int narg, char **arg) : nrepeat = atoi(arg[4]); global_freq = nfreq = atoi(arg[5]); - comm_forward = 5; + comm_forward = 4; if (nevery <= 0 || nrepeat <= 0 || nfreq <= 0) error->all(FLERR,"Illegal fix reax/c/species command"); @@ -133,12 +133,10 @@ FixReaxCSpecies::FixReaxCSpecies(LAMMPS *lmp, int narg, char **arg) : } x0 = NULL; - PBCconnected = NULL; clusterID = NULL; int ntmp = 1; memory->create(x0,ntmp,"reax/c/species:x0"); - memory->create(PBCconnected,ntmp,"reax/c/species:PBCconnected"); memory->create(clusterID,ntmp,"reax/c/species:clusterID"); vector_atom = clusterID; @@ -182,35 +180,35 @@ FixReaxCSpecies::FixReaxCSpecies(LAMMPS *lmp, int narg, char **arg) : jtype = atoi(arg[iarg+2]); bo_cut = atof(arg[iarg+3]); if (itype > ntypes || jtype > ntypes) - error->all(FLERR,"Illegal fix reax/c/species command"); + error->all(FLERR,"Illegal fix 
reax/c/species command"); if (itype <= 0 || jtype <= 0) - error->all(FLERR,"Illegal fix reax/c/species command"); + error->all(FLERR,"Illegal fix reax/c/species command"); if (bo_cut > 1.0 || bo_cut < 0.0) - error->all(FLERR,"Illegal fix reax/c/species command"); + error->all(FLERR,"Illegal fix reax/c/species command"); BOCut[itype][jtype] = bo_cut; BOCut[jtype][itype] = bo_cut; iarg += 4; - // modify element type names + // modify element type names } else if (strcmp(arg[iarg],"element") == 0) { if (iarg+ntypes+1 > narg) error->all(FLERR,"Illegal fix reax/c/species command"); eletype = (char**) malloc(ntypes*sizeof(char*)); for (int i = 0; i < ntypes; i ++) { eletype[i] = (char*) malloc(2*sizeof(char)); - strcpy(eletype[i],arg[iarg+1+i]); + strcpy(eletype[i],arg[iarg+1+i]); } eleflag = 1; iarg += ntypes + 1; - // position of molecules + // position of molecules } else if (strcmp(arg[iarg],"position") == 0) { if (iarg+3 > narg) error->all(FLERR,"Illegal fix reax/c/species command"); posflag = 1; posfreq = atoi(arg[iarg+1]); if (posfreq < nfreq || (posfreq%nfreq != 0)) - error->all(FLERR,"Illegal fix reax/c/species command"); + error->all(FLERR,"Illegal fix reax/c/species command"); filepos = new char[255]; strcpy(filepos,arg[iarg+2]); @@ -221,8 +219,8 @@ FixReaxCSpecies::FixReaxCSpecies(LAMMPS *lmp, int narg, char **arg) : pos = fopen(filepos, "w"); if (pos == NULL) error->one(FLERR,"Cannot open fix reax/c/species position file"); } - singlepos_opened = 1; - multipos = 0; + singlepos_opened = 1; + multipos = 0; } iarg += 3; } else error->all(FLERR,"Illegal fix reax/c/species command"); @@ -251,7 +249,6 @@ FixReaxCSpecies::~FixReaxCSpecies() memory->destroy(ele); memory->destroy(BOCut); memory->destroy(clusterID); - memory->destroy(PBCconnected); memory->destroy(x0); memory->destroy(nd); @@ -298,12 +295,9 @@ void FixReaxCSpecies::init() if (atom->tag_enable == 0) error->all(FLERR,"Cannot use fix reax/c/species unless atoms have IDs"); - reaxc = (PairReaxC *) 
force->pair_match("reax/c",1); - if (reaxc == NULL) - reaxc = (PairReaxC *) force->pair_match("reax/c/kk",1); - + reaxc = (PairReaxC *) force->pair_match("reax/c",0); if (reaxc == NULL) error->all(FLERR,"Cannot use fix reax/c/species without " - "pair_style reax/c"); + "pair_style reax/c, reax/c/kk, or reax/c/omp"); reaxc->fixspecies_flag = 1; @@ -392,14 +386,14 @@ void FixReaxCSpecies::create_fix() args[3] = tmparg[0]; args[4] = tmparg[1]; args[5] = tmparg[2]; - args[6] = (char *) "c_SPECATOM[1]"; // q, array_atoms[i][0] - args[7] = (char *) "c_SPECATOM[2]"; // x, 1 - args[8] = (char *) "c_SPECATOM[3]"; // y, 2 - args[9] = (char *) "c_SPECATOM[4]"; // z, 3 - args[10] = (char *) "c_SPECATOM[5]"; // vx, 4 - args[11] = (char *) "c_SPECATOM[6]"; // vy, 5 - args[12] = (char *) "c_SPECATOM[7]"; // vz, 6 - args[13] = (char *) "c_SPECATOM[8]"; // abo01, 7 + args[6] = (char *) "c_SPECATOM[1]"; // q, array_atoms[i][0] + args[7] = (char *) "c_SPECATOM[2]"; // x, 1 + args[8] = (char *) "c_SPECATOM[3]"; // y, 2 + args[9] = (char *) "c_SPECATOM[4]"; // z, 3 + args[10] = (char *) "c_SPECATOM[5]"; // vx, 4 + args[11] = (char *) "c_SPECATOM[6]"; // vy, 5 + args[12] = (char *) "c_SPECATOM[7]"; // vz, 6 + args[13] = (char *) "c_SPECATOM[8]"; // abo01, 7 args[14] = (char *) "c_SPECATOM[9]"; args[15] = (char *) "c_SPECATOM[10]"; args[16] = (char *) "c_SPECATOM[11]"; @@ -461,16 +455,13 @@ void FixReaxCSpecies::Output_ReaxC_Bonds(bigint ntimestep, FILE *fp) if (atom->nmax > nmax) { nmax = atom->nmax; memory->destroy(x0); - memory->destroy(PBCconnected); memory->destroy(clusterID); memory->create(x0,nmax,"reax/c/species:x0"); - memory->create(PBCconnected,nmax,"reax/c/species:PBCconnected"); memory->create(clusterID,nmax,"reax/c/species:clusterID"); vector_atom = clusterID; } for (int i = 0; i < nmax; i++) { - PBCconnected[i] = 0; x0[i].x = x0[i].y = x0[i].z = 0.0; } @@ -523,6 +514,8 @@ void FixReaxCSpecies::FindMolecule () int *ilist; double bo_tmp,bo_cut; double **spec_atom = 
f_SPECBOND->array_atom; + const double * const * const x = atom->x; + const int nlocal = atom->nlocal; inum = reaxc->list->inum; ilist = reaxc->list->ilist; @@ -548,35 +541,32 @@ void FixReaxCSpecies::FindMolecule () done = 1; for (ii = 0; ii < inum; ii++) { - i = ilist[ii]; - if (!(mask[i] & groupbit)) continue; + i = ilist[ii]; + if (!(mask[i] & groupbit)) continue; - itype = atom->type[i]; + itype = atom->type[i]; for (jj = 0; jj < MAXSPECBOND; jj++) { - j = reaxc->tmpid[i][jj]; + j = reaxc->tmpid[i][jj]; - if (j < i) continue; - if (!(mask[j] & groupbit)) continue; + if ((j == 0) || (j < i)) continue; + if (!(mask[j] & groupbit)) continue; - if (clusterID[i] == clusterID[j] && PBCconnected[i] == PBCconnected[j] - && x0[i].x == x0[j].x && x0[i].y == x0[j].y && x0[i].z == x0[j].z) continue; + if (clusterID[i] == clusterID[j] + && x0[i].x == x0[j].x + && x0[i].y == x0[j].y + && x0[i].z == x0[j].z) continue; jtype = atom->type[j]; - bo_cut = BOCut[itype][jtype]; - bo_tmp = spec_atom[i][jj+7]; + bo_cut = BOCut[itype][jtype]; + bo_tmp = spec_atom[i][jj+7]; - if (bo_tmp > bo_cut) { + if (bo_tmp > bo_cut) { clusterID[i] = clusterID[j] = MIN(clusterID[i], clusterID[j]); - PBCconnected[i] = PBCconnected[j] = MAX(PBCconnected[i], PBCconnected[j]); x0[i] = x0[j] = chAnchor(x0[i], x0[j]); - if ((fabs(spec_atom[i][1] - spec_atom[j][1]) > reaxc->control->bond_cut) - || (fabs(spec_atom[i][2] - spec_atom[j][2]) > reaxc->control->bond_cut) - || (fabs(spec_atom[i][3] - spec_atom[j][3]) > reaxc->control->bond_cut)) - PBCconnected[i] = PBCconnected[j] = 1; - done = 0; - } - } + done = 0; + } + } } if (!done) change = 1; if (done) break; @@ -612,13 +602,13 @@ void FixReaxCSpecies::SortMolecule(int &Nmole) MPI_Allreduce(&flag,&flagall,1,MPI_INT,MPI_SUM,world); if (flagall && me == 0) error->warning(FLERR,"Atom with cluster ID = 0 included in " - "fix reax/c/species group"); + "fix reax/c/species group"); MPI_Allreduce(&lo,&idlo,1,MPI_INT,MPI_MIN,world); 
MPI_Allreduce(&hi,&idhi,1,MPI_INT,MPI_MAX,world); if (idlo == ntotal) if (me == 0) error->warning(FLERR,"Atom with cluster ID = maxmol " - "included in fix reax/c/species group"); + "included in fix reax/c/species group"); int nlen = idhi - idlo + 1; memory->create(molmap,nlen,"reax/c/species:molmap"); @@ -798,7 +788,7 @@ void FixReaxCSpecies::OpenPos() *ptr = '\0'; if (padflag == 0) sprintf(filecurrent,"%s" BIGINT_FORMAT "%s", - filepos,ntimestep,ptr+1); + filepos,ntimestep,ptr+1); else { char bif[8],pad[16]; strcpy(bif,BIGINT_FORMAT); @@ -838,11 +828,11 @@ void FixReaxCSpecies::WritePos(int Nmole, int Nspec) if (me == 0) { fprintf(pos,"Timestep " BIGINT_FORMAT " NMole %d NSpec %d xlo %f " - "xhi %f ylo %f yhi %f zlo %f zhi %f\n", - update->ntimestep,Nmole, Nspec, - domain->boxlo[0],domain->boxhi[0], - domain->boxlo[1],domain->boxhi[1], - domain->boxlo[2],domain->boxhi[2]); + "xhi %f ylo %f yhi %f zlo %f zhi %f\n", + update->ntimestep,Nmole, Nspec, + domain->boxlo[0],domain->boxhi[0], + domain->boxlo[1],domain->boxhi[1], + domain->boxlo[2],domain->boxhi[2]); fprintf(pos,"ID\tAtom_Count\tType\tAve_q\t\tCoM_x\t\tCoM_y\t\tCoM_z\n"); } @@ -865,22 +855,20 @@ void FixReaxCSpecies::WritePos(int Nmole, int Nspec) if (cid == m) { itype = atom->type[i]-1; Name[itype] ++; - count ++; - avq += spec_atom[i][0]; - if (PBCconnected[i]) { - if ((x0[i].x - spec_atom[i][1]) > halfbox[0]) - spec_atom[i][1] += box[0]; - if ((spec_atom[i][1] - x0[i].x) > halfbox[0]) - spec_atom[i][1] -= box[0]; - if ((x0[i].y - spec_atom[i][2]) > halfbox[1]) - spec_atom[i][2] += box[1]; - if ((spec_atom[i][2] - x0[i].y) > halfbox[1]) - spec_atom[i][2] -= box[1]; - if ((x0[i].z - spec_atom[i][3]) > halfbox[2]) - spec_atom[i][3] += box[2]; - if ((spec_atom[i][3] - x0[i].z) > halfbox[2]) - spec_atom[i][3] -= box[2]; - } + count ++; + avq += spec_atom[i][0]; + if ((x0[i].x - spec_atom[i][1]) > halfbox[0]) + spec_atom[i][1] += box[0]; + if ((spec_atom[i][1] - x0[i].x) > halfbox[0]) + spec_atom[i][1] -= 
box[0]; + if ((x0[i].y - spec_atom[i][2]) > halfbox[1]) + spec_atom[i][2] += box[1]; + if ((spec_atom[i][2] - x0[i].y) > halfbox[1]) + spec_atom[i][2] -= box[1]; + if ((x0[i].z - spec_atom[i][3]) > halfbox[2]) + spec_atom[i][3] += box[2]; + if ((spec_atom[i][3] - x0[i].z) > halfbox[2]) + spec_atom[i][3] -= box[2]; for (n = 0; n < 3; n++) avx[n] += spec_atom[i][n+1]; } @@ -914,17 +902,17 @@ void FixReaxCSpecies::WritePos(int Nmole, int Nspec) if (count > 0) { avq /= count; for (k = 0; k < 3; k++) { - avx[k] /= count; + avx[k] /= count; if (avx[k] >= domain->boxhi[k]) avx[k] -= box[k]; if (avx[k] < domain->boxlo[k]) avx[k] += box[k]; - avx[k] -= domain->boxlo[k]; - avx[k] /= box[k]; + avx[k] -= domain->boxlo[k]; + avx[k] /= box[k]; } fprintf(pos,"\t%.8f \t%.8f \t%.8f \t%.8f", - avq,avx[0],avx[1],avx[2]); + avq,avx[0],avx[1],avx[2]); } fprintf(pos,"\n"); } @@ -966,11 +954,10 @@ int FixReaxCSpecies::pack_forward_comm(int n, int *list, double *buf, for (i = 0; i < n; i++) { j = list[i]; buf[m] = clusterID[j]; - buf[m+1] = (double)PBCconnected[j]; - buf[m+2] = x0[j].x; - buf[m+3] = x0[j].y; - buf[m+4] = x0[j].z; - m += 5; + buf[m+1] = x0[j].x; + buf[m+2] = x0[j].y; + buf[m+3] = x0[j].z; + m += 4; } return m; } @@ -985,11 +972,10 @@ void FixReaxCSpecies::unpack_forward_comm(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { clusterID[i] = buf[m]; - PBCconnected[i] = (int)buf[m+1]; - x0[i].x = buf[m+2]; - x0[i].y = buf[m+3]; - x0[i].z = buf[m+4]; - m += 5; + x0[i].x = buf[m+1]; + x0[i].y = buf[m+2]; + x0[i].z = buf[m+3]; + m += 4; } } @@ -999,7 +985,7 @@ double FixReaxCSpecies::memory_usage() { double bytes; - bytes = 5*nmax*sizeof(double); // clusterID + PBCconnected + x0 + bytes = 4*nmax*sizeof(double); // clusterID + x0 return bytes; } diff --git a/src/USER-REAXC/fix_reaxc_species.h b/src/USER-REAXC/fix_reaxc_species.h index 563a10f39db3913a49abbf97cb28b63975b64d51..23a470fd0ac63a37862bd032d51e953c261db3f9 100644 --- 
a/src/USER-REAXC/fix_reaxc_species.h +++ b/src/USER-REAXC/fix_reaxc_species.h @@ -52,7 +52,6 @@ class FixReaxCSpecies : public Fix { int Nmoltype, vector_nmole, vector_nspec; int *Name, *MolName, *NMol, *nd, *MolType, *molmap; double *clusterID; - int *PBCconnected; AtomCoord *x0; double bg_cut; diff --git a/src/USER-REAXC/pair_reaxc.cpp b/src/USER-REAXC/pair_reaxc.cpp index d51b0fc2f8bb28b9516218daf1bb433948ef7b9b..bf3b2e44674b6ce5e325dfecf4cf2c28b07fdb3d 100644 --- a/src/USER-REAXC/pair_reaxc.cpp +++ b/src/USER-REAXC/pair_reaxc.cpp @@ -211,6 +211,9 @@ void PairReaxC::settings(int narg, char **arg) control->thb_cutsq = 0.00001; control->bg_cut = 0.3; + // Initialize for when omp style included + control->nthreads = 1; + out_control->write_steps = 0; out_control->traj_method = 0; strcpy( out_control->traj_title, "default_title" ); @@ -227,7 +230,7 @@ void PairReaxC::settings(int narg, char **arg) system->mincap = MIN_CAP; system->safezone = SAFE_ZONE; system->saferzone = SAFER_ZONE; - + // process optional keywords int iarg = 1; @@ -256,7 +259,7 @@ void PairReaxC::settings(int narg, char **arg) system->safezone = force->numeric(FLERR,arg[iarg+1]); if (system->safezone < 0.0) error->all(FLERR,"Illegal pair_style reax/c safezone command"); - system->saferzone = system->safezone*1.2; + system->saferzone = system->safezone*1.2 + 0.2; iarg += 2; } else if (strcmp(arg[iarg],"mincap") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal pair_style reax/c command"); @@ -457,6 +460,9 @@ void PairReaxC::setup( ) ReAllocate( system, control, data, workspace, &lists, mpi_data ); } + + bigint local_ngroup = list->inum; + MPI_Allreduce( &local_ngroup, &ngroup, 1, MPI_LMP_BIGINT, MPI_SUM, world ); } /* ---------------------------------------------------------------------- */ diff --git a/src/USER-REAXC/pair_reaxc.h b/src/USER-REAXC/pair_reaxc.h index e3c9e63bdcf9ca4c8f18883c9b701d6d986afd01..91b44be66187347ba9de0544dfde165504c48fca 100644 --- a/src/USER-REAXC/pair_reaxc.h +++ 
b/src/USER-REAXC/pair_reaxc.h @@ -37,12 +37,18 @@ namespace LAMMPS_NS { class PairReaxC : public Pair { public: + PairReaxC(class LAMMPS *); + ~PairReaxC(); + void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + virtual void init_style(); + double init_one(int, int); + void *extract(const char *, int &); int fixbond_flag, fixspecies_flag; int **tmpid; double **tmpbo,**tmpr; - double *chi,*eta,*gamma; - int *map; control_params *control; reax_system *system; output_controls *out_control; @@ -51,21 +57,16 @@ class PairReaxC : public Pair { reax_list *lists; mpi_datatypes *mpi_data; - PairReaxC(class LAMMPS *); - ~PairReaxC(); - void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_style(); - double init_one(int, int); - void *extract(const char *, int &); + bigint ngroup; protected: double cutmax; int nelements; // # of unique elements char **elements; // names of unique elements + int *map; class FixReaxC *fix_reax; + double *chi,*eta,*gamma; int qeqflag; int setup_flag; int firstwarn; diff --git a/src/USER-REAXC/reaxc_allocate.cpp b/src/USER-REAXC/reaxc_allocate.cpp index 969912e082ed8e7419ded19ee33c0ded09587c80..0d9c51c87892e10c998f6740a4b84ccdebdf5ef8 100644 --- a/src/USER-REAXC/reaxc_allocate.cpp +++ b/src/USER-REAXC/reaxc_allocate.cpp @@ -31,6 +31,10 @@ #include "reaxc_tool_box.h" #include "reaxc_vector.h" +#if defined(_OPENMP) +#include <omp.h> +#endif + /* allocate space for my_atoms important: we cannot know the exact number of atoms that will fall into a process's box throughout the whole simulation. 
therefore @@ -49,6 +53,15 @@ int PreAllocate_Space( reax_system *system, control_params *control, system->my_atoms = (reax_atom*) scalloc( system->total_cap, sizeof(reax_atom), "my_atoms", comm ); + // Nullify some arrays only used in omp styles + // Should be safe to do here since called in pair->setup(); +#ifdef LMP_USER_OMP + workspace->CdDeltaReduction = NULL; + workspace->forceReduction = NULL; + workspace->valence_angle_atom_myoffset = NULL; + workspace->my_ext_pressReduction = NULL; +#endif + return SUCCESS; } @@ -174,13 +187,21 @@ void DeAllocate_Workspace( control_params *control, storage *workspace ) sfree( workspace->q2, "q2" ); sfree( workspace->p2, "p2" ); - /* integrator */ + /* integrator storage */ sfree( workspace->v_const, "v_const" ); /* force related storage */ sfree( workspace->f, "f" ); sfree( workspace->CdDelta, "CdDelta" ); + /* reductions */ +#ifdef LMP_USER_OMP + if(workspace->CdDeltaReduction) sfree( workspace->CdDeltaReduction, "cddelta_reduce" ); + if(workspace->forceReduction) sfree( workspace->forceReduction, "f_reduce" ); + if(workspace->valence_angle_atom_myoffset) sfree( workspace->valence_angle_atom_myoffset, "valence_angle_atom_myoffset"); + + if (control->virial && workspace->my_ext_pressReduction) sfree( workspace->my_ext_pressReduction, "ext_press_reduce"); +#endif } @@ -272,11 +293,25 @@ int Allocate_Workspace( reax_system *system, control_params *control, /* integrator storage */ workspace->v_const = (rvec*) smalloc( local_rvec, "v_const", comm ); - // /* force related storage */ + /* force related storage */ workspace->f = (rvec*) scalloc( total_cap, sizeof(rvec), "f", comm ); workspace->CdDelta = (double*) scalloc( total_cap, sizeof(double), "CdDelta", comm ); + // storage for reductions with multiple threads +#ifdef LMP_USER_OMP + workspace->CdDeltaReduction = (double *) scalloc(sizeof(double), total_cap*control->nthreads, + "cddelta_reduce", comm); + + workspace->forceReduction = (rvec *) scalloc(sizeof(rvec), 
total_cap*control->nthreads, + "forceReduction", comm); + + workspace->valence_angle_atom_myoffset = (int *) scalloc(sizeof(int), total_cap, "valence_angle_atom_myoffset", comm); + + if (control->virial) + workspace->my_ext_pressReduction = (rvec *) calloc(sizeof(rvec), control->nthreads); +#endif + return SUCCESS; } @@ -334,12 +369,29 @@ static int Reallocate_Bonds_List( reax_system *system, reax_list *bonds, } *total_bonds = (int)(MAX( *total_bonds * safezone, mincap*MIN_BONDS )); +#ifdef LMP_USER_OMP + for (i = 0; i < bonds->num_intrs; ++i) + sfree(bonds->select.bond_list[i].bo_data.CdboReduction, "CdboReduction"); +#endif + Delete_List( bonds, comm ); if(!Make_List(system->total_cap, *total_bonds, TYP_BOND, bonds, comm)) { fprintf( stderr, "not enough space for bonds list. terminating!\n" ); MPI_Abort( comm, INSUFFICIENT_MEMORY ); } +#ifdef LMP_USER_OMP +#if defined(_OPENMP) + int nthreads = omp_get_max_threads(); +#else + int nthreads = 1; +#endif + + for (i = 0; i < bonds->num_intrs; ++i) + bonds->select.bond_list[i].bo_data.CdboReduction = + (double*) smalloc(sizeof(double)*nthreads, "CdboReduction", comm); +#endif + return SUCCESS; } @@ -438,7 +490,7 @@ void ReAllocate( reax_system *system, control_params *control, Reallocate_Bonds_List( system, (*lists)+BONDS, &num_bonds, &est_3body, comm ); realloc->bonds = 0; - realloc->num_3body = MAX( realloc->num_3body, est_3body ); + realloc->num_3body = MAX( realloc->num_3body, est_3body ) * 2; } /* 3-body list */ diff --git a/src/USER-REAXC/reaxc_control.cpp b/src/USER-REAXC/reaxc_control.cpp index 4def41bc8c7c309f1f3c4c0a47e1d74a54f8a758..11a89020b80865ca553224c05eac93b9ae86802f 100644 --- a/src/USER-REAXC/reaxc_control.cpp +++ b/src/USER-REAXC/reaxc_control.cpp @@ -48,6 +48,7 @@ char Read_Control_File( char *control_file, control_params* control, control->nsteps = 0; control->dt = 0.25; control->nprocs = 1; + control->nthreads = 1; control->procs_by_dim[0] = 1; control->procs_by_dim[1] = 1; 
control->procs_by_dim[2] = 1; diff --git a/src/USER-REAXC/reaxc_types.h b/src/USER-REAXC/reaxc_types.h index b3e2f40f02d1dceb06b0434b84c55f8f052a5efd..547602feb488b6bbc12336fc9e1dfb708529f0c0 100644 --- a/src/USER-REAXC/reaxc_types.h +++ b/src/USER-REAXC/reaxc_types.h @@ -38,6 +38,40 @@ #include "sys/time.h" #include <time.h> +#if defined LMP_USER_OMP +#define OMP_TIMING 1 + +#ifdef OMP_TIMING +// pkcoff timing fields +enum { + COMPUTEINDEX=0, + COMPUTEWLINDEX, + COMPUTEBFINDEX, + COMPUTEQEQINDEX, + COMPUTENBFINDEX, + COMPUTEIFINDEX, + COMPUTETFINDEX, + COMPUTEBOINDEX, + COMPUTEBONDSINDEX, + COMPUTEATOMENERGYINDEX, + COMPUTEVALENCEANGLESBOINDEX, + COMPUTETORSIONANGLESBOINDEX, + COMPUTEHBONDSINDEX, + COMPUTECG1INDEX, + COMPUTECG2INDEX, + COMPUTECGCOMPUTEINDEX, + COMPUTECALCQINDEX, + COMPUTEINITMVINDEX, + COMPUTEMVCOMPINDEX, + LASTTIMINGINDEX +}; + +extern double ompTimingData[LASTTIMINGINDEX]; +extern int ompTimingCount[LASTTIMINGINDEX]; +extern int ompTimingCGCount[LASTTIMINGINDEX]; +#endif +#endif + /************* SOME DEFS - crucial for reax_types.h *********/ #define LAMMPS_REAX @@ -391,6 +425,7 @@ typedef struct { char sim_name[REAX_MAX_STR]; int nprocs; + int nthreads; ivec procs_by_dim; /* ensemble values: 0 : NVE @@ -451,7 +486,7 @@ typedef struct int lgflag; int enobondsflag; - + } control_params; @@ -616,6 +651,7 @@ typedef struct{ double C1dbopi, C2dbopi, C3dbopi, C4dbopi; double C1dbopi2, C2dbopi2, C3dbopi2, C4dbopi2; rvec dBOp, dln_BOp_s, dln_BOp_pi, dln_BOp_pi2; + double *CdboReduction; } bond_order_data; typedef struct { @@ -702,6 +738,13 @@ typedef struct double *CdDelta; // coefficient of dDelta rvec *f; + /* omp */ + rvec *forceReduction; + rvec *my_ext_pressReduction; + double *CdDeltaReduction; + int *valence_angle_atom_myoffset; + + reallocate_data realloc; } storage; diff --git a/src/USER-TALLY/compute_force_tally.cpp b/src/USER-TALLY/compute_force_tally.cpp index 
e97a1c751c323e4482d14477c1b76f75a448c2eb..cb7e3a4f23e54d1086b53bad14fba071c31e3dad 100644 --- a/src/USER-TALLY/compute_force_tally.cpp +++ b/src/USER-TALLY/compute_force_tally.cpp @@ -44,8 +44,7 @@ ComputeForceTally::ComputeForceTally(LAMMPS *lmp, int narg, char **arg) : extscalar = 1; peflag = 1; // we need Pair::ev_tally() to be run - did_compute = 0; - invoked_peratom = invoked_scalar = -1; + did_setup = invoked_peratom = invoked_scalar = -1; nmax = -1; fatom = NULL; vector = new double[size_peratom_cols]; @@ -69,55 +68,50 @@ void ComputeForceTally::init() else force->pair->add_tally_callback(this); - if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->warning(FLERR,"Compute force/tally used with incompatible pair style"); + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR,"Compute force/tally used with incompatible pair style"); - if ((comm->me == 0) && (force->bond || force->angle || force->dihedral - || force->improper || force->kspace)) - error->warning(FLERR,"Compute force/tally only called from pair style"); - - did_compute = -1; + if (force->bond || force->angle || force->dihedral + || force->improper || force->kspace) + error->warning(FLERR,"Compute force/tally only called from pair style"); + } + did_setup = -1; } - /* ---------------------------------------------------------------------- */ -void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton, - double, double, double fpair, - double dx, double dy, double dz) + +void ComputeForceTally::pair_setup_callback(int, int) { const int ntotal = atom->nlocal + atom->nghost; - const int * const mask = atom->mask; - // do setup work that needs to be done only once per timestep + // grow per-atom storage, if needed - if (did_compute != update->ntimestep) { - did_compute = update->ntimestep; + if (atom->nmax > nmax) { + memory->destroy(fatom); + nmax = atom->nmax; + 
memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom"); + array_atom = fatom; + } - // grow local force array if necessary - // needs to be atom->nmax in length + // clear storage - if (atom->nmax > nmax) { - memory->destroy(fatom); - nmax = atom->nmax; - memory->create(fatom,nmax,size_peratom_cols,"force/tally:fatom"); - array_atom = fatom; - } + for (int i=0; i < ntotal; ++i) + for (int j=0; j < size_peratom_cols; ++j) + fatom[i][j] = 0.0; - // clear storage as needed + for (int i=0; i < size_peratom_cols; ++i) + vector[i] = ftotal[i] = 0.0; - if (newton) { - for (int i=0; i < ntotal; ++i) - for (int j=0; j < size_peratom_cols; ++j) - fatom[i][j] = 0.0; - } else { - for (int i=0; i < atom->nlocal; ++i) - for (int j=0; j < size_peratom_cols; ++j) - fatom[i][j] = 0.0; - } + did_setup = update->ntimestep; +} - for (int i=0; i < size_peratom_cols; ++i) - vector[i] = ftotal[i] = 0.0; - } +/* ---------------------------------------------------------------------- */ +void ComputeForceTally::pair_tally_callback(int i, int j, int nlocal, int newton, + double, double, double fpair, + double dx, double dy, double dz) +{ + const int * const mask = atom->mask; if ( ((mask[i] & groupbit) && (mask[j] & groupbit2)) || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) { @@ -181,7 +175,8 @@ void ComputeForceTally::unpack_reverse_comm(int n, int *list, double *buf) double ComputeForceTally::compute_scalar() { invoked_scalar = update->ntimestep; - if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar)) + if ((did_setup != invoked_scalar) + || (update->eflag_global != invoked_scalar)) error->all(FLERR,"Energy was not tallied on needed timestep"); // sum accumulated forces across procs @@ -197,7 +192,8 @@ double ComputeForceTally::compute_scalar() void ComputeForceTally::compute_peratom() { invoked_peratom = update->ntimestep; - if ((did_compute != invoked_peratom) || (update->eflag_global != invoked_peratom)) + if ((did_setup != invoked_peratom) + 
|| (update->eflag_global != invoked_peratom)) error->all(FLERR,"Energy was not tallied on needed timestep"); // collect contributions from ghost atoms @@ -205,6 +201,7 @@ void ComputeForceTally::compute_peratom() if (force->newton_pair) { comm->reverse_comm_compute(this); + // clear out ghost atom data after it has been collected to local atoms const int nall = atom->nlocal + atom->nghost; for (int i = atom->nlocal; i < nall; ++i) for (int j = 0; j < size_peratom_cols; ++j) diff --git a/src/USER-TALLY/compute_force_tally.h b/src/USER-TALLY/compute_force_tally.h index 0f7bc35a6d7ab774e3ef7574abb64bcf66fbdc6d..ae2f06a096f0516e513c0102693b0338039c3961 100644 --- a/src/USER-TALLY/compute_force_tally.h +++ b/src/USER-TALLY/compute_force_tally.h @@ -39,12 +39,12 @@ class ComputeForceTally : public Compute { void unpack_reverse_comm(int, int *, double *); double memory_usage(); + void pair_setup_callback(int, int); void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); - private: - bigint did_compute; + bigint did_setup; int nmax,igroup2,groupbit2; double **fatom; double ftotal[3]; diff --git a/src/USER-TALLY/compute_heat_flux_tally.cpp b/src/USER-TALLY/compute_heat_flux_tally.cpp index 48cad538d5e09d5f9e32eff23ce3cbf02fc43afd..65f57b7678096e5a066f3710a54f2c4f70faedd3 100644 --- a/src/USER-TALLY/compute_heat_flux_tally.cpp +++ b/src/USER-TALLY/compute_heat_flux_tally.cpp @@ -43,12 +43,13 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) : size_vector = 6; peflag = 1; // we need Pair::ev_tally() to be run - did_compute = 0; + did_setup = 0; invoked_peratom = invoked_scalar = -1; nmax = -1; stress = NULL; eatom = NULL; vector = new double[size_vector]; + heatj = new double[size_vector]; } /* ---------------------------------------------------------------------- */ @@ -56,6 +57,9 @@ ComputeHeatFluxTally::ComputeHeatFluxTally(LAMMPS *lmp, int narg, char **arg) : ComputeHeatFluxTally::~ComputeHeatFluxTally() 
{ if (force && force->pair) force->pair->del_tally_callback(this); + memory->destroy(stress); + memory->destroy(eatom); + delete[] heatj; delete[] vector; } @@ -68,70 +72,56 @@ void ComputeHeatFluxTally::init() else force->pair->add_tally_callback(this); - if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->warning(FLERR,"Compute heat/flux/tally used with incompatible pair style"); + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR,"Compute heat/flux/tally used with incompatible pair style"); - if ((comm->me == 0) && (force->bond || force->angle || force->dihedral - || force->improper || force->kspace)) - error->warning(FLERR,"Compute heat/flux/tally only called from pair style"); - - did_compute = -1; + if (force->bond || force->angle || force->dihedral + || force->improper || force->kspace) + error->warning(FLERR,"Compute heat/flux/tally only called from pair style"); + } + did_setup = -1; } - /* ---------------------------------------------------------------------- */ -void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton, - double evdwl, double ecoul, double fpair, - double dx, double dy, double dz) +void ComputeHeatFluxTally::pair_setup_callback(int, int) { const int ntotal = atom->nlocal + atom->nghost; - const int * const mask = atom->mask; - // do setup work that needs to be done only once per timestep + // grow per-atom storage, if needed - if (did_compute != update->ntimestep) { - did_compute = update->ntimestep; + if (atom->nmax > nmax) { + memory->destroy(stress); + memory->destroy(eatom); + nmax = atom->nmax; + memory->create(stress,nmax,6,"heat/flux/tally:stress"); + memory->create(eatom,nmax,"heat/flux/tally:eatom"); + } - // grow local stress and eatom arrays if necessary - // needs to be atom->nmax in length + // clear storage - if (atom->nmax > nmax) { - memory->destroy(stress); - nmax = atom->nmax; - 
memory->create(stress,nmax,6,"heat/flux/tally:stress"); + for (int i=0; i < ntotal; ++i) { + eatom[i] = 0.0; + stress[i][0] = 0.0; + stress[i][1] = 0.0; + stress[i][2] = 0.0; + stress[i][3] = 0.0; + stress[i][4] = 0.0; + stress[i][5] = 0.0; + } - memory->destroy(eatom); - nmax = atom->nmax; - memory->create(eatom,nmax,"heat/flux/tally:eatom"); - } + for (int i=0; i < size_vector; ++i) + vector[i] = heatj[i] = 0.0; - // clear storage as needed - - if (newton) { - for (int i=0; i < ntotal; ++i) { - eatom[i] = 0.0; - stress[i][0] = 0.0; - stress[i][1] = 0.0; - stress[i][2] = 0.0; - stress[i][3] = 0.0; - stress[i][4] = 0.0; - stress[i][5] = 0.0; - } - } else { - for (int i=0; i < atom->nlocal; ++i) { - eatom[i] = 0.0; - stress[i][0] = 0.0; - stress[i][1] = 0.0; - stress[i][2] = 0.0; - stress[i][3] = 0.0; - stress[i][4] = 0.0; - stress[i][5] = 0.0; - } - } + did_setup = update->ntimestep; +} - for (int i=0; i < size_vector; ++i) - vector[i] = heatj[i] = 0.0; - } +/* ---------------------------------------------------------------------- */ +void ComputeHeatFluxTally::pair_tally_callback(int i, int j, int nlocal, int newton, + double evdwl, double ecoul, double fpair, + double dx, double dy, double dz) +{ + const int * const mask = atom->mask; if ( ((mask[i] & groupbit) && (mask[j] & groupbit2)) || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) { @@ -210,7 +200,7 @@ void ComputeHeatFluxTally::unpack_reverse_comm(int n, int *list, double *buf) void ComputeHeatFluxTally::compute_vector() { invoked_vector = update->ntimestep; - if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector)) + if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector)) error->all(FLERR,"Energy was not tallied on needed timestep"); // collect contributions from ghost atoms @@ -243,6 +233,7 @@ void ComputeHeatFluxTally::compute_vector() const double pfactor = 0.5 * force->mvv2e; double **v = atom->v; double *mass = atom->mass; + double *rmass = 
atom->rmass; int *type = atom->type; double jc[3] = {0.0,0.0,0.0}; @@ -250,17 +241,21 @@ void ComputeHeatFluxTally::compute_vector() for (int i = 0; i < nlocal; i++) { if (mask[i] & groupbit) { - double ke_i = pfactor * mass[type[i]] * - (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]); - jc[0] += (ke_i + eatom[i]) * v[i][0]; - jc[1] += (ke_i + eatom[i]) * v[i][1]; - jc[2] += (ke_i + eatom[i]) * v[i][2]; - jv[0] += stress[i][0]*v[i][0] + stress[i][3]*v[i][1] + - stress[i][4]*v[i][2]; - jv[1] += stress[i][3]*v[i][0] + stress[i][1]*v[i][1] + - stress[i][5]*v[i][2]; - jv[2] += stress[i][4]*v[i][0] + stress[i][5]*v[i][1] + - stress[i][2]*v[i][2]; + const double * const vi = v[i]; + const double * const si = stress[i]; + double ke_i; + + if (rmass) ke_i = pfactor * rmass[i]; + else ke_i = pfactor * mass[type[i]]; + ke_i *= (vi[0]*vi[0] + vi[1]*vi[1] + vi[2]*vi[2]); + ke_i += eatom[i]; + + jc[0] += ke_i*vi[0]; + jc[1] += ke_i*vi[1]; + jc[2] += ke_i*vi[2]; + jv[0] += si[0]*vi[0] + si[3]*vi[1] + si[4]*vi[2]; + jv[1] += si[3]*vi[0] + si[1]*vi[1] + si[5]*vi[2]; + jv[2] += si[4]*vi[0] + si[5]*vi[1] + si[2]*vi[2]; } } diff --git a/src/USER-TALLY/compute_heat_flux_tally.h b/src/USER-TALLY/compute_heat_flux_tally.h index 8c6671cf1e59c04fa7ca25e765021233e41d135b..4158b2e29d9b16c04e93186b387eab0b2082d6c1 100644 --- a/src/USER-TALLY/compute_heat_flux_tally.h +++ b/src/USER-TALLY/compute_heat_flux_tally.h @@ -38,15 +38,16 @@ class ComputeHeatFluxTally : public Compute { void unpack_reverse_comm(int, int *, double *); double memory_usage(); + void pair_setup_callback(int, int); void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); private: - bigint did_compute; + bigint did_setup; int nmax,igroup2,groupbit2; double **stress,*eatom; - double heatj[6]; + double *heatj; }; } diff --git a/src/USER-TALLY/compute_pe_mol_tally.cpp b/src/USER-TALLY/compute_pe_mol_tally.cpp index 
a30f2d6b9a9c9bc0d8edf509059a5f054abc0b49..25a172b7f81eb6cd68a75fcb520c401bb89ec1de 100644 --- a/src/USER-TALLY/compute_pe_mol_tally.cpp +++ b/src/USER-TALLY/compute_pe_mol_tally.cpp @@ -42,7 +42,7 @@ ComputePEMolTally::ComputePEMolTally(LAMMPS *lmp, int narg, char **arg) : extvector = 1; peflag = 1; // we need Pair::ev_tally() to be run - did_compute = invoked_vector = -1; + did_setup = invoked_vector = -1; vector = new double[size_vector]; } @@ -66,16 +66,24 @@ void ComputePEMolTally::init() if (atom->molecule_flag == 0) error->all(FLERR,"Compute pe/mol/tally requires molecule IDs"); - if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->warning(FLERR,"Compute pe/mol/tally used with incompatible pair style"); + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR,"Compute pe/mol/tally used with incompatible pair style"); - if ((comm->me == 0) && (force->bond || force->angle || force->dihedral - || force->improper || force->kspace)) - error->warning(FLERR,"Compute pe/mol/tally only called from pair style"); - - did_compute = -1; + if (force->bond || force->angle || force->dihedral + || force->improper || force->kspace) + error->warning(FLERR,"Compute pe/mol/tally only called from pair style"); + } + did_setup = -1; } +/* ---------------------------------------------------------------------- */ + +void ComputePEMolTally::pair_setup_callback(int, int) +{ + etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0; + did_setup = update->ntimestep; +} /* ---------------------------------------------------------------------- */ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton, @@ -85,14 +93,6 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton const int * const mask = atom->mask; const tagint * const molid = atom->molecule; - // do setup work that needs to be done only once per timestep - - if (did_compute != update->ntimestep) { - 
did_compute = update->ntimestep; - - etotal[0] = etotal[1] = etotal[2] = etotal[3] = 0.0; - } - if ( ((mask[i] & groupbit) && (mask[j] & groupbit2)) || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ){ @@ -119,7 +119,7 @@ void ComputePEMolTally::pair_tally_callback(int i, int j, int nlocal, int newton void ComputePEMolTally::compute_vector() { invoked_vector = update->ntimestep; - if ((did_compute != invoked_vector) || (update->eflag_global != invoked_vector)) + if ((did_setup != invoked_vector) || (update->eflag_global != invoked_vector)) error->all(FLERR,"Energy was not tallied on needed timestep"); // sum accumulated energies across procs diff --git a/src/USER-TALLY/compute_pe_mol_tally.h b/src/USER-TALLY/compute_pe_mol_tally.h index b2c5ffab7fc3ee3d23b538c8d4e19cfbfacf8668..1b022a9ef578cfb4d2cdf817605ace0ca53bfc79 100644 --- a/src/USER-TALLY/compute_pe_mol_tally.h +++ b/src/USER-TALLY/compute_pe_mol_tally.h @@ -33,12 +33,13 @@ class ComputePEMolTally : public Compute { void init(); void compute_vector(); + void pair_setup_callback(int, int); void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); private: - bigint did_compute; + bigint did_setup; int igroup2,groupbit2; double etotal[4]; }; diff --git a/src/USER-TALLY/compute_pe_tally.cpp b/src/USER-TALLY/compute_pe_tally.cpp index 2117f2cb15952e09189d0600a4b96ad17ed39f74..e7c0bdd03cca25e152d24ef35df936edcede0fd9 100644 --- a/src/USER-TALLY/compute_pe_tally.cpp +++ b/src/USER-TALLY/compute_pe_tally.cpp @@ -44,7 +44,7 @@ ComputePETally::ComputePETally(LAMMPS *lmp, int narg, char **arg) : extscalar = 1; peflag = 1; // we need Pair::ev_tally() to be run - did_compute = invoked_peratom = invoked_scalar = -1; + did_setup = invoked_peratom = invoked_scalar = -1; nmax = -1; eatom = NULL; vector = new double[size_peratom_cols]; @@ -68,55 +68,51 @@ void ComputePETally::init() else force->pair->add_tally_callback(this); - if (force->pair->single_enable == 0 || 
force->pair->manybody_flag) - error->warning(FLERR,"Compute pe/tally used with incompatible pair style"); + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR,"Compute pe/tally used with incompatible pair style"); - if ((comm->me == 0) && (force->bond || force->angle || force->dihedral - || force->improper || force->kspace)) - error->warning(FLERR,"Compute pe/tally only called from pair style"); - - did_compute = -1; + if (force->bond || force->angle || force->dihedral + || force->improper || force->kspace) + error->warning(FLERR,"Compute pe/tally only called from pair style"); + } + did_setup = -1; } - /* ---------------------------------------------------------------------- */ -void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton, - double evdwl, double ecoul, double, - double, double, double) + +void ComputePETally::pair_setup_callback(int, int) { const int ntotal = atom->nlocal + atom->nghost; - const int * const mask = atom->mask; - // do setup work that needs to be done only once per timestep + // grow per-atom storage, if needed - if (did_compute != update->ntimestep) { - did_compute = update->ntimestep; + if (atom->nmax > nmax) { + memory->destroy(eatom); + nmax = atom->nmax; + memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom"); + array_atom = eatom; + } - // grow local eatom array if necessary - // needs to be atom->nmax in length + // clear storage - if (atom->nmax > nmax) { - memory->destroy(eatom); - nmax = atom->nmax; - memory->create(eatom,nmax,size_peratom_cols,"pe/tally:eatom"); - array_atom = eatom; - } + for (int i=0; i < ntotal; ++i) + eatom[i][0] = eatom[i][1] = 0.0; - // clear storage as needed + vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0; - if (newton) { - for (int i=0; i < ntotal; ++i) - eatom[i][0] = eatom[i][1] = 0.0; - } else { - for (int i=0; i < atom->nlocal; ++i) - eatom[i][0] = eatom[i][1] = 0.0; - } + did_setup = 
update->ntimestep; +} - vector[0] = etotal[0] = vector[1] = etotal[1] = 0.0; - } +/* ---------------------------------------------------------------------- */ +void ComputePETally::pair_tally_callback(int i, int j, int nlocal, int newton, + double evdwl, double ecoul, double, + double, double, double) +{ + const int * const mask = atom->mask; if ( ((mask[i] & groupbit) && (mask[j] & groupbit2)) - || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ){ + || ((mask[i] & groupbit2) && (mask[j] & groupbit)) ) { evdwl *= 0.5; ecoul *= 0.5; if (newton || i < nlocal) { @@ -164,7 +160,8 @@ void ComputePETally::unpack_reverse_comm(int n, int *list, double *buf) double ComputePETally::compute_scalar() { invoked_scalar = update->ntimestep; - if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar)) + if ((did_setup != invoked_scalar) + || (update->eflag_global != invoked_scalar)) error->all(FLERR,"Energy was not tallied on needed timestep"); // sum accumulated energies across procs @@ -180,13 +177,16 @@ double ComputePETally::compute_scalar() void ComputePETally::compute_peratom() { invoked_peratom = update->ntimestep; - if ((did_compute != invoked_peratom) || (update->eflag_global != invoked_peratom)) + if ((did_setup != invoked_peratom) + || (update->eflag_global != invoked_peratom)) error->all(FLERR,"Energy was not tallied on needed timestep"); // collect contributions from ghost atoms if (force->newton_pair) { comm->reverse_comm_compute(this); + + // clear out ghost atom data after it has been collected to local atoms const int nall = atom->nlocal + atom->nghost; for (int i = atom->nlocal; i < nall; ++i) eatom[i][0] = eatom[i][1] = 0.0; diff --git a/src/USER-TALLY/compute_pe_tally.h b/src/USER-TALLY/compute_pe_tally.h index 2335bbeceeac278d45e9f62bfe8868a868a8071d..cd972e49dba3d0afce0e9387c7e0a461a9b1e881 100644 --- a/src/USER-TALLY/compute_pe_tally.h +++ b/src/USER-TALLY/compute_pe_tally.h @@ -39,12 +39,13 @@ class ComputePETally : public Compute { 
void unpack_reverse_comm(int, int *, double *); double memory_usage(); + void pair_setup_callback(int, int); void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); private: - bigint did_compute; + bigint did_setup; int nmax,igroup2,groupbit2; double **eatom; double etotal[2]; diff --git a/src/USER-TALLY/compute_stress_tally.cpp b/src/USER-TALLY/compute_stress_tally.cpp index 66df9f6e4ffc28d9dc68587e1393c03a7caf3f34..28baafb9f8f451f37771ea84d3e4a1b432fcb62f 100644 --- a/src/USER-TALLY/compute_stress_tally.cpp +++ b/src/USER-TALLY/compute_stress_tally.cpp @@ -44,11 +44,11 @@ ComputeStressTally::ComputeStressTally(LAMMPS *lmp, int narg, char **arg) : extscalar = 0; peflag = 1; // we need Pair::ev_tally() to be run - did_compute = 0; - invoked_peratom = invoked_scalar = -1; + did_setup = invoked_peratom = invoked_scalar = -1; nmax = -1; stress = NULL; vector = new double[size_peratom_cols]; + virial = new double[size_peratom_cols]; } /* ---------------------------------------------------------------------- */ @@ -57,6 +57,7 @@ ComputeStressTally::~ComputeStressTally() { if (force && force->pair) force->pair->del_tally_callback(this); memory->destroy(stress); + delete[] virial; delete[] vector; } @@ -69,55 +70,50 @@ void ComputeStressTally::init() else force->pair->add_tally_callback(this); - if (force->pair->single_enable == 0 || force->pair->manybody_flag) - error->warning(FLERR,"Compute stress/tally used with incompatible pair style"); + if (comm->me == 0) { + if (force->pair->single_enable == 0 || force->pair->manybody_flag) + error->warning(FLERR,"Compute stress/tally used with incompatible pair style"); - if ((comm->me == 0) && (force->bond || force->angle || force->dihedral - || force->improper || force->kspace)) - error->warning(FLERR,"Compute stress/tally only called from pair style"); - - did_compute = -1; + if (force->bond || force->angle || force->dihedral + || force->improper || force->kspace) + 
error->warning(FLERR,"Compute stress/tally only called from pair style"); + } + did_setup = -1; } - /* ---------------------------------------------------------------------- */ -void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton, - double, double, double fpair, - double dx, double dy, double dz) + +void ComputeStressTally::pair_setup_callback(int, int) { const int ntotal = atom->nlocal + atom->nghost; - const int * const mask = atom->mask; - // do setup work that needs to be done only once per timestep + // grow per-atom storage, if needed - if (did_compute != update->ntimestep) { - did_compute = update->ntimestep; + if (atom->nmax > nmax) { + memory->destroy(stress); + nmax = atom->nmax; + memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress"); + array_atom = stress; + } - // grow local stress array if necessary - // needs to be atom->nmax in length + // clear storage - if (atom->nmax > nmax) { - memory->destroy(stress); - nmax = atom->nmax; - memory->create(stress,nmax,size_peratom_cols,"stress/tally:stress"); - array_atom = stress; - } + for (int i=0; i < ntotal; ++i) + for (int j=0; j < size_peratom_cols; ++j) + stress[i][j] = 0.0; - // clear storage as needed + for (int i=0; i < size_peratom_cols; ++i) + vector[i] = virial[i] = 0.0; - if (newton) { - for (int i=0; i < ntotal; ++i) - for (int j=0; j < size_peratom_cols; ++j) - stress[i][j] = 0.0; - } else { - for (int i=0; i < atom->nlocal; ++i) - for (int j=0; j < size_peratom_cols; ++j) - stress[i][j] = 0.0; - } + did_setup = update->ntimestep; +} - for (int i=0; i < size_peratom_cols; ++i) - vector[i] = virial[i] = 0.0; - } +/* ---------------------------------------------------------------------- */ +void ComputeStressTally::pair_tally_callback(int i, int j, int nlocal, int newton, + double, double, double fpair, + double dx, double dy, double dz) +{ + const int * const mask = atom->mask; if ( ((mask[i] & groupbit) && (mask[j] & groupbit2)) || ((mask[i] & 
groupbit2) && (mask[j] & groupbit)) ) { @@ -191,7 +187,8 @@ void ComputeStressTally::unpack_reverse_comm(int n, int *list, double *buf) double ComputeStressTally::compute_scalar() { invoked_scalar = update->ntimestep; - if ((did_compute != invoked_scalar) || (update->eflag_global != invoked_scalar)) + if ((did_setup != invoked_scalar) + || (update->eflag_global != invoked_scalar)) error->all(FLERR,"Energy was not tallied on needed timestep"); // sum accumulated forces across procs @@ -211,7 +208,8 @@ double ComputeStressTally::compute_scalar() void ComputeStressTally::compute_peratom() { invoked_peratom = update->ntimestep; - if ((did_compute != invoked_peratom) || (update->eflag_global != invoked_peratom)) + if ((did_setup != invoked_peratom) + || (update->eflag_global != invoked_peratom)) error->all(FLERR,"Energy was not tallied on needed timestep"); // collect contributions from ghost atoms diff --git a/src/USER-TALLY/compute_stress_tally.h b/src/USER-TALLY/compute_stress_tally.h index a677d2eef6b58909a104e272af70e9493df8e79c..22f27a4a41b4e04953b20a61b6f311e1e9b9d9b7 100644 --- a/src/USER-TALLY/compute_stress_tally.h +++ b/src/USER-TALLY/compute_stress_tally.h @@ -39,15 +39,16 @@ class ComputeStressTally : public Compute { void unpack_reverse_comm(int, int *, double *); double memory_usage(); + void pair_setup_callback(int, int); void pair_tally_callback(int, int, int, int, double, double, double, double, double, double); private: - bigint did_compute; + bigint did_setup; int nmax,igroup2,groupbit2; double **stress; - double virial[6]; + double *virial; }; } diff --git a/src/atom.cpp b/src/atom.cpp index 6fa1cd8ef8db1def4e705e371c42b5bfd9d7ac4b..df4db0a84205a8499ad78e85123ff72cd09b3c49 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -40,6 +40,10 @@ #include "memory.h" #include "error.h" +#ifdef LMP_USER_INTEL +#include "neigh_request.h" +#endif + using namespace LAMMPS_NS; using namespace MathConst; @@ -1882,6 +1886,53 @@ void Atom::setup_sort_bins() bininvy = 
nbiny / (bboxhi[1]-bboxlo[1]); bininvz = nbinz / (bboxhi[2]-bboxlo[2]); + #ifdef LMP_USER_INTEL + int intel_neigh = 0; + if (neighbor->nrequest) { + if (neighbor->requests[0]->intel) intel_neigh = 1; + } else if (neighbor->old_nrequest) + if (neighbor->old_requests[0]->intel) intel_neigh = 1; + if (intel_neigh && userbinsize == 0.0) { + if (neighbor->binsizeflag) bininv = 1.0/neighbor->binsize_user; + + double nx_low = neighbor->bboxlo[0]; + double ny_low = neighbor->bboxlo[1]; + double nz_low = neighbor->bboxlo[2]; + double nxbbox = neighbor->bboxhi[0] - nx_low; + double nybbox = neighbor->bboxhi[1] - ny_low; + double nzbbox = neighbor->bboxhi[2] - nz_low; + int nnbinx = static_cast<int> (nxbbox * bininv); + int nnbiny = static_cast<int> (nybbox * bininv); + int nnbinz = static_cast<int> (nzbbox * bininv); + if (domain->dimension == 2) nnbinz = 1; + + if (nnbinx == 0) nnbinx = 1; + if (nnbiny == 0) nnbiny = 1; + if (nnbinz == 0) nnbinz = 1; + + double binsizex = nxbbox/nnbinx; + double binsizey = nybbox/nnbiny; + double binsizez = nzbbox/nnbinz; + + bininvx = 1.0 / binsizex; + bininvy = 1.0 / binsizey; + bininvz = 1.0 / binsizez; + + int lxo = (bboxlo[0] - nx_low) * bininvx; + int lyo = (bboxlo[1] - ny_low) * bininvy; + int lzo = (bboxlo[2] - nz_low) * bininvz; + bboxlo[0] = nx_low + static_cast<double>(lxo) / bininvx; + bboxlo[1] = ny_low + static_cast<double>(lyo) / bininvy; + bboxlo[2] = nz_low + static_cast<double>(lzo) / bininvz; + nbinx = static_cast<int>((bboxhi[0] - bboxlo[0]) * bininvx) + 1; + nbiny = static_cast<int>((bboxhi[1] - bboxlo[1]) * bininvy) + 1; + nbinz = static_cast<int>((bboxhi[2] - bboxlo[2]) * bininvz) + 1; + bboxhi[0] = bboxlo[0] + static_cast<double>(nbinx) / bininvx; + bboxhi[1] = bboxlo[1] + static_cast<double>(nbiny) / bininvy; + bboxhi[2] = bboxlo[2] + static_cast<double>(nbinz) / bininvz; + } + #endif + if (1.0*nbinx*nbiny*nbinz > INT_MAX) error->one(FLERR,"Too many atom sorting bins"); diff --git a/src/change_box.cpp 
b/src/change_box.cpp index add1fe1d0a49b57c128b945729c25cd8e03d8272..c6ec078a01f647d4059b692b03250bc5bd52012d 100644 --- a/src/change_box.cpp +++ b/src/change_box.cpp @@ -316,6 +316,9 @@ void ChangeBox::command(int narg, char **arg) } else if (ops[m].style == REMAP) { + if (modify->check_rigid_group_overlap(groupbit)) + error->warning(FLERR,"Attempting to remap atoms in rigid bodies"); + // convert atoms to lamda coords, using last box state // convert atoms back to box coords, using current box state // save current box state diff --git a/src/comm.cpp b/src/comm.cpp index 871675ca8dc52ea1259a58a09f0afb869f168a71..88edf98ca60dffa3a646f2107ab144164aa2e228 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -678,10 +678,12 @@ int Comm::binary(double value, int n, double *vec) using original inbuf, which may have been updated for non-NULL outbuf, final updated inbuf is copied to it ok to specify outbuf = inbuf + the ptr argument is a pointer to the instance of calling class ------------------------------------------------------------------------- */ void Comm::ring(int n, int nper, void *inbuf, int messtag, - void (*callback)(int, char *), void *outbuf, int self) + void (*callback)(int, char *, void *), + void *outbuf, void *ptr, int self) { MPI_Request request; MPI_Status status; @@ -712,7 +714,7 @@ void Comm::ring(int n, int nper, void *inbuf, int messtag, MPI_Get_count(&status,MPI_CHAR,&nbytes); memcpy(buf,bufcopy,nbytes); } - if (self || loop < nprocs-1) callback(nbytes/nper,buf); + if (self || loop < nprocs-1) callback(nbytes/nper,buf,ptr); } if (outbuf) memcpy(outbuf,buf,nbytes); diff --git a/src/comm.h b/src/comm.h index 15b42111d897693de9fd916863e0c24be01d34b4..b0e71f54355d59a666b2de0d1f9719fe25dec30b 100644 --- a/src/comm.h +++ b/src/comm.h @@ -105,8 +105,8 @@ class Comm : protected Pointers { // non-virtual functions common to all Comm styles - void ring(int, int, void *, int, void (*)(int, char *), - void *, int self = 1); + void ring(int, int, void *, int, 
void (*)(int, char *, void *), + void *, void *, int self = 1); int read_lines_from_file(FILE *, int, int, char *); int read_lines_from_file_universe(FILE *, int, int, char *); diff --git a/src/compute.h b/src/compute.h index 7f12cd97e20d1c25b5d6b4e9eddffd81cd2657a5..f04ffebd61856d6637ad7b0c7a77f120526f5df7 100644 --- a/src/compute.h +++ b/src/compute.h @@ -135,6 +135,7 @@ class Compute : protected Pointers { virtual double memory_usage() {return 0.0;} + virtual void pair_setup_callback(int, int) {} virtual void pair_tally_callback(int, int, int, int, double, double, double, double, double, double) {} @@ -152,7 +153,7 @@ class Compute : protected Pointers { double **vbiasall; // stored velocity bias for all atoms int maxbias; // size of vbiasall array - inline int sbmask(int j) { + inline int sbmask(int j) const { return j >> SBBITS & 3; } diff --git a/src/compute_chunk_atom.cpp b/src/compute_chunk_atom.cpp index ac35629d08109324c9a117f4c7b997a5cbae7f94..f1052bb85acb39931a205c9edf1a9228a920310b 100644 --- a/src/compute_chunk_atom.cpp +++ b/src/compute_chunk_atom.cpp @@ -49,10 +49,6 @@ enum{LIMITMAX,LIMITEXACT}; #define IDMAX 1024*1024 #define INVOKED_PERATOM 8 -// allocate space for static class variable - -ComputeChunkAtom *ComputeChunkAtom::cptr; - /* ---------------------------------------------------------------------- */ ComputeChunkAtom::ComputeChunkAtom(LAMMPS *lmp, int narg, char **arg) : @@ -1088,8 +1084,7 @@ void ComputeChunkAtom::compress_chunk_ids() memory->destroy(listall); } else { - cptr = this; - comm->ring(n,sizeof(int),list,1,idring,NULL,0); + comm->ring(n,sizeof(int),list,1,idring,NULL,(void *)this,0); } memory->destroy(list); @@ -1121,8 +1116,9 @@ void ComputeChunkAtom::compress_chunk_ids() hash ends up storing all unique IDs across all procs ------------------------------------------------------------------------- */ -void ComputeChunkAtom::idring(int n, char *cbuf) +void ComputeChunkAtom::idring(int n, char *cbuf, void *ptr) { + 
ComputeChunkAtom *cptr = (ComputeChunkAtom *)ptr; tagint *list = (tagint *) cbuf; std::map<tagint,int> *hash = cptr->hash; for (int i = 0; i < n; i++) (*hash)[list[i]] = 0; diff --git a/src/compute_chunk_atom.h b/src/compute_chunk_atom.h index 9c64e9bc7a37587c5f3647f6bdb388a1ebd18f82..59c93b38f3798bcd59836e8c351410fcec9add07 100644 --- a/src/compute_chunk_atom.h +++ b/src/compute_chunk_atom.h @@ -107,11 +107,9 @@ class ComputeChunkAtom : public Compute { int *exclude; // 1 if atom is not assigned to any chunk std::map<tagint,int> *hash; // store original chunks IDs before compression - // static variable for ring communication callback to access class data - // callback functions for ring communication + // callback function for ring communication - static ComputeChunkAtom *cptr; - static void idring(int, char *); + static void idring(int, char *, void *); void assign_chunk_ids(); void compress_chunk_ids(); diff --git a/src/compute_hexorder_atom.cpp b/src/compute_hexorder_atom.cpp index 93b84080bc90a23a766e5db0031d65ce679ebb5d..013036f364a83320f0296d319b5a3c7c54492e41 100644 --- a/src/compute_hexorder_atom.cpp +++ b/src/compute_hexorder_atom.cpp @@ -248,7 +248,7 @@ inline void ComputeHexOrderAtom::calc_qn_complex(double delx, double dely, doubl double x = delx*rinv; double y = dely*rinv; std::complex<double> z(x, y); - std::complex<double> zn = pow(z, nnn); + std::complex<double> zn = pow(z, ndegree); u = real(zn); v = imag(zn); } @@ -259,9 +259,9 @@ inline void ComputeHexOrderAtom::calc_qn_complex(double delx, double dely, doubl inline void ComputeHexOrderAtom::calc_qn_trig(double delx, double dely, double &u, double &v) { double ntheta; if(fabs(delx) <= MY_EPSILON) { - if(dely > 0.0) ntheta = nnn * MY_PI / 2.0; - else ntheta = nnn * 3.0 * MY_PI / 2.0; - } else ntheta = nnn * atan(dely / delx); + if(dely > 0.0) ntheta = ndegree * MY_PI / 2.0; + else ntheta = ndegree * 3.0 * MY_PI / 2.0; + } else ntheta = ndegree * atan(dely / delx); u = cos(ntheta); v = 
sin(ntheta); } diff --git a/src/compute_orientorder_atom.cpp b/src/compute_orientorder_atom.cpp index 43f13ecc32607a3e49dd2028fb4c8c072b439920..90e2830e39e55881884bb6cb57f137ec387f62bf 100644 --- a/src/compute_orientorder_atom.cpp +++ b/src/compute_orientorder_atom.cpp @@ -102,23 +102,22 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg if (qlist[iw] > qmax) qmax = qlist[iw]; } iarg += nqlist; - if (strcmp(arg[iarg],"components") == 0) { - qlcompflag = 1; - if (iarg+2 > narg) - error->all(FLERR,"Illegal compute orientorder/atom command"); - qlcomp = force->numeric(FLERR,arg[iarg+1]); - if (qlcomp <= 0) - error->all(FLERR,"Illegal compute orientorder/atom command"); - iqlcomp = -1; - for (int iw = 0; iw < nqlist; iw++) - if (qlcomp == qlist[iw]) { - iqlcomp = iw; - break; - } - if (iqlcomp < 0) - error->all(FLERR,"Illegal compute orientorder/atom command"); - iarg += 2; - } + } else if (strcmp(arg[iarg],"components") == 0) { + qlcompflag = 1; + if (iarg+2 > narg) + error->all(FLERR,"Illegal compute orientorder/atom command"); + qlcomp = force->numeric(FLERR,arg[iarg+1]); + if (qlcomp <= 0) + error->all(FLERR,"Illegal compute orientorder/atom command"); + iqlcomp = -1; + for (int iw = 0; iw < nqlist; iw++) + if (qlcomp == qlist[iw]) { + iqlcomp = iw; + break; + } + if (iqlcomp < 0) + error->all(FLERR,"Illegal compute orientorder/atom command"); + iarg += 2; } else if (strcmp(arg[iarg],"cutoff") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal compute orientorder/atom command"); diff --git a/src/create_bonds.h b/src/create_bonds.h index 2936506b3fd0afac9b08557d8e40cf382addc325..24b1596e37ad68b8d6d5115a2a5e124a922e5a7f 100644 --- a/src/create_bonds.h +++ b/src/create_bonds.h @@ -30,7 +30,7 @@ class CreateBonds : protected Pointers { void command(int, char **); private: - inline int sbmask(int j) { + inline int sbmask(int j) const { return j >> SBBITS & 3; } }; diff --git a/src/delete_atoms.cpp b/src/delete_atoms.cpp index 
116c2a2f1e057431644c82b2747d7d95eac6095c..825426b2b6759734da67a39fea0342c0305ab70b 100644 --- a/src/delete_atoms.cpp +++ b/src/delete_atoms.cpp @@ -33,10 +33,6 @@ using namespace LAMMPS_NS; -// allocate space for static class variable - -DeleteAtoms *DeleteAtoms::cptr; - /* ---------------------------------------------------------------------- */ DeleteAtoms::DeleteAtoms(LAMMPS *lmp) : Pointers(lmp) {} @@ -69,6 +65,15 @@ void DeleteAtoms::command(int narg, char **arg) else if (strcmp(arg[0],"porosity") == 0) delete_porosity(narg,arg); else error->all(FLERR,"Illegal delete_atoms command"); + if (allflag) { + int igroup = group->find("all"); + if ((igroup >= 0) && modify->check_rigid_group_overlap(group->bitmask[igroup])) + error->warning(FLERR,"Attempting to delete atoms in rigid bodies"); + } else { + if (modify->check_rigid_list_overlap(dlist)) + error->warning(FLERR,"Attempting to delete atoms in rigid bodies"); + } + // if allflag = 1, just reset atom->nlocal // else delete atoms one by one @@ -89,16 +94,16 @@ void DeleteAtoms::command(int narg, char **arg) int i = 0; while (i < nlocal) { if (dlist[i]) { - avec->copy(nlocal-1,i,1); - dlist[i] = dlist[nlocal-1]; - nlocal--; + avec->copy(nlocal-1,i,1); + dlist[i] = dlist[nlocal-1]; + nlocal--; } else i++; } - + atom->nlocal = nlocal; memory->destroy(dlist); } - + // if non-molecular system and compress flag set, // reset atom tags to be contiguous // set all atom IDs to 0, call tag_extend() @@ -201,7 +206,7 @@ void DeleteAtoms::delete_group(int narg, char **arg) allflag = 1; return; } - + // allocate and initialize deletion list int nlocal = atom->nlocal; @@ -464,8 +469,7 @@ void DeleteAtoms::delete_bond() for (int i = 0; i < nlocal; i++) if (dlist[i]) list[n++] = tag[i]; - cptr = this; - comm->ring(n,sizeof(tagint),list,1,bondring,NULL); + comm->ring(n,sizeof(tagint),list,1,bondring,NULL,(void *)this); delete hash; memory->destroy(list); @@ -503,8 +507,7 @@ void DeleteAtoms::delete_molecule() 
std::map<tagint,int>::iterator pos; for (pos = hash->begin(); pos != hash->end(); ++pos) list[n++] = pos->first; - cptr = this; - comm->ring(n,sizeof(tagint),list,1,molring,NULL); + comm->ring(n,sizeof(tagint),list,1,molring,NULL,(void *)this); delete hash; memory->destroy(list); @@ -576,37 +579,38 @@ void DeleteAtoms::recount_topology() callback from comm->ring() in delete_bond() ------------------------------------------------------------------------- */ -void DeleteAtoms::bondring(int nbuf, char *cbuf) +void DeleteAtoms::bondring(int nbuf, char *cbuf, void *ptr) { + DeleteAtoms *daptr = (DeleteAtoms *) ptr; tagint *list = (tagint *) cbuf; - std::map<tagint,int> *hash = cptr->hash; + std::map<tagint,int> *hash = daptr->hash; - int *num_bond = cptr->atom->num_bond; - int *num_angle = cptr->atom->num_angle; - int *num_dihedral = cptr->atom->num_dihedral; - int *num_improper = cptr->atom->num_improper; + int *num_bond = daptr->atom->num_bond; + int *num_angle = daptr->atom->num_angle; + int *num_dihedral = daptr->atom->num_dihedral; + int *num_improper = daptr->atom->num_improper; - int **bond_type = cptr->atom->bond_type; - tagint **bond_atom = cptr->atom->bond_atom; + int **bond_type = daptr->atom->bond_type; + tagint **bond_atom = daptr->atom->bond_atom; - int **angle_type = cptr->atom->angle_type; - tagint **angle_atom1 = cptr->atom->angle_atom1; - tagint **angle_atom2 = cptr->atom->angle_atom2; - tagint **angle_atom3 = cptr->atom->angle_atom3; + int **angle_type = daptr->atom->angle_type; + tagint **angle_atom1 = daptr->atom->angle_atom1; + tagint **angle_atom2 = daptr->atom->angle_atom2; + tagint **angle_atom3 = daptr->atom->angle_atom3; - int **dihedral_type = cptr->atom->dihedral_type; - tagint **dihedral_atom1 = cptr->atom->dihedral_atom1; - tagint **dihedral_atom2 = cptr->atom->dihedral_atom2; - tagint **dihedral_atom3 = cptr->atom->dihedral_atom3; - tagint **dihedral_atom4 = cptr->atom->dihedral_atom4; + int **dihedral_type = daptr->atom->dihedral_type; + 
tagint **dihedral_atom1 = daptr->atom->dihedral_atom1; + tagint **dihedral_atom2 = daptr->atom->dihedral_atom2; + tagint **dihedral_atom3 = daptr->atom->dihedral_atom3; + tagint **dihedral_atom4 = daptr->atom->dihedral_atom4; - int **improper_type = cptr->atom->improper_type; - tagint **improper_atom1 = cptr->atom->improper_atom1; - tagint **improper_atom2 = cptr->atom->improper_atom2; - tagint **improper_atom3 = cptr->atom->improper_atom3; - tagint **improper_atom4 = cptr->atom->improper_atom4; + int **improper_type = daptr->atom->improper_type; + tagint **improper_atom1 = daptr->atom->improper_atom1; + tagint **improper_atom2 = daptr->atom->improper_atom2; + tagint **improper_atom3 = daptr->atom->improper_atom3; + tagint **improper_atom4 = daptr->atom->improper_atom4; - int nlocal = cptr->atom->nlocal; + int nlocal = daptr->atom->nlocal; // cbuf = list of N deleted atom IDs from other proc, put them in hash @@ -692,13 +696,14 @@ void DeleteAtoms::bondring(int nbuf, char *cbuf) callback from comm->ring() in delete_molecule() ------------------------------------------------------------------------- */ -void DeleteAtoms::molring(int n, char *cbuf) +void DeleteAtoms::molring(int n, char *cbuf, void *ptr) { + DeleteAtoms *daptr = (DeleteAtoms *)ptr; tagint *list = (tagint *) cbuf; - int *dlist = cptr->dlist; - std::map<tagint,int> *hash = cptr->hash; - int nlocal = cptr->atom->nlocal; - tagint *molecule = cptr->atom->molecule; + int *dlist = daptr->dlist; + std::map<tagint,int> *hash = daptr->hash; + int nlocal = daptr->atom->nlocal; + tagint *molecule = daptr->atom->molecule; // cbuf = list of N molecule IDs from other proc, put them in hash diff --git a/src/delete_atoms.h b/src/delete_atoms.h index 62ba47d715d47efab94161362d21c5dcfa542847..9a091433a8f477f8674d60ee42abb6c5ab4421be 100644 --- a/src/delete_atoms.h +++ b/src/delete_atoms.h @@ -45,16 +45,14 @@ class DeleteAtoms : protected Pointers { void recount_topology(); void options(int, char **); - inline int 
sbmask(int j) { + inline int sbmask(int j) const { return j >> SBBITS & 3; } - // static variable for ring communication callback to access class data // callback functions for ring communication - static DeleteAtoms *cptr; - static void bondring(int, char *); - static void molring(int, char *); + static void bondring(int, char *, void *); + static void molring(int, char *, void *); }; } diff --git a/src/displace_atoms.cpp b/src/displace_atoms.cpp index 7db7e218391ff2c28f0a902b6a59e595c9877da8..a9aa5cf8558aa2c620d9e5ce99ab882d7eb7af4b 100644 --- a/src/displace_atoms.cpp +++ b/src/displace_atoms.cpp @@ -75,6 +75,9 @@ void DisplaceAtoms::command(int narg, char **arg) if (igroup == -1) error->all(FLERR,"Could not find displace_atoms group ID"); groupbit = group->bitmask[igroup]; + if (modify->check_rigid_group_overlap(groupbit)) + error->warning(FLERR,"Attempting to displace atoms in rigid bodies"); + int style = -1; if (strcmp(arg[1],"move") == 0) style = MOVE; else if (strcmp(arg[1],"ramp") == 0) style = RAMP; diff --git a/src/domain.cpp b/src/domain.cpp index 8ead12cd4e2b0f6e1289dfaf17c1614e2e334b05..427f7785e816c19d8d309abd35b8149113994691 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -60,6 +60,7 @@ enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files Domain::Domain(LAMMPS *lmp) : Pointers(lmp) { box_exist = 0; + box_change = 0; dimension = 3; nonperiodic = 0; @@ -1697,6 +1698,7 @@ int Domain::ownatom(int id, double *x, imageint *image, int shrinkexceed) void Domain::set_lattice(int narg, char **arg) { if (lattice) delete lattice; + lattice = NULL; lattice = new Lattice(lmp,narg,arg); } diff --git a/src/dump.cpp b/src/dump.cpp index f8896c8fee91cc8638a7559df43780b32a632e5c..44098298ba53918e9fb38b61a12ae97e46682eda 100644 --- a/src/dump.cpp +++ b/src/dump.cpp @@ -30,9 +30,12 @@ using namespace LAMMPS_NS; +#if defined(LMP_QSORT) // allocate space for static class variable - Dump *Dump::dumpptr; +#else +#include "mergesort.h" +#endif #define 
BIG 1.0e20 #define EPSILON 1.0e-6 @@ -82,7 +85,7 @@ Dump::Dump(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) buffer_flag = 0; padflag = 0; pbcflag = 0; - + maxbuf = maxids = maxsort = maxproc = 0; buf = bufsort = NULL; ids = idsort = NULL; @@ -168,13 +171,13 @@ Dump::~Dump() delete irregular; memory->destroy(sbuf); - + if (pbcflag) { memory->destroy(xpbc); memory->destroy(vpbc); memory->destroy(imagepbc); } - + if (multiproc) MPI_Comm_free(&clustercomm); // XTC style sets fp to NULL since it closes file in its destructor @@ -275,7 +278,7 @@ void Dump::init() } // preallocation for PBC copies if requested - + if (pbcflag && atom->nlocal > maxpbc) pbc_allocate(); } @@ -388,7 +391,7 @@ void Dump::write() atom->image = imagepbc; domain->pbc(); } - + // pack my data into buf // if sorting on IDs also request ID list from pack() // sort buf as needed @@ -689,6 +692,7 @@ void Dump::sort() index[idsort[i]-idlo] = i; } +#if defined(LMP_QSORT) if (!reorderflag) { dumpptr = this; for (i = 0; i < nme; i++) index[i] = i; @@ -696,6 +700,14 @@ void Dump::sort() else if (sortorder == ASCEND) qsort(index,nme,sizeof(int),bufcompare); else qsort(index,nme,sizeof(int),bufcompare_reverse); } +#else + if (!reorderflag) { + for (i = 0; i < nme; i++) index[i] = i; + if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare); + else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare); + else merge_sort(index,nme,(void *)this,bufcompare_reverse); + } +#endif // reset buf size and maxbuf to largest of any post-sort nme values // this insures proc 0 can receive everyone's info @@ -716,6 +728,8 @@ void Dump::sort() memcpy(&buf[i*size_one],&bufsort[index[i]*size_one],nbytes); } +#if defined(LMP_QSORT) + /* ---------------------------------------------------------------------- compare two atom IDs called via qsort() in sort() method @@ -776,6 +790,65 @@ int Dump::bufcompare_reverse(const void *pi, const void *pj) return 0; } +#else + +/* 
---------------------------------------------------------------------- + compare two atom IDs + called via merge_sort() in sort() method +------------------------------------------------------------------------- */ + +int Dump::idcompare(const int i, const int j, void *ptr) +{ + tagint *idsort = ((Dump *)ptr)->idsort; + if (idsort[i] < idsort[j]) return -1; + else if (idsort[i] > idsort[j]) return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via merge_sort() in sort() method + sort in ASCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare(const int i, const int j, void *ptr) +{ + Dump *dptr = (Dump *) ptr; + double *bufsort = dptr->bufsort; + const int size_one = dptr->size_one; + const int sortcolm1 = dptr->sortcolm1; + + const int ii=i*size_one + sortcolm1; + const int jj=j*size_one + sortcolm1; + + if (bufsort[ii] < bufsort[jj]) return -1; + else if (bufsort[ii] > bufsort[jj]) return 1; + else return 0; +} + +/* ---------------------------------------------------------------------- + compare two buffer values with size_one stride + called via merge_sort() in sort() method + sort in DESCENDing order +------------------------------------------------------------------------- */ + +int Dump::bufcompare_reverse(const int i, const int j, void *ptr) +{ + Dump *dptr = (Dump *) ptr; + double *bufsort = dptr->bufsort; + const int size_one = dptr->size_one; + const int sortcolm1 = dptr->sortcolm1; + + const int ii=i*size_one + sortcolm1; + const int jj=j*size_one + sortcolm1; + + if (bufsort[ii] < bufsort[jj]) return 1; + else if (bufsort[ii] > bufsort[jj]) return -1; + else return 0; +} + +#endif + /* ---------------------------------------------------------------------- process params common to all dumps here if unknown param, call modify_param specific to the dump diff --git a/src/dump.h 
b/src/dump.h index c4d93352013d50cb8f27878d71bfb3855e5969b8..3c1450854ecb4eb0df1cd6216c9b84931b91bb0f 100644 --- a/src/dump.h +++ b/src/dump.h @@ -33,9 +33,10 @@ class Dump : protected Pointers { int comm_forward; // size of forward communication (0 if none) int comm_reverse; // size of reverse communication (0 if none) +#if defined(LMP_USE_LIBC_QSORT) // static variable across all Dump objects - static Dump *dumpptr; // holds a ptr to Dump currently being used +#endif Dump(class LAMMPS *, int, char **); virtual ~Dump(); @@ -132,11 +133,17 @@ class Dump : protected Pointers { virtual int convert_string(int, double *) {return 0;} virtual void write_data(int, double *) = 0; void pbc_allocate(); - + void sort(); +#if defined(LMP_USE_LIBC_QSORT) static int idcompare(const void *, const void *); static int bufcompare(const void *, const void *); static int bufcompare_reverse(const void *, const void *); +#else + static int idcompare(const int, const int, void *); + static int bufcompare(const int, const int, void *); + static int bufcompare_reverse(const int, const int, void *); +#endif }; } diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 171a90ba3a374682c8df92f30378183cccc9b6a3..705f1970afa1d15658ed014dd3168f24d5b96b7a 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -37,7 +37,7 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -enum{NONE,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE}; +enum{NONE=0,FINAL,DELTA,SCALE,VEL,ERATE,TRATE,VOLUME,WIGGLE,VARIABLE}; enum{ONE_FROM_ONE,ONE_FROM_TWO,TWO_FROM_ONE}; // same as domain.cpp, fix_nvt_sllod.cpp, compute_temp_deform.cpp @@ -52,6 +52,7 @@ rfix(NULL), irregular(NULL), set(NULL) if (narg < 4) error->all(FLERR,"Illegal fix deform command"); no_change_box = 1; + restart_global = 1; nevery = force->inumeric(FLERR,arg[3]); if (nevery <= 0) error->all(FLERR,"Illegal fix deform command"); @@ -59,12 +60,7 @@ rfix(NULL), irregular(NULL), set(NULL) // set defaults set = 
new Set[6]; - set[0].style = set[1].style = set[2].style = - set[3].style = set[4].style = set[5].style = NONE; - set[0].hstr = set[1].hstr = set[2].hstr = - set[3].hstr = set[4].hstr = set[5].hstr = NULL; - set[0].hratestr = set[1].hratestr = set[2].hratestr = - set[3].hratestr = set[4].hratestr = set[5].hratestr = NULL; + memset(set,0,6*sizeof(Set)); // parse arguments @@ -343,11 +339,9 @@ rfix(NULL), irregular(NULL), set(NULL) set[i].hi_initial = domain->boxhi[i]; set[i].vol_initial = domain->xprd * domain->yprd * domain->zprd; } - for (int i = 3; i < 6; i++) { - if (i == 5) set[i].tilt_initial = domain->xy; - else if (i == 4) set[i].tilt_initial = domain->xz; - else if (i == 3) set[i].tilt_initial = domain->yz; - } + set[3].tilt_initial = domain->yz; + set[4].tilt_initial = domain->xz; + set[5].tilt_initial = domain->xy; // reneighboring only forced if flips can occur due to shape changes @@ -955,6 +949,43 @@ void FixDeform::end_of_step() if (kspace_flag) force->kspace->setup(); } +/* ---------------------------------------------------------------------- + write Set data to restart file +------------------------------------------------------------------------- */ + +void FixDeform::write_restart(FILE *fp) +{ + if (comm->me == 0) { + int size = 6*sizeof(Set); + fwrite(&size,sizeof(int),1,fp); + fwrite(set,sizeof(Set),6,fp); + } +} + +/* ---------------------------------------------------------------------- + use selected state info from restart file to restart the Fix +------------------------------------------------------------------------- */ + +void FixDeform::restart(char *buf) +{ + int samestyle = 1; + Set *set_restart = (Set *) buf; + for (int i=0; i<6; ++i) { + // restore data from initial state + set[i].lo_initial = set_restart[i].lo_initial; + set[i].hi_initial = set_restart[i].hi_initial; + set[i].vol_initial = set_restart[i].vol_initial; + set[i].tilt_initial = set_restart[i].tilt_initial; + // check if style settings are consitent (should do the 
whole set?) + if (set[i].style != set_restart[i].style) + samestyle = 0; + if (set[i].substyle != set_restart[i].substyle) + samestyle = 0; + } + if (!samestyle) + error->all(FLERR,"Fix deform settings not consistent with restart"); +} + /* ---------------------------------------------------------------------- */ void FixDeform::options(int narg, char **arg) diff --git a/src/fix_deform.h b/src/fix_deform.h index cdda1b8547c0eb735b5e17e00957f14d915bb74f..4d440eb3c7a97d6dfd06b415ddf46f7a04f472f8 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -35,6 +35,8 @@ class FixDeform : public Fix { void init(); virtual void pre_exchange(); virtual void end_of_step(); + virtual void write_restart(FILE *); + virtual void restart(char *buf); double memory_usage(); protected: diff --git a/src/fix_heat.cpp b/src/fix_heat.cpp index d41aa4abea90d36adf7ee59f00441fa6d4ad47fd..97e0ed6a7fd76662087d4db5415d9808ec535c39 100644 --- a/src/fix_heat.cpp +++ b/src/fix_heat.cpp @@ -64,7 +64,7 @@ idregion(NULL), hstr(NULL), vheat(NULL), vscale(NULL) // optional args iregion = -1; - + int iarg = 5; while (iarg < narg) { if (strcmp(arg[iarg],"region") == 0) { @@ -126,6 +126,10 @@ void FixHeat::init() else error->all(FLERR,"Variable for fix heat is invalid style"); } + // check for rigid bodies in region (done here for performance reasons) + if (modify->check_rigid_region_overlap(groupbit,domain->regions[iregion])) + error->warning(FLERR,"Cannot apply fix heat to atoms in rigid bodies"); + // cannot have 0 atoms in group if (group->count(igroup) == 0) diff --git a/src/fix_lineforce.cpp b/src/fix_lineforce.cpp index f82ed957f73bbfd2f2fe54b9b41746af161564ca..1e78bf3ec390130d229509bdec35d17d4a52e9c1 100644 --- a/src/fix_lineforce.cpp +++ b/src/fix_lineforce.cpp @@ -29,6 +29,8 @@ using namespace FixConst; FixLineForce::FixLineForce(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { + dynamic_group_allow = 1; + if (narg != 6) error->all(FLERR,"Illegal fix lineforce command"); xdir = 
force->numeric(FLERR,arg[3]); ydir = force->numeric(FLERR,arg[4]); diff --git a/src/fix_planeforce.cpp b/src/fix_planeforce.cpp index 872bd9871610786074b8dd23bb3d18a7a42547e0..5e999c888cfdc9e2ab2b30a4e89bb8e9146f6da3 100644 --- a/src/fix_planeforce.cpp +++ b/src/fix_planeforce.cpp @@ -29,6 +29,8 @@ using namespace FixConst; FixPlaneForce::FixPlaneForce(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { + dynamic_group_allow = 1; + if (narg != 6) error->all(FLERR,"Illegal fix planeforce command"); xdir = force->numeric(FLERR,arg[3]); ydir = force->numeric(FLERR,arg[4]); diff --git a/src/fix_temp_berendsen.cpp b/src/fix_temp_berendsen.cpp index aff9a44977606a1b46716bed2f3e1b47f3b729ff..7b312cfb5f35dbc1777aeb9e63c3a530ed1e9d9c 100644 --- a/src/fix_temp_berendsen.cpp +++ b/src/fix_temp_berendsen.cpp @@ -128,6 +128,9 @@ void FixTempBerendsen::init() error->all(FLERR,"Temperature ID for fix temp/berendsen does not exist"); temperature = modify->compute[icompute]; + if (modify->check_rigid_group_overlap(groupbit)) + error->warning(FLERR,"Cannot thermostat atoms in rigid bodies"); + if (temperature->tempbias) which = BIAS; else which = NOBIAS; } diff --git a/src/fix_temp_csld.cpp b/src/fix_temp_csld.cpp index f24314ac80ef07d8a520816530deebb28d661d0d..63f27cdecb17968ecbc6f265ef78a45018a70535 100644 --- a/src/fix_temp_csld.cpp +++ b/src/fix_temp_csld.cpp @@ -155,6 +155,9 @@ void FixTempCSLD::init() error->all(FLERR,"Temperature ID for fix temp/csld does not exist"); temperature = modify->compute[icompute]; + if (modify->check_rigid_group_overlap(groupbit)) + error->warning(FLERR,"Cannot thermostat atoms in rigid bodies"); + if (temperature->tempbias) which = BIAS; else which = NOBIAS; } diff --git a/src/group.cpp b/src/group.cpp index 973fcbdcceea6a61f1073fc9889121dbdf472f36..76275f301d34272e6e6d1c8572c37840eb6c73f6 100644 --- a/src/group.cpp +++ b/src/group.cpp @@ -45,10 +45,6 @@ enum{LT,LE,GT,GE,EQ,NEQ,BETWEEN}; #define BIG 1.0e20 -// allocate space for static 
class variable - -Group *Group::cptr; - /* ---------------------------------------------------------------------- initialize group memory ------------------------------------------------------------------------- */ @@ -654,9 +650,8 @@ void Group::add_molecules(int igroup, int bit) std::map<tagint,int>::iterator pos; for (pos = hash->begin(); pos != hash->end(); ++pos) list[n++] = pos->first; - cptr = this; molbit = bit; - comm->ring(n,sizeof(tagint),list,1,molring,NULL); + comm->ring(n,sizeof(tagint),list,1,molring,NULL,(void *)this); delete hash; memory->destroy(list); @@ -669,14 +664,15 @@ void Group::add_molecules(int igroup, int bit) add atom to group flagged by molbit ------------------------------------------------------------------------- */ -void Group::molring(int n, char *cbuf) +void Group::molring(int n, char *cbuf, void *ptr) { + Group *gptr = (Group *) ptr; tagint *list = (tagint *) cbuf; - std::map<tagint,int> *hash = cptr->hash; - int nlocal = cptr->atom->nlocal; - tagint *molecule = cptr->atom->molecule; - int *mask = cptr->atom->mask; - int molbit = cptr->molbit; + std::map<tagint,int> *hash = gptr->hash; + int nlocal = gptr->atom->nlocal; + tagint *molecule = gptr->atom->molecule; + int *mask = gptr->atom->mask; + int molbit = gptr->molbit; hash->clear(); for (int i = 0; i < n; i++) (*hash)[list[i]] = 1; diff --git a/src/group.h b/src/group.h index 47114b1740cd1166c0ffde8dfe508b7f6deade9b..31e9b719f81bf14b35b76478f3fb2a3edac9dbfa 100644 --- a/src/group.h +++ b/src/group.h @@ -70,11 +70,9 @@ class Group : protected Pointers { int find_unused(); void add_molecules(int, int); - // static variable for ring communication callback to access class data // callback functions for ring communication - static Group *cptr; - static void molring(int, char *); + static void molring(int, char *, void *); int molbit; }; diff --git a/src/irregular.cpp b/src/irregular.cpp index d0210244fbd830b1f4d493c23af37907a8b3f4f4..6cd1b22c2fefc099d4ce1b976756a5fc53bc8941 
100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -23,11 +23,16 @@ using namespace LAMMPS_NS; +#if defined(LMP_QSORT) // allocate space for static class variable // prototype for non-class function - int *Irregular::proc_recv_copy; -int compare_standalone(const void *, const void *); +static int compare_standalone(const void *, const void *); +#else +#include "mergesort.h" +// prototype for non-class function +static int compare_standalone(const int, const int, void *); +#endif enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files @@ -423,8 +428,13 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) int *length_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_QSORT) proc_recv_copy = proc_recv; qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { @@ -450,6 +460,8 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) return nrecvsize; } +#if defined(LMP_QSORT) + /* ---------------------------------------------------------------------- comparison function invoked by qsort() accesses static class member proc_recv_copy, set before call to qsort() @@ -465,6 +477,23 @@ int compare_standalone(const void *iptr, const void *jptr) return 0; } +#else + +/* ---------------------------------------------------------------------- + comparison function invoked by merge_sort() + void pointer contains proc_recv list; +------------------------------------------------------------------------- */ + +int compare_standalone(const int i, const int j, void *ptr) +{ + int *proc_recv = (int *) ptr; + if (proc_recv[i] < proc_recv[j]) return -1; + if (proc_recv[i] > proc_recv[j]) return 1; + return 0; +} + +#endif + /* ---------------------------------------------------------------------- communicate atoms via PlanAtom sendbuf = list of atoms 
to send @@ -671,8 +700,13 @@ int Irregular::create_data(int n, int *proclist, int sortflag) int *num_recv_ordered = new int[nrecv_proc]; for (i = 0; i < nrecv_proc; i++) order[i] = i; + +#if defined(LMP_QSORT) proc_recv_copy = proc_recv; qsort(order,nrecv_proc,sizeof(int),compare_standalone); +#else + merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone); +#endif int j; for (i = 0; i < nrecv_proc; i++) { diff --git a/src/irregular.h b/src/irregular.h index ea0fee2eb83c4c3d0002633a362292d36a92757e..5b2a7718472dc6d35d8af16eaaf360cae77d6f33 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -21,9 +21,11 @@ namespace LAMMPS_NS { class Irregular : protected Pointers { public: +#if defined(LMP_USE_LIBC_QSORT) // static variable across all Irregular objects, for qsort callback static int *proc_recv_copy; +#endif Irregular(class LAMMPS *); ~Irregular(); diff --git a/src/mergesort.h b/src/mergesort.h new file mode 100644 index 0000000000000000000000000000000000000000..1df6cb4b819375e68fbdd2c745a4c7cee4a49009 --- /dev/null +++ b/src/mergesort.h @@ -0,0 +1,120 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LMP_MERGESORT +#define LMP_MERGESORT + +#include <string.h> + +// custom hybrid upward merge sort implementation with support to pass +// an opaque pointer to the comparison function, e.g. for access to +// class members. this avoids having to use global variables. 
+// for improved performance, we employ an in-place insertion sort on +// chunks of up to 64 elements and switch to merge sort from then on. + +// part 1. insertion sort for pre-sorting of small chunks + +static void insertion_sort(int *index, int num, void *ptr, + int (*comp)(int, int, void*)) +{ + if (num < 2) return; + for (int i=1; i < num; ++i) { + int tmp = index[i]; + for (int j=i-1; j >= 0; --j) { + if ((*comp)(index[j],tmp,ptr) > 0) { + index[j+1] = index[j]; + } else { + index[j+1] = tmp; + break; + } + if (j == 0) index[0] = tmp; + } + } +} + +// part 2. merge two sublists + +static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi, + void *ptr, int (*comp)(int, int, void *)) +{ + int i = llo; + int l = llo; + int r = rlo; + while ((l < lhi) && (r < rhi)) { + if ((*comp)(buf[l],buf[r],ptr) < 0) + idx[i++] = buf[l++]; + else idx[i++] = buf[r++]; + } + + while (l < lhi) idx[i++] = buf[l++]; + while (r < rhi) idx[i++] = buf[r++]; +} + +// part 3: loop over sublists doubling in size with each iteration. +// pre-sort sublists with insertion sort for better performance. + +static void merge_sort(int *index, int num, void *ptr, + int (*comp)(int, int, void *)) +{ + if (num < 2) return; + + int chunk,i,j; + + // do insertion sort on chunks of up to 64 elements + + chunk = 64; + for (i=0; i < num; i += chunk) { + j = (i+chunk > num) ? num-i : chunk; + insertion_sort(index+i,j,ptr,comp); + } + + // already done? + + if (chunk >= num) return; + + // continue with merge sort on the pre-sorted chunks.
+ // we need an extra buffer for temporary storage and two + // pointers to operate on, so we can swap the pointers + // rather than copying to the hold buffer in each pass + + int *buf = new int[num]; + int *dest = index; + int *hold = buf; + + while (chunk < num) { + int m; + + // swap hold and destination buffer + + int *tmp = dest; dest = hold; hold = tmp; + + // merge from hold to destination; i < num so a lone last element is copied + + for (i=0; i < num; i += 2*chunk) { + j = i + 2*chunk; + if (j > num) j=num; + m = i+chunk; + if (m > num) m=num; + do_merge(dest,hold,i,m,m,j,ptr,comp); + } + chunk *= 2; + } + + // if the final sorted data is in buf, copy back to index + + if (dest == buf) memcpy(index,buf,sizeof(int)*num); + + delete[] buf; +} + +#endif diff --git a/src/modify.cpp b/src/modify.cpp index 7af4576038c33d855325fb224630658fa0e7764c..01de6b59284676069de3980362accd8aa35badd6 100644 --- a/src/modify.cpp +++ b/src/modify.cpp @@ -23,6 +23,7 @@ #include "group.h" #include "update.h" #include "domain.h" +#include "region.h" #include "input.h" #include "variable.h" #include "memory.h" @@ -995,6 +996,99 @@ int Modify::check_package(const char *package_fix_name) return 1; } + +/* ---------------------------------------------------------------------- + check if the group indicated by groupbit overlaps with any + currently existing rigid fixes.
return 1 in this case otherwise 0 +------------------------------------------------------------------------- */ + +int Modify::check_rigid_group_overlap(int groupbit) +{ + const int * const mask = atom->mask; + const int nlocal = atom->nlocal; + int dim; + + int n = 0; + for (int ifix = 0; ifix < nfix; ifix++) { + if (strncmp("rigid",fix[ifix]->style,5) == 0) { + const int * const body = (const int *)fix[ifix]->extract("body",dim); + if ((body == NULL) || (dim != 1)) break; + + for (int i=0; (i < nlocal) && (n == 0); ++i) + if ((mask[i] & groupbit) && (body[i] >= 0)) ++n; + } + } + + int n_all = 0; + MPI_Allreduce(&n,&n_all,1,MPI_INT,MPI_SUM,world); + + if (n_all > 0) return 1; + return 0; +} + +/* ---------------------------------------------------------------------- + check if the atoms in the group indicated by groupbit _and_ inside the + region reg overlap with any currently existing rigid fixes. + return 1 (on all ranks, via MPI_Allreduce) in this case, otherwise 0 +------------------------------------------------------------------------- */ + +int Modify::check_rigid_region_overlap(int groupbit, Region *reg) +{ + const int * const mask = atom->mask; + const double * const * const x = atom->x; + const int nlocal = atom->nlocal; + int dim; + + int n = 0; + reg->prematch(); + for (int ifix = 0; ifix < nfix; ifix++) { + if (strncmp("rigid",fix[ifix]->style,5) == 0) { + const int * const body = (const int *)fix[ifix]->extract("body",dim); + if ((body == NULL) || (dim != 1)) break; + + for (int i=0; (i < nlocal) && (n == 0); ++i) + if ((mask[i] & groupbit) && (body[i] >= 0) + && reg->match(x[i][0],x[i][1],x[i][2])) ++n; + } + } + + int n_all = 0; + MPI_Allreduce(&n,&n_all,1,MPI_INT,MPI_SUM,world); + + if (n_all > 0) return 1; + return 0; +} + +/* ---------------------------------------------------------------------- + check if the atoms in the selection list (length atom->nlocal, + content: 1 if atom is contained, 0 if not) overlap with currently + existing rigid fixes.
return 1 (on all ranks, via MPI_Allreduce) in this case, otherwise 0 +------------------------------------------------------------------------- */ + +int Modify::check_rigid_list_overlap(int *select) +{ + const int * const mask = atom->mask; /* NOTE(review): unused in this function */ + const int nlocal = atom->nlocal; + int dim; + + int n = 0; + for (int ifix = 0; ifix < nfix; ifix++) { + if (strncmp("rigid",fix[ifix]->style,5) == 0) { + const int * const body = (const int *)fix[ifix]->extract("body",dim); + if ((body == NULL) || (dim != 1)) break; + + for (int i=0; (i < nlocal) && (n == 0); ++i) + if ((body[i] >= 0) && select[i]) ++n; + } + } + + int n_all = 0; + MPI_Allreduce(&n,&n_all,1,MPI_INT,MPI_SUM,world); + + if (n_all > 0) return 1; + return 0; +} + /* ---------------------------------------------------------------------- add a new compute ------------------------------------------------------------------------- */ diff --git a/src/modify.h b/src/modify.h index 3ded3cbab6ea0d4a586358e6c850a03b8d9b4055..d825d5c4efbf543a709e766253a0a7e600cf8dcf 100644 --- a/src/modify.h +++ b/src/modify.h @@ -98,6 +98,9 @@ class Modify : protected Pointers { int find_fix(const char *); int find_fix_by_style(const char *); int check_package(const char *); + int check_rigid_group_overlap(int); + int check_rigid_region_overlap(int, class Region *); + int check_rigid_list_overlap(int *); void add_compute(int, char **, int trysuffix=1); void modify_compute(int, char **); diff --git a/src/npair.cpp b/src/npair.cpp index 3451cd6eae4dcbdc5fcd442eeadad919f287d550..9fbb4d219db3b6f8e357b43f1e70cc3b7ab29cea 100644 --- a/src/npair.cpp +++ b/src/npair.cpp @@ -123,7 +123,7 @@ void NPair::copy_bin_info() mbinxlo = nb->mbinxlo; mbinylo = nb->mbinylo; mbinzlo = nb->mbinzlo; - + bininvx = nb->bininvx; bininvy = nb->bininvy; bininvz = nb->bininvz; @@ -183,15 +183,15 @@ int NPair::exclusion(int i, int j, int itype, int jtype, if (nex_mol) { for (m = 0; m < nex_mol; m++) - // intra-chain: exclude i-j pair if in same molecule - // inter-chain: exclude i-j pair if in
different molecules + // intra-chain: exclude i-j pair if in same molecule + // inter-chain: exclude i-j pair if in different molecules if (ex_mol_intra[m]) { if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && - molecule[i] == molecule[j]) return 1; + molecule[i] == molecule[j]) return 1; } else { - if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && - molecule[i] != molecule[j]) return 1; + if (mask[i] & ex_mol_bit[m] && mask[j] & ex_mol_bit[m] && + molecule[i] != molecule[j]) return 1; } } diff --git a/src/npair.h b/src/npair.h index 4e5e3f5dfd38619db6a76c3f84e230019dfb9d12..6941b86164c674a81ecfd282af718fafe6aa646a 100644 --- a/src/npair.h +++ b/src/npair.h @@ -79,7 +79,7 @@ class NPair : protected Pointers { double bininvx,bininvy,bininvz; int *atom2bin,*bins; int *binhead; - + // data from NStencil class int nstencil; diff --git a/src/npair_full_bin.h b/src/npair_full_bin.h index 432fb3cbf8ba32532e82b46816a1aba80550ea19..56c338e3603d5e765dacae5aa8de81a9b8379b5a 100644 --- a/src/npair_full_bin.h +++ b/src/npair_full_bin.h @@ -15,7 +15,7 @@ NPairStyle(full/bin, NPairFullBin, - NP_FULL | NP_BIN | NP_MOLONLY | + NP_FULL | NP_BIN | NP_MOLONLY | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_full_bin_atomonly.h b/src/npair_full_bin_atomonly.h index f8c33d95588c3e814ab4b96190c113c7505085b0..0845d1ecef2418c09a8b1973dce069df9053d993 100644 --- a/src/npair_full_bin_atomonly.h +++ b/src/npair_full_bin_atomonly.h @@ -16,7 +16,7 @@ NPairStyle(full/bin/atomonly, NPairFullBinAtomonly, NP_FULL | NP_BIN | NP_ATOMONLY | - NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) + NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_full_bin_ghost.h b/src/npair_full_bin_ghost.h index a09aab851235b097a9082d0f54c4a47ec13d87f1..c5a86e68af5a0d367d37a813b37c2938a6711ef2 100644 --- a/src/npair_full_bin_ghost.h +++ b/src/npair_full_bin_ghost.h @@ -15,7 +15,7 @@ NPairStyle(full/bin/ghost, NPairFullBinGhost, - NP_FULL | NP_BIN | NP_GHOST | 
NP_NEWTON | NP_NEWTOFF | + NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_full_nsq_ghost.h b/src/npair_full_nsq_ghost.h index 3e259ed0989771462b0ec3e3eb512ffd530b452f..58cd73c392dbfb380174ec8e2358321574a8487e 100644 --- a/src/npair_full_nsq_ghost.h +++ b/src/npair_full_nsq_ghost.h @@ -15,7 +15,7 @@ NPairStyle(full/nsq/ghost, NPairFullNsqGhost, - NP_FULL | NP_NSQ | NP_GHOST | NP_NEWTON | NP_NEWTOFF | + NP_FULL | NP_NSQ | NP_GHOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_half_bin_atomonly_newton.cpp b/src/npair_half_bin_atomonly_newton.cpp index 6bbef0700a36e681d444217d447c2fc421133952..6da44b4a5cfc252ab8c449fd12ed36d282dfbb36 100644 --- a/src/npair_half_bin_atomonly_newton.cpp +++ b/src/npair_half_bin_atomonly_newton.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfBinAtomonlyNewton::NPairHalfBinAtomonlyNewton(LAMMPS *lmp) : +NPairHalfBinAtomonlyNewton::NPairHalfBinAtomonlyNewton(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/npair_half_respa_bin_newton_tri.cpp b/src/npair_half_respa_bin_newton_tri.cpp index 38621224c49df568fb7ed6b44a8c69ab7e2a6e1b..4ec6685e1d4580024458891ae3da7a436b79753b 100644 --- a/src/npair_half_respa_bin_newton_tri.cpp +++ b/src/npair_half_respa_bin_newton_tri.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfRespaBinNewtonTri::NPairHalfRespaBinNewtonTri(LAMMPS *lmp) : +NPairHalfRespaBinNewtonTri::NPairHalfRespaBinNewtonTri(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/npair_half_size_bin_newton.cpp b/src/npair_half_size_bin_newton.cpp index a8be3ce691092d7843e03a1147afc9a8ccc79836..4f4ecccb165d4289108abcbade6ff14d51ae9c8b 100644 --- 
a/src/npair_half_size_bin_newton.cpp +++ b/src/npair_half_size_bin_newton.cpp @@ -190,7 +190,7 @@ void NPairHalfSizeBinNewton::build(NeighList *list) nn += dnum; } } - + n++; } } diff --git a/src/npair_half_size_bin_newton_tri.cpp b/src/npair_half_size_bin_newton_tri.cpp index 1107f73026240fb75ac1bd1b1d79e239b3ee279d..559eb09a7a5f731e4a1398f36644bb0d8a03f7f5 100644 --- a/src/npair_half_size_bin_newton_tri.cpp +++ b/src/npair_half_size_bin_newton_tri.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) : +NPairHalfSizeBinNewtonTri::NPairHalfSizeBinNewtonTri(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/npair_skip_respa.h b/src/npair_skip_respa.h index 62077f85df0f3dce5dee255f9e20767401c95192..deff301909196cd6f8cb94c7a1db3c89319f0efc 100644 --- a/src/npair_skip_respa.h +++ b/src/npair_skip_respa.h @@ -15,8 +15,8 @@ NPairStyle(skip/half/respa, NPairSkipRespa, - NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | - NP_NSQ | NP_BIN | NP_MULTI | + NP_SKIP | NP_RESPA | NP_HALF | NP_FULL | + NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_skip_size.cpp b/src/npair_skip_size.cpp index 98e757e5c7cc1fd361b3e98666467d940adbe87f..e8d19dedcaf46b6461e9fd989d0596af89c6624c 100644 --- a/src/npair_skip_size.cpp +++ b/src/npair_skip_size.cpp @@ -32,7 +32,7 @@ NPairSkipSize::NPairSkipSize(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- build skip list for subset of types from parent list iskip and ijskip flag which atom types and type pairs to skip - if list requests it, preserve shear history via fix shear/history + if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void 
NPairSkipSize::build(NeighList *list) diff --git a/src/npair_skip_size.h b/src/npair_skip_size.h index 9573396641ecc841a28e56c29c04a4b58adb13d2..b462c9dc974aaf5677f57e042b803594b6cebed6 100644 --- a/src/npair_skip_size.h +++ b/src/npair_skip_size.h @@ -15,7 +15,7 @@ NPairStyle(skip/half/size, NPairSkipSize, - NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | + NP_SKIP | NP_SIZE | NP_HALF | NP_FULL | NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_skip_size_off2on.cpp b/src/npair_skip_size_off2on.cpp index 996e9939df4f555e0c1e2fc19542e358029e3ae8..da9dd57047444fd09e85ecfcb70335438b07aa60 100644 --- a/src/npair_skip_size_off2on.cpp +++ b/src/npair_skip_size_off2on.cpp @@ -33,7 +33,7 @@ NPairSkipSizeOff2on::NPairSkipSizeOff2on(LAMMPS *lmp) : NPair(lmp) {} build skip list for subset of types from parent list iskip and ijskip flag which atom types and type pairs to skip parent non-skip list used newton off, this skip list is newton on - if list requests it, preserve shear history via fix shear/history + if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void NPairSkipSizeOff2on::build(NeighList *list) diff --git a/src/npair_skip_size_off2on.h b/src/npair_skip_size_off2on.h index 4b4e9a9c2938ccea1ab848cc769dd577f28685de..dab32f04ff10c8b7dc910f934ef5b4100dcdd689 100644 --- a/src/npair_skip_size_off2on.h +++ b/src/npair_skip_size_off2on.h @@ -15,8 +15,8 @@ NPairStyle(skip/size/off2on, NPairSkipSizeOff2on, - NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | - NP_NSQ | NP_BIN | NP_MULTI | + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/npair_skip_size_off2on_oneside.cpp b/src/npair_skip_size_off2on_oneside.cpp index a4c16255905afdf5da21c0d87cd67b3385134bc7..7377feec5bbefe495242deadff125c48b3fa404e 100644 --- 
a/src/npair_skip_size_off2on_oneside.cpp +++ b/src/npair_skip_size_off2on_oneside.cpp @@ -27,7 +27,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) : +NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) : NPair(lmp) {} /* ---------------------------------------------------------------------- @@ -35,7 +35,7 @@ NPairSkipSizeOff2onOneside::NPairSkipSizeOff2onOneside(LAMMPS *lmp) : iskip and ijskip flag which atom types and type pairs to skip parent non-skip list used newton off and was not onesided, this skip list is newton on and onesided - if list requests it, preserve shear history via fix shear/history + if list requests it, preserve shear history via fix shear/history ------------------------------------------------------------------------- */ void NPairSkipSizeOff2onOneside::build(NeighList *list) diff --git a/src/npair_skip_size_off2on_oneside.h b/src/npair_skip_size_off2on_oneside.h index 9f3c06e7bc68f05ca4b652cb98c27faf75154b6b..73448ca279c17ea7fa9eefa86f506b436791c708 100644 --- a/src/npair_skip_size_off2on_oneside.h +++ b/src/npair_skip_size_off2on_oneside.h @@ -15,8 +15,8 @@ NPairStyle(skip/size/off2on/oneside, NPairSkipSizeOff2onOneside, - NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | - NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | + NP_SKIP | NP_SIZE | NP_OFF2ON | NP_ONESIDE | NP_HALF | + NP_NSQ | NP_BIN | NP_MULTI | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI) #else diff --git a/src/nstencil_full_bin_2d.cpp b/src/nstencil_full_bin_2d.cpp index 1f2b666dfbe6606565b23c22c9c9d63f2e93806e..0986c40dd442176d63056eea3eeeae9b2f16990d 100644 --- a/src/nstencil_full_bin_2d.cpp +++ b/src/nstencil_full_bin_2d.cpp @@ -28,7 +28,7 @@ NStencilFullBin2d::NStencilFullBin2d(LAMMPS *lmp) : NStencil(lmp) {} void NStencilFullBin2d::create() { int i,j; - + nstencil = 0; for (j = -sy; j <= sy; j++) diff --git 
a/src/nstencil_full_bin_2d.h b/src/nstencil_full_bin_2d.h index 18f848f275c9649b426385760af18eb53f81e66e..d85063596fe175491a56fc5bf925c404b4c894a3 100644 --- a/src/nstencil_full_bin_2d.h +++ b/src/nstencil_full_bin_2d.h @@ -15,7 +15,7 @@ NStencilStyle(full/bin/2d, NStencilFullBin2d, - NS_FULL | NS_BIN | NS_2D | + NS_FULL | NS_BIN | NS_2D | NS_NEWTON | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/nstencil_full_bin_3d.h b/src/nstencil_full_bin_3d.h index d9acc9c5351c776e08ae8eb39165a69c5667d35f..facddd8ead40fa7cf38db26f4219a555f6a880db 100644 --- a/src/nstencil_full_bin_3d.h +++ b/src/nstencil_full_bin_3d.h @@ -15,7 +15,7 @@ NStencilStyle(full/bin/3d, NStencilFullBin3d, - NS_FULL | NS_BIN | NS_3D | + NS_FULL | NS_BIN | NS_3D | NS_NEWTON | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/nstencil_full_ghost_bin_2d.h b/src/nstencil_full_ghost_bin_2d.h index af47913e7f5945f699c6cdb544a51efb0683254b..531c7d2eb1d064ab13564d1897002a5a2d43bf15 100644 --- a/src/nstencil_full_ghost_bin_2d.h +++ b/src/nstencil_full_ghost_bin_2d.h @@ -15,7 +15,7 @@ NStencilStyle(full/ghost/bin/2d, NStencilFullGhostBin2d, - NS_FULL | NS_GHOST | NS_BIN | NS_2D | + NS_FULL | NS_GHOST | NS_BIN | NS_2D | NS_NEWTON | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/nstencil_full_ghost_bin_3d.h b/src/nstencil_full_ghost_bin_3d.h index beca6573de9a766679846339b1faa25931ac8c45..ed4ca6c4d6c8fad7ed6817ed68661525ede66ebd 100644 --- a/src/nstencil_full_ghost_bin_3d.h +++ b/src/nstencil_full_ghost_bin_3d.h @@ -15,7 +15,7 @@ NStencilStyle(full/ghost/bin/3d, NStencilFullGhostBin3d, - NS_FULL | NS_GHOST | NS_BIN | NS_3D | + NS_FULL | NS_GHOST | NS_BIN | NS_3D | NS_NEWTON | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/nstencil_full_multi_2d.h b/src/nstencil_full_multi_2d.h index 8154144eda58ecdf559bc1e18d4552f592719960..f78eecc55fda8aae7d5354cacb02de00bcd1b4a5 100644 --- a/src/nstencil_full_multi_2d.h +++ b/src/nstencil_full_multi_2d.h @@ -15,7 +15,7 @@ 
NStencilStyle(full/multi/2d, NStencilFullMulti2d, - NS_FULL | NS_MULTI | NS_2D | + NS_FULL | NS_MULTI | NS_2D | NS_NEWTON | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/nstencil_half_bin_2d_newtoff.cpp b/src/nstencil_half_bin_2d_newtoff.cpp index be5bc81dbfba8ed2e6b70cd3670e198f6f4bbc46..e51db6fe7a4161b4db8e7b1f1ef784687299544b 100644 --- a/src/nstencil_half_bin_2d_newtoff.cpp +++ b/src/nstencil_half_bin_2d_newtoff.cpp @@ -19,7 +19,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NStencilHalfBin2dNewtoff::NStencilHalfBin2dNewtoff(LAMMPS *lmp) : +NStencilHalfBin2dNewtoff::NStencilHalfBin2dNewtoff(LAMMPS *lmp) : NStencil(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/nstencil_half_bin_2d_newton_tri.cpp b/src/nstencil_half_bin_2d_newton_tri.cpp index 3a645a74345f7c1823bc9e3030d785ebebb2bed1..4f89b1c326661b428e11af25b063b2961076c540 100644 --- a/src/nstencil_half_bin_2d_newton_tri.cpp +++ b/src/nstencil_half_bin_2d_newton_tri.cpp @@ -19,7 +19,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NStencilHalfBin2dNewtonTri::NStencilHalfBin2dNewtonTri(LAMMPS *lmp) : +NStencilHalfBin2dNewtonTri::NStencilHalfBin2dNewtonTri(LAMMPS *lmp) : NStencil(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/nstencil_half_bin_3d_newtoff.cpp b/src/nstencil_half_bin_3d_newtoff.cpp index 44678b05df5bf5f5ed3c4a8f031bb4d6abf78ac3..433de400c20d63793e356647c951d86d4f21eca9 100644 --- a/src/nstencil_half_bin_3d_newtoff.cpp +++ b/src/nstencil_half_bin_3d_newtoff.cpp @@ -19,7 +19,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NStencilHalfBin3dNewtoff::NStencilHalfBin3dNewtoff(LAMMPS *lmp) : +NStencilHalfBin3dNewtoff::NStencilHalfBin3dNewtoff(LAMMPS *lmp) : NStencil(lmp) {} /* 
---------------------------------------------------------------------- diff --git a/src/nstencil_half_bin_3d_newton_tri.cpp b/src/nstencil_half_bin_3d_newton_tri.cpp index 9e8c41f97a22e0fa88ac3320335e60cab532799d..691ce0bb8095dca0db721a5bc04f350aaa9cc63f 100644 --- a/src/nstencil_half_bin_3d_newton_tri.cpp +++ b/src/nstencil_half_bin_3d_newton_tri.cpp @@ -19,7 +19,7 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -NStencilHalfBin3dNewtonTri::NStencilHalfBin3dNewtonTri(LAMMPS *lmp) : +NStencilHalfBin3dNewtonTri::NStencilHalfBin3dNewtonTri(LAMMPS *lmp) : NStencil(lmp) {} /* ---------------------------------------------------------------------- diff --git a/src/nstencil_half_ghost_bin_2d_newtoff.h b/src/nstencil_half_ghost_bin_2d_newtoff.h index 3b70f0042a7fdd8ac1b0dc072b147570b2db6cd6..3286810c1c8e2d6c3c3867b2a163aae0eb594a47 100644 --- a/src/nstencil_half_ghost_bin_2d_newtoff.h +++ b/src/nstencil_half_ghost_bin_2d_newtoff.h @@ -15,7 +15,7 @@ NStencilStyle(half/ghost/bin/2d/newtoff, NStencilHalfGhostBin2dNewtoff, - NS_HALF | NS_GHOST | NS_BIN | NS_2D | + NS_HALF | NS_GHOST | NS_BIN | NS_2D | NS_NEWTOFF | NS_ORTHO | NS_TRI) #else diff --git a/src/pair.cpp b/src/pair.cpp index d90ed8bb43a90e422dccf6b39a296b092d2e938f..06792060ce507ad32ee9c97fd2481c47ca8a2a7d 100644 --- a/src/pair.cpp +++ b/src/pair.cpp @@ -814,6 +814,16 @@ void Pair::ev_setup(int eflag, int vflag, int alloc) if (vflag_atom == 0) vflag_either = 0; if (vflag_either == 0 && eflag_either == 0) evflag = 0; } else vflag_fdotr = 0; + + + // run ev_setup option for USER-TALLY computes + + if (num_tally_compute > 0) { + for (int k=0; k < num_tally_compute; ++k) { + Compute *c = list_tally_compute[k]; + c->pair_setup_callback(eflag,vflag); + } + } } /* ---------------------------------------------------------------------- diff --git a/src/pair.h b/src/pair.h index dd859e5f2a07ca2b99ba288302a03b2f17d1cbbe..b57004d965c535b7583597b04b01fd6c22e7a61d 100644 
--- a/src/pair.h +++ b/src/pair.h @@ -245,7 +245,7 @@ class Pair : protected Pointers { ubuf(int arg) : i(arg) {} }; - inline int sbmask(int j) { + inline int sbmask(int j) const { return j >> SBBITS & 3; } }; diff --git a/src/pair_lj_smooth_linear.h b/src/pair_lj_smooth_linear.h index 0e3376b7894abf73180fbfcb8fc640fba5162f39..c18c442a185a390cf6e70782d22ac4b0a48bf56f 100644 --- a/src/pair_lj_smooth_linear.h +++ b/src/pair_lj_smooth_linear.h @@ -14,6 +14,7 @@ #ifdef PAIR_CLASS PairStyle(lj/smooth/linear,PairLJSmoothLinear) +PairStyle(lj/sf,PairLJSmoothLinear) #else diff --git a/src/set.cpp b/src/set.cpp index 4ed07d423b112fd59c51ee8e3f24c73dbe8ab1b3..59625b7e6cb086c47c1d5d48d124457cd1e30d5b 100644 --- a/src/set.cpp +++ b/src/set.cpp @@ -327,15 +327,18 @@ void Set::command(int narg, char **arg) ximageflag = yimageflag = zimageflag = 0; if (strcmp(arg[iarg+1],"NULL") != 0) { ximageflag = 1; - ximage = force->inumeric(FLERR,arg[iarg+1]); + if (strstr(arg[iarg+1],"v_") == arg[iarg+1]) varparse(arg[iarg+1],1); + else ximage = force->inumeric(FLERR,arg[iarg+1]); } if (strcmp(arg[iarg+2],"NULL") != 0) { yimageflag = 1; - yimage = force->inumeric(FLERR,arg[iarg+2]); + if (strstr(arg[iarg+2],"v_") == arg[iarg+2]) varparse(arg[iarg+2],2); + else yimage = force->inumeric(FLERR,arg[iarg+2]); } if (strcmp(arg[iarg+3],"NULL") != 0) { zimageflag = 1; - zimage = force->inumeric(FLERR,arg[iarg+3]); + if (strstr(arg[iarg+3],"v_") == arg[iarg+3]) varparse(arg[iarg+3],3); + else zimage = force->inumeric(FLERR,arg[iarg+3]); } if (ximageflag && ximage && !domain->xperiodic) error->all(FLERR, @@ -585,6 +588,28 @@ void Set::set(int keyword) } } + // check if properties of atoms in rigid bodies are updated + // that are cached as per-body data. 
+ switch (keyword) { + case X: + case Y: + case Z: + case MOLECULE: + case MASS: + case ANGMOM: + case SHAPE: + case DIAMETER: + case DENSITY: + case QUAT: + case IMAGE: + if (modify->check_rigid_list_overlap(select)) + error->warning(FLERR,"Changing a property of atoms in rigid bodies " + "that has no effect unless rigid bodies are rebuild"); + break; + default: // assume no conflict for all other properties + break; + } + // loop over selected atoms AtomVecEllipsoid *avec_ellipsoid = @@ -789,6 +814,9 @@ void Set::set(int keyword) int xbox = (atom->image[i] & IMGMASK) - IMGMAX; int ybox = (atom->image[i] >> IMGBITS & IMGMASK) - IMGMAX; int zbox = (atom->image[i] >> IMG2BITS) - IMGMAX; + if (varflag1) ximage = static_cast<int>(xvalue); + if (varflag2) yimage = static_cast<int>(yvalue); + if (varflag3) zimage = static_cast<int>(zvalue); if (ximageflag) xbox = ximage; if (yimageflag) ybox = yimage; if (zimageflag) zbox = zimage; diff --git a/src/special.cpp b/src/special.cpp index 3fb5ec8077e21130009981b82f69958131072fe7..4697fc40a63b594841ce3448b6c032026f238460 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -27,10 +27,6 @@ using namespace LAMMPS_NS; -// allocate space for static class variable - -Special *Special::sptr; - /* ---------------------------------------------------------------------- */ Special::Special(LAMMPS *lmp) : Pointers(lmp) @@ -120,8 +116,7 @@ void Special::build() // when receive buffer, scan tags for atoms I own // when find one, increment nspecial count for that atom - sptr = this; - comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL); + comm->ring(size,sizeof(tagint),buf,1,ring_one,NULL,(void *)this); memory->destroy(buf); } @@ -178,8 +173,7 @@ void Special::build() // when receive buffer, scan 2nd-atom tags for atoms I own // when find one, add 1st-atom tag to onetwo list for 2nd atom - sptr = this; - comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL); + comm->ring(size,sizeof(tagint),buf,2,ring_two,NULL,(void *)this); 
memory->destroy(buf); } @@ -226,8 +220,7 @@ void Special::build() // when find one, increment 1-3 count by # of 1-2 neighbors of my atom, // subtracting one since my list will contain original atom - sptr = this; - comm->ring(size,sizeof(tagint),buf,3,ring_three,buf); + comm->ring(size,sizeof(tagint),buf,3,ring_three,buf,(void *)this); // extract count from buffer that has cycled back to me // nspecial[i][1] = # of 1-3 neighbors of atom i @@ -287,8 +280,7 @@ void Special::build() // exclude the atom whose tag = original // this process may include duplicates but they will be culled later - sptr = this; - comm->ring(size,sizeof(tagint),buf,4,ring_four,buf); + comm->ring(size,sizeof(tagint),buf,4,ring_four,buf,(void *)this); // fill onethree with buffer values that have been returned to me // sanity check: accumulated buf[i+3] count should equal @@ -343,8 +335,7 @@ void Special::build() // when find one, increment 1-4 count by # of 1-2 neighbors of my atom // may include duplicates and original atom but they will be culled later - sptr = this; - comm->ring(size,sizeof(tagint),buf,5,ring_five,buf); + comm->ring(size,sizeof(tagint),buf,5,ring_five,buf,(void *)this); // extract count from buffer that has cycled back to me // nspecial[i][2] = # of 1-4 neighbors of atom i @@ -402,8 +393,7 @@ void Special::build() // incrementing the count in buf(i+4) // this process may include duplicates but they will be culled later - sptr = this; - comm->ring(size,sizeof(tagint),buf,6,ring_six,buf); + comm->ring(size,sizeof(tagint),buf,6,ring_six,buf,(void *)this); // fill onefour with buffer values that have been returned to me // sanity check: accumulated buf[i+2] count should equal @@ -744,8 +734,7 @@ void Special::angle_trim() // when receive buffer, scan list of 1,3 atoms looking for atoms I own // when find one, scan its 1-3 neigh list and mark I,J as in an angle - sptr = this; - comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL); + 
comm->ring(size,sizeof(tagint),buf,7,ring_seven,NULL,(void *)this); // delete 1-3 neighbors if they are not flagged in dflag @@ -850,8 +839,7 @@ void Special::dihedral_trim() // when receive buffer, scan list of 1,4 atoms looking for atoms I own // when find one, scan its 1-4 neigh list and mark I,J as in a dihedral - sptr = this; - comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL); + comm->ring(size,sizeof(tagint),buf,8,ring_eight,NULL,(void *)this); // delete 1-4 neighbors if they are not flagged in dflag @@ -894,8 +882,9 @@ void Special::dihedral_trim() when find one, increment nspecial count for that atom ------------------------------------------------------------------------- */ -void Special::ring_one(int ndatum, char *cbuf) +void Special::ring_one(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -914,8 +903,9 @@ void Special::ring_one(int ndatum, char *cbuf) when find one, add 1st-atom tag to onetwo list for 2nd atom ------------------------------------------------------------------------- */ -void Special::ring_two(int ndatum, char *cbuf) +void Special::ring_two(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int nlocal = atom->nlocal; @@ -937,8 +927,9 @@ void Special::ring_two(int ndatum, char *cbuf) subtracting one since my list will contain original atom ------------------------------------------------------------------------- */ -void Special::ring_three(int ndatum, char *cbuf) +void Special::ring_three(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -968,8 +959,9 @@ void Special::ring_three(int ndatum, char *cbuf) this process may include duplicates but they will be culled later ------------------------------------------------------------------------- */ -void 
Special::ring_four(int ndatum, char *cbuf) +void Special::ring_four(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -1004,8 +996,9 @@ void Special::ring_four(int ndatum, char *cbuf) may include duplicates and original atom but they will be culled later ------------------------------------------------------------------------- */ -void Special::ring_five(int ndatum, char *cbuf) +void Special::ring_five(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -1033,8 +1026,9 @@ void Special::ring_five(int ndatum, char *cbuf) this process may include duplicates but they will be culled later ------------------------------------------------------------------------- */ -void Special::ring_six(int ndatum, char *cbuf) +void Special::ring_six(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -1065,8 +1059,9 @@ void Special::ring_six(int ndatum, char *cbuf) when find one, scan its 1-3 neigh list and mark I,J as in an angle ------------------------------------------------------------------------- */ -void Special::ring_seven(int ndatum, char *cbuf) +void Special::ring_seven(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = atom->nlocal; @@ -1105,8 +1100,9 @@ void Special::ring_seven(int ndatum, char *cbuf) when find one, scan its 1-4 neigh list and mark I,J as in a dihedral ------------------------------------------------------------------------- */ -void Special::ring_eight(int ndatum, char *cbuf) +void Special::ring_eight(int ndatum, char *cbuf, void *ptr) { + Special *sptr = (Special *) ptr; Atom *atom = sptr->atom; int **nspecial = atom->nspecial; int nlocal = 
atom->nlocal; diff --git a/src/special.h b/src/special.h index fbaad5ea343e8b70872528ceb1a5cb040ae5e4c3..9f25200336c597276c9c9ed5d2427be12debd3ef 100644 --- a/src/special.h +++ b/src/special.h @@ -39,18 +39,16 @@ class Special : protected Pointers { void combine(); void fix_alteration(); - // static variable for ring communication callback to access class data // callback functions for ring communication - static Special *sptr; - static void ring_one(int, char *); - static void ring_two(int, char *); - static void ring_three(int, char *); - static void ring_four(int, char *); - static void ring_five(int, char *); - static void ring_six(int, char *); - static void ring_seven(int, char *); - static void ring_eight(int, char *); + static void ring_one(int, char *, void *); + static void ring_two(int, char *, void *); + static void ring_three(int, char *, void *); + static void ring_four(int, char *, void *); + static void ring_five(int, char *, void *); + static void ring_six(int, char *, void *); + static void ring_seven(int, char *, void *); + static void ring_eight(int, char *, void *); }; } diff --git a/src/thermo.cpp b/src/thermo.cpp index 18deecb1a8cb03ff619535f2c53f42dadc4e1d96..d4f7c5cc9e1434e86e8efb45dbcb7c0015ef8a25 100644 --- a/src/thermo.cpp +++ b/src/thermo.cpp @@ -402,6 +402,15 @@ void Thermo::compute(int flag) firststep = 1; } +/* ---------------------------------------------------------------------- + call function to compute property +------------------------------------------------------------------------- */ + +void Thermo::call_vfunc(int ifield) +{ + (this->*vfunc[ifield])(); +} + /* ---------------------------------------------------------------------- check for lost atoms, return current number of atoms ------------------------------------------------------------------------- */ diff --git a/src/thermo.h b/src/thermo.h index d87e8fce3d9e98cb26be277ea9aeeaf5e2f6984f..de2a46dec42f3a87c90c9fd7cac184a178c652ef 100644 --- a/src/thermo.h +++ 
b/src/thermo.h @@ -18,8 +18,13 @@ namespace LAMMPS_NS { +class DumpNetCDF; +class DumpNetCDFMPIIO; + class Thermo : protected Pointers { friend class MinCG; // accesses compute_pe + friend class DumpNetCDF; // accesses thermo properties + friend class DumpNetCDFMPIIO; // accesses thermo properties public: char *style; @@ -112,6 +117,7 @@ class Thermo : protected Pointers { typedef void (Thermo::*FnPtr)(); void addfield(const char *, FnPtr, int); FnPtr *vfunc; // list of ptrs to functions + void call_vfunc(int ifield); void compute_compute(); // functions that compute a single value void compute_fix(); // via calls to Compute,Fix,Variable classes diff --git a/src/velocity.cpp b/src/velocity.cpp index 82b6efbe1b8cadc2cb798feb301d7b2a3d9a146d..260a11bb4ee90dc2116907688e9b5bda4503e4cb 100644 --- a/src/velocity.cpp +++ b/src/velocity.cpp @@ -68,6 +68,12 @@ void Velocity::command(int narg, char **arg) if (igroup == -1) error->all(FLERR,"Could not find velocity group ID"); groupbit = group->bitmask[igroup]; + // check if velocities of atoms in rigid bodies are updated + + if (modify->check_rigid_group_overlap(groupbit)) + error->warning(FLERR,"Changing velocities of atoms in rigid bodies. 
" + "This has no effect unless rigid bodies are rebuild"); + // identify style if (strcmp(arg[1],"create") == 0) style = CREATE; diff --git a/src/version.h b/src/version.h index dc0ebe76b80c1ae1042af64e55319af63eb1eb56..c2cdc6afd6627eee4ef7f4f0cb36082f942d6c7e 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "19 May 2017" +#define LAMMPS_VERSION "20 Jun 2017" diff --git a/src/write_restart.cpp b/src/write_restart.cpp index 77e2cb05d951b11d81f9091a0c4b320163353f12..ad6c756558eb7870b4b03f0c35393930d0261f22 100644 --- a/src/write_restart.cpp +++ b/src/write_restart.cpp @@ -185,7 +185,7 @@ void WriteRestart::multiproc_options(int multiproc_caller, int mpiioflag_caller, if (strcmp(arg[iarg],"fileper") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal write_restart command"); if (!multiproc) - error->all(FLERR,"Cannot use write_restart fileper " + error->all(FLERR,"Cannot use write_restart fileper " "without % in restart file name"); int nper = force->inumeric(FLERR,arg[iarg+1]); if (nper <= 0) error->all(FLERR,"Illegal write_restart command"); @@ -203,7 +203,7 @@ void WriteRestart::multiproc_options(int multiproc_caller, int mpiioflag_caller, } else if (strcmp(arg[iarg],"nfile") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal write_restart command"); if (!multiproc) - error->all(FLERR,"Cannot use write_restart nfile " + error->all(FLERR,"Cannot use write_restart nfile " "without % in restart file name"); int nfile = force->inumeric(FLERR,arg[iarg+1]); if (nfile <= 0) error->all(FLERR,"Illegal write_restart command");