diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index e816c8831b7c1a2ca9a1f99039b1f6ced7d08997..22723c9a512a893a18bef5592698f0249071ca92 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -637,10 +637,10 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT. "rigid/nve (o)"_fix_rigid.html, "rigid/nvt (o)"_fix_rigid.html, "rigid/small (o)"_fix_rigid.html, -"rigid/small/nph (o)"_fix_rigid.html, -"rigid/small/npt (o)"_fix_rigid.html, -"rigid/small/nve (o)"_fix_rigid.html, -"rigid/small/nvt (o)"_fix_rigid.html, +"rigid/small/nph"_fix_rigid.html, +"rigid/small/npt"_fix_rigid.html, +"rigid/small/nve"_fix_rigid.html, +"rigid/small/nvt"_fix_rigid.html, "setforce (k)"_fix_setforce.html, "shake"_fix_shake.html, "spring"_fix_spring.html, @@ -1023,7 +1023,7 @@ KOKKOS, o = USER-OMP, t = OPT. "tri/lj"_pair_tri_lj.html, "vashishta (ko)"_pair_vashishta.html, "vashishta/table (o)"_pair_vashishta.html, -"yukawa (go)"_pair_yukawa.html, +"yukawa (gok)"_pair_yukawa.html, "yukawa/colloid (go)"_pair_yukawa_colloid.html, "zbl (go)"_pair_zbl.html :tb(c=4,ea=c) @@ -1045,6 +1045,7 @@ package"_Section_start.html#start_3. "edpd"_pair_meso.html, "eff/cut"_pair_eff.html, "exp6/rx"_pair_exp6_rx.html, +"extep"_pair_extep.html, "gauss/cut"_pair_gauss.html, "kolmogorov/crespi/z"_pair_kolmogorov_crespi_z.html, "lennard/mdf"_pair_mdf.html, diff --git a/doc/src/compute_dihedral_local.txt b/doc/src/compute_dihedral_local.txt index 91370a947a4191180b614a58788bf7e4d2f1b979..865e86fddb1483ccf7a3b1c3d79fec912b90ea04 100644 --- a/doc/src/compute_dihedral_local.txt +++ b/doc/src/compute_dihedral_local.txt @@ -27,8 +27,8 @@ compute 1 all dihedral/local phi :pre Define a computation that calculates properties of individual dihedral interactions. The number of datums generated, aggregated across all -processors, equals the number of angles in the system, modified by the -group parameter as explained below. +processors, equals the number of dihedral angles in the system, modified +by the group parameter as explained below. The value {phi} is the dihedral angle, as defined in the diagram on the "dihedral_style"_dihedral_style.html doc page. diff --git a/doc/src/create_atoms.txt b/doc/src/create_atoms.txt index 98c3c24a0b9635d5d9dd17f7ec84e85a60a3829d..ad13879c6dd5b74ab2640135896241d326c04f4b 100644 --- a/doc/src/create_atoms.txt +++ b/doc/src/create_atoms.txt @@ -36,9 +36,9 @@ keyword = {mol} or {basis} or {remap} or {var} or {set} or {units} :l {set} values = dim name dim = {x} or {y} or {z} name = name of variable to set with x, y, or z atom position - {rotate} values = Rx Ry Rz theta - Rx,Ry,Rz = rotation vector for single molecule + {rotate} values = theta Rx Ry Rz theta = rotation angle for single molecule (degrees) + Rx,Ry,Rz = rotation vector for single molecule {units} value = {lattice} or {box} {lattice} = the geometry is defined in lattice units {box} = the geometry is defined in simulation box units :pre @@ -227,28 +227,30 @@ the sinusoid would appear to be "smoother". Also note the use of the converts lattice spacings to distance. Click on the image for a larger version. +dimension 2 variable x equal 100 variable y equal 25 lattice hex 0.8442 region box block 0 $x 0 $y -0.5 0.5 create_box 1 box :pre -variable xx equal 0.0 -variable yy equal 0.0 +variable xx internal 0.0 +variable yy internal 0.0 variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0" -create_atoms 1 box var v set x xx set y yy :pre +create_atoms 1 box var v set x xx set y yy +write_dump all atom sinusoid.lammpstrj :pre :c,image(JPG/sinusoid_small.jpg,JPG/sinusoid.jpg) -The {rotate} keyword can be used with the {single} style, when adding -a single molecule to specify the orientation at which the molecule is -inserted. The axis of rotation is determined by the rotation vector -(Rx,Ry,Rz) that goes through the insertion point. The specified -{theta} determines the angle of rotation around that axis. Note that -the direction of rotation for the atoms around the rotation axis is -consistent with the right-hand rule: if your right-hand's thumb points -along {R}, then your fingers wrap around the axis in the direction of -rotation. +The {rotate} keyword can only be used with the {single} style and +when adding a single molecule. It allows to specify the orientation +at which the molecule is inserted. The axis of rotation is +determined by the rotation vector (Rx,Ry,Rz) that goes through the +insertion point. The specified {theta} determines the angle of +rotation around that axis. Note that the direction of rotation for +the atoms around the rotation axis is consistent with the right-hand +rule: if your right-hand's thumb points along {R}, then your fingers +wrap around the axis in the direction of rotation. The {units} keyword determines the meaning of the distance units used to specify the coordinates of the one particle created by the {single} diff --git a/doc/src/create_bonds.txt b/doc/src/create_bonds.txt index 5a878521693c968f09bbbbefbbca130a91832fa6..6af69214d3c4523318945c3bf70ac0547621dd64 100644 --- a/doc/src/create_bonds.txt +++ b/doc/src/create_bonds.txt @@ -18,7 +18,7 @@ style = {many} or {single/bond} or {single/angle} or {single/dihedral} :ule,l group2-ID = ID of second group, bonds will be between atoms in the 2 groups btype = bond type of created bonds rmin = minimum distance between pair of atoms to bond together - rmax = minimum distance between pair of atoms to bond together + rmax = maximum distance between pair of atoms to bond together {single/bond} args = btype batom1 batom2 btype = bond type of new bond batom1,batom2 = atom IDs for two atoms in bond diff --git a/doc/src/fix_rigid.txt b/doc/src/fix_rigid.txt index a5a631bd382e851c60cda8bfbe17c51eb0b73d18..eced602c4b3540bf95ef8fe17278e7baf697e4c0 100644 --- a/doc/src/fix_rigid.txt +++ b/doc/src/fix_rigid.txt @@ -7,11 +7,17 @@ :line fix rigid command :h3 +fix rigid/omp command :h3 fix rigid/nve command :h3 +fix rigid/nve/omp command :h3 fix rigid/nvt command :h3 +fix rigid/nvt/omp command :h3 fix rigid/npt command :h3 +fix rigid/npt/omp command :h3 fix rigid/nph command :h3 +fix rigid/nph/omp command :h3 fix rigid/small command :h3 +fix rigid/small/omp command :h3 fix rigid/nve/small command :h3 fix rigid/nvt/small command :h3 fix rigid/npt/small command :h3 @@ -28,7 +34,7 @@ bodystyle = {single} or {molecule} or {group} :l {molecule} args = none {custom} args = {i_propname} or {v_varname} i_propname = an integer property defined via fix property/atom - v_varname = an atom-style or atomfile-style variable + v_varname = an atom-style or atomfile-style variable {group} args = N groupID1 groupID2 ... N = # of groups groupID1, groupID2, ... = list of N group IDs :pre @@ -93,7 +99,7 @@ fix 1 clump rigid custom v_bodyid :pre fix 0 all property/atom i_bodyid read_restart data.rigid fix 0 NULL Bodies fix 1 clump rigid/small custom i_bodyid :pre - + [Description:] Treat one or more sets of atoms as independent rigid bodies. This diff --git a/doc/src/lammps.book b/doc/src/lammps.book index 0691f43e9b1c120df66b9e8ba372e33f9019eb80..09be3a3f9c335edddb7e1f2d49613ac1dd443457 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -443,6 +443,7 @@ pair_edip.html pair_eff.html pair_eim.html pair_exp6_rx.html +pair_extep.html pair_gauss.html pair_gayberne.html pair_gran.html diff --git a/doc/src/pair_extep.txt b/doc/src/pair_extep.txt new file mode 100644 index 0000000000000000000000000000000000000000..9a784e2501638ecda41031366b0777346115c94f --- /dev/null +++ b/doc/src/pair_extep.txt @@ -0,0 +1,40 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +pair_style extep command :h3 + +[Syntax:] + +pair_style extep :pre + +[Examples:] + +pair_style extep +pair_coeff * * BN.extep B N :pre + +[Description:] + +Style {extep} computes the Extended Tersoff Potential (ExTeP) +interactions as described in "(Los2017)"_#Los2017. + +:line + +[Restrictions:] none + +[Related commands:] + +"pair_tersoff" pair_tersoff.html + +[Default:] none + +:line + +:link(Los2017) +[(Los2017)] J. H. Los et al. "Extended Tersoff potential for boron nitride: +Energetics and elastic properties of pristine and defective h-BN", +Phys. Rev. B 96 (184108), 2017. diff --git a/doc/src/pair_yukawa.txt b/doc/src/pair_yukawa.txt index 61d6bde6a92ab34ea2a42527f59a3227ab27b163..e7c063ded9f41227210e17970dc9fb7cc59ed757 100644 --- a/doc/src/pair_yukawa.txt +++ b/doc/src/pair_yukawa.txt @@ -9,6 +9,7 @@ pair_style yukawa command :h3 pair_style yukawa/gpu command :h3 pair_style yukawa/omp command :h3 +pair_style yukawa/kk command :h3 [Syntax:] diff --git a/doc/src/pairs.txt b/doc/src/pairs.txt index ec21b7a02e429436da14ea8baa3c33ad66b1d8b2..ccd540bf44a6e1f37e374cf4118f7a53655bbbd6 100644 --- a/doc/src/pairs.txt +++ b/doc/src/pairs.txt @@ -32,6 +32,7 @@ Pair Styles :h1 pair_eff pair_eim pair_exp6_rx + pair_extep pair_gauss pair_gayberne pair_gran diff --git a/doc/src/print.txt b/doc/src/print.txt index 4c9e5b4d7611c7638d469646469935b18298f903..77e0c7cfd3d64ed7a6bfeb6dcd17b716f0b1e1f3 100644 --- a/doc/src/print.txt +++ b/doc/src/print.txt @@ -14,10 +14,11 @@ print string keyword value :pre string = text string to print, which may contain variables :ulb,l zero or more keyword/value pairs may be appended :l -keyword = {file} or {append} or {screen} :l +keyword = {file} or {append} or {screen} or {universe} :l {file} value = filename {append} value = filename - {screen} value = {yes} or {no} :pre + {screen} value = {yes} or {no} + {universe} value = {yes} or {no} :pre :ule [Examples:] @@ -26,6 +27,7 @@ print "Done with equilibration" file info.dat print Vol=$v append info.dat screen no print "The system volume is now $v" print 'The system volume is now $v' +print "NEB calculation 1 complete" screen no universe yes print """ System volume = $v System temperature = $t @@ -49,6 +51,11 @@ it does not exist. If the {screen} keyword is used, output to the screen and logfile can be turned on or off as desired. +If the {universe} keyword is used, output to the global screen and +logfile can be turned on or off as desired. In multi-partition +calculations, the {screen} option and the corresponding output only +apply to the screen and logfile of the individual partition. + If you want the print command to be executed multiple times (with changing variable values), there are 3 options. First, consider using the "fix print"_fix_print.html command, which will print a string @@ -74,4 +81,4 @@ thermodynamic properties, global values calculated by a [Default:] -The option defaults are no file output and screen = yes. +The option defaults are no file output, screen = yes, and universe = no. diff --git a/doc/src/replicate.txt b/doc/src/replicate.txt index 291558e0e527d41a6c0ae7a53b854cfd12eb476a..08523ecdd848a58ff04b8b63747fcd6c5c388914 100644 --- a/doc/src/replicate.txt +++ b/doc/src/replicate.txt @@ -10,9 +10,11 @@ replicate command :h3 [Syntax:] -replicate nx ny nz :pre +replicate nx ny nz {keyword} :pre -nx,ny,nz = replication factors in each dimension :ul +nx,ny,nz = replication factors in each dimension :ulb +optional {keyword} = {bbox} :l + {bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule [Examples:] @@ -43,6 +45,12 @@ file that crosses a periodic boundary should be between two atoms with image flags that differ by 1. This will allow the bond to be unwrapped appropriately. +The optional keyword {bbox} uses a bounding box to only check atoms +in replicas that overlap with a processor's subdomain when assigning +atoms to processors, and thus can result in substantial speedups for +calculations using a large number of processors. It does require +temporarily using more memory. + [Restrictions:] A 2d simulation cannot be replicated in the z dimension. diff --git a/examples/USER/misc/extep/BN.data b/examples/USER/misc/extep/BN.data new file mode 100644 index 0000000000000000000000000000000000000000..3f51bdff6106035ac7f6c98c694e7c948d91ce56 --- /dev/null +++ b/examples/USER/misc/extep/BN.data @@ -0,0 +1,116 @@ +info: BN sample with r_BN=1.45 + +100 atoms +2 atom types + +0.0 21.75000000 xlo xhi +0.0 12.55736835 ylo yhi +0.0 50.00000000 zlo zhi + +Masses + +1 10.811 +2 14.0067 + +Atoms + + 1 1 0.00000000 0.00000000 0.00000000 + 2 2 1.45000000 0.00000000 0.00000000 + 3 1 2.17500000 1.25573684 0.00000000 + 4 2 3.62500000 1.25573684 0.00000000 + 5 1 0.00000000 2.51147367 0.00000000 + 6 2 1.45000000 2.51147367 0.00000000 + 7 1 2.17500000 3.76721051 0.00000000 + 8 2 3.62500000 3.76721051 0.00000000 + 9 1 0.00000000 5.02294734 0.00000000 + 10 2 1.45000000 5.02294734 0.00000000 + 11 1 2.17500000 6.27868418 0.00000000 + 12 2 3.62500000 6.27868418 0.00000000 + 13 1 0.00000000 7.53442101 0.00000000 + 14 2 1.45000000 7.53442101 0.00000000 + 15 1 2.17500000 8.79015785 0.00000000 + 16 2 3.62500000 8.79015785 0.00000000 + 17 1 0.00000000 10.04589468 0.00000000 + 18 2 1.45000000 10.04589468 0.00000000 + 19 1 2.17500000 11.30163152 0.00000000 + 20 2 3.62500000 11.30163152 0.00000000 + 21 1 4.35000000 0.00000000 0.00000000 + 22 2 5.80000000 0.00000000 0.00000000 + 23 1 6.52500000 1.25573684 0.00000000 + 24 2 7.97500000 1.25573684 0.00000000 + 25 1 4.35000000 2.51147367 0.00000000 + 26 2 5.80000000 2.51147367 0.00000000 + 27 1 6.52500000 3.76721051 0.00000000 + 28 2 7.97500000 3.76721051 0.00000000 + 29 1 4.35000000 5.02294734 0.00000000 + 30 2 5.80000000 5.02294734 0.00000000 + 31 1 6.52500000 6.27868418 0.00000000 + 32 2 7.97500000 6.27868418 0.00000000 + 33 1 4.35000000 7.53442101 0.00000000 + 34 2 5.80000000 7.53442101 0.00000000 + 35 1 6.52500000 8.79015785 0.00000000 + 36 2 7.97500000 8.79015785 0.00000000 + 37 1 4.35000000 10.04589468 0.00000000 + 38 2 5.80000000 10.04589468 0.00000000 + 39 1 6.52500000 11.30163152 0.00000000 + 40 2 7.97500000 11.30163152 0.00000000 + 41 1 8.70000000 0.00000000 0.00000000 + 42 2 10.15000000 0.00000000 0.00000000 + 43 1 10.87500000 1.25573684 0.00000000 + 44 2 12.32500000 1.25573684 0.00000000 + 45 1 8.70000000 2.51147367 0.00000000 + 46 2 10.15000000 2.51147367 0.00000000 + 47 1 10.87500000 3.76721051 0.00000000 + 48 2 12.32500000 3.76721051 0.00000000 + 49 1 8.70000000 5.02294734 0.00000000 + 50 2 10.15000000 5.02294734 0.00000000 + 51 1 10.87500000 6.27868418 0.00000000 + 52 2 12.32500000 6.27868418 0.00000000 + 53 1 8.70000000 7.53442101 0.00000000 + 54 2 10.15000000 7.53442101 0.00000000 + 55 1 10.87500000 8.79015785 0.00000000 + 56 2 12.32500000 8.79015785 0.00000000 + 57 1 8.70000000 10.04589468 0.00000000 + 58 2 10.15000000 10.04589468 0.00000000 + 59 1 10.87500000 11.30163152 0.00000000 + 60 2 12.32500000 11.30163152 0.00000000 + 61 1 13.05000000 0.00000000 0.00000000 + 62 2 14.50000000 0.00000000 0.00000000 + 63 1 15.22500000 1.25573684 0.00000000 + 64 2 16.67500000 1.25573684 0.00000000 + 65 1 13.05000000 2.51147367 0.00000000 + 66 2 14.50000000 2.51147367 0.00000000 + 67 1 15.22500000 3.76721051 0.00000000 + 68 2 16.67500000 3.76721051 0.00000000 + 69 1 13.05000000 5.02294734 0.00000000 + 70 2 14.50000000 5.02294734 0.00000000 + 71 1 15.22500000 6.27868418 0.00000000 + 72 2 16.67500000 6.27868418 0.00000000 + 73 1 13.05000000 7.53442101 0.00000000 + 74 2 14.50000000 7.53442101 0.00000000 + 75 1 15.22500000 8.79015785 0.00000000 + 76 2 16.67500000 8.79015785 0.00000000 + 77 1 13.05000000 10.04589468 0.00000000 + 78 2 14.50000000 10.04589468 0.00000000 + 79 1 15.22500000 11.30163152 0.00000000 + 80 2 16.67500000 11.30163152 0.00000000 + 81 1 17.40000000 0.00000000 0.00000000 + 82 2 18.85000000 0.00000000 0.00000000 + 83 1 19.57500000 1.25573684 0.00000000 + 84 2 21.02500000 1.25573684 0.00000000 + 85 1 17.40000000 2.51147367 0.00000000 + 86 2 18.85000000 2.51147367 0.00000000 + 87 1 19.57500000 3.76721051 0.00000000 + 88 2 21.02500000 3.76721051 0.00000000 + 89 1 17.40000000 5.02294734 0.00000000 + 90 2 18.85000000 5.02294734 0.00000000 + 91 1 19.57500000 6.27868418 0.00000000 + 92 2 21.02500000 6.27868418 0.00000000 + 93 1 17.40000000 7.53442101 0.00000000 + 94 2 18.85000000 7.53442101 0.00000000 + 95 1 19.57500000 8.79015785 0.00000000 + 96 2 21.02500000 8.79015785 0.00000000 + 97 1 17.40000000 10.04589468 0.00000000 + 98 2 18.85000000 10.04589468 0.00000000 + 99 1 19.57500000 11.30163152 0.00000000 +100 2 21.02500000 11.30163152 0.00000000 diff --git a/examples/USER/misc/extep/in.extep-bn b/examples/USER/misc/extep/in.extep-bn new file mode 100644 index 0000000000000000000000000000000000000000..bb7ed0ea281b1242c5b79f9aa2b36fafb8c6368d --- /dev/null +++ b/examples/USER/misc/extep/in.extep-bn @@ -0,0 +1,29 @@ +# Initialization +units metal +boundary p p p +atom_style atomic +processors * * 1 + +# System and atom definition +read_data BN.data # read lammps data file + +# Neighbor update settings +neighbor 2.0 bin +neigh_modify every 1 +neigh_modify delay 0 +neigh_modify check yes + +# Potential +pair_style extep +pair_coeff * * ../../../../potentials/BN.extep B N + +# Output +thermo 10 +thermo_style custom step time etotal pe temp lx ly lz pxx pyy pzz +thermo_modify line one format float %14.8g + +# Setup NPT MD run +timestep 0.0001 # ps +velocity all create 300.0 12345 +fix thermos all npt temp 300 300 1.0 x 0 0 1.0 y 0 0 1.0 +run 1000 diff --git a/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.1 b/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.1 new file mode 100644 index 0000000000000000000000000000000000000000..392e5c4f0e38a061fde9e5cafce9db6463f5af74 --- /dev/null +++ b/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.1 @@ -0,0 +1,180 @@ +LAMMPS (23 Oct 2017) + using 1 OpenMP thread(s) per MPI task +# Initialization +units metal +boundary p p p +atom_style atomic +processors * * 1 + +# System and atom definition +read_data BN.data # read lammps data file + orthogonal box = (0 0 0) to (21.75 12.5574 50) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 100 atoms + +# Neighbor update settings +neighbor 2.0 bin +neigh_modify every 1 +neigh_modify delay 0 +neigh_modify check yes + +# Potential +pair_style extep +pair_coeff * * ../../../../potentials/BN.extep B N +Reading potential file ../../../../potentials/BN.extep with DATE: 2017-11-28 + +# Output +thermo 10 +thermo_style custom step time etotal pe temp lx ly lz pxx pyy pzz +thermo_modify line one format float %14.8g + +# Setup NPT MD run +timestep 0.0001 # ps +velocity all create 300.0 12345 +fix thermos all npt temp 300 300 1.0 x 0 0 1.0 y 0 0 1.0 +run 1000 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.2 + ghost atom cutoff = 4.2 + binsize = 2.1, bins = 11 6 24 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair extep, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.97 | 2.97 | 2.97 Mbytes +Step Time TotEng PotEng Temp Lx Ly Lz Pxx Pyy Pzz + 0 0 -665.11189 -668.95092 300 21.75 12.557368 50 -1638.8315 -1636.7368 321.73163 + 10 0.001 -665.11194 -668.81065 289.03491 21.749944 12.557333 50 -1391.3771 -1841.1723 316.66669 + 20 0.002 -665.1121 -668.4273 259.06599 21.749789 12.557222 50 -1137.0171 -1980.5977 301.79466 + 30 0.003 -665.11237 -667.90117 217.93027 21.749552 12.557029 50 -912.51949 -2055.822 278.00774 + 40 0.004 -665.11278 -667.36471 175.97662 21.74925 12.556752 50 -755.38643 -2078.0669 246.62816 + 50 0.005 -665.11333 -666.94254 142.94321 21.748894 12.556389 50 -694.93153 -2062.1349 209.26356 + 60 0.006 -665.11405 -666.71476 125.08741 21.748487 12.55594 50 -744.6962 -2019.9093 167.70563 + 70 0.007 -665.11494 -666.69555 123.51632 21.748026 12.555408 50 -898.67863 -1956.2845 123.88845 + 80 0.008 -665.116 -666.83408 134.25892 21.7475 12.554796 50 -1132.5952 -1868.738 79.87581 + 90 0.009 -665.1172 -667.03647 149.98053 21.746893 12.554106 50 -1409.6896 -1750.4875 37.821017 + 100 0.01 -665.11853 -667.20002 162.65705 21.746185 12.553344 50 -1689.1599 -1595.9411 -0.14399002 + 110 0.011 -665.11997 -667.24752 166.25742 21.745356 12.552516 50 -1934.6334 -1406.3665 -32.091026 + 120 0.012 -665.12148 -667.15088 158.58671 21.744389 12.55163 50 -2120.4014 -1193.6117 -56.50543 + 130 0.013 -665.12306 -666.93754 141.7922 21.743271 12.550694 50 -2234.0841 -980.32815 -72.45885 + 140 0.014 -665.1247 -666.67903 121.4631 21.741993 12.549719 50 -2275.5656 -796.26701 -79.693692 + 150 0.015 -665.1264 -666.46562 104.65306 21.740553 12.54871 50 -2253.08 -671.5409 -78.603431 + 160 0.016 -665.1282 -666.37541 97.462619 21.738952 12.547674 50 -2178.0108 -628.83531 -70.130423 + 170 0.017 -665.13011 -666.44775 102.96665 21.737195 12.546611 50 -2060.2073 -677.02227 -55.623931 + 180 0.018 -665.13215 -666.67004 120.17784 21.735292 12.54552 50 -1905.36 -808.22824 -36.699042 + 190 0.019 -665.13431 -666.98201 144.38814 21.733253 12.544396 50 -1715.2526 -999.2481 -15.117617 + 200 0.02 -665.13656 -667.29591 168.74214 21.731091 12.543231 50 -1490.6934 -1216.735 7.3107732 + 210 0.021 -665.13885 -667.52511 186.47391 21.728823 12.542015 50 -1235.9283 -1424.4324 28.822782 + 220 0.022 -665.14112 -667.61153 193.0492 21.726467 12.540741 50 -962.70697 -1590.2885 47.801678 + 230 0.023 -665.14332 -667.54317 187.53534 21.724043 12.539402 50 -692.12856 -1691.6537 62.881768 + 240 0.024 -665.1454 -667.35665 172.79772 21.72157 12.537993 50 -453.02755 -1717.6064 73.041858 + 250 0.025 -665.14735 -667.12424 154.48373 21.719064 12.536514 50 -276.81709 -1668.3598 77.670868 + 260 0.026 -665.14918 -666.92939 139.11409 21.716539 12.534967 50 -190.03656 -1552.4049 76.59734 + 270 0.027 -665.15091 -666.83859 131.88391 21.714 12.533357 50 -206.85537 -1382.4915 70.085105 + 280 0.028 -665.15258 -666.87889 134.90214 21.711446 12.53169 50 -324.01795 -1171.7578 58.801327 + 290 0.029 -665.15421 -667.02881 146.49028 21.708869 12.529975 50 -520.0146 -931.26466 43.758636 + 300 0.03 -665.1558 -667.22646 161.81084 21.706255 12.528222 50 -758.87113 -669.74523 26.225956 + 310 0.031 -665.15734 -667.39183 174.61368 21.703587 12.526442 50 -997.42782 -395.56111 7.601897 + 320 0.032 -665.15878 -667.45546 179.47345 21.700849 12.524646 50 -1193.9402 -119.86797 -10.744258 + 330 0.033 -665.16008 -667.38312 173.71901 21.698026 12.522846 50 -1315.6446 140.7451 -27.638433 + 340 0.034 -665.16118 -667.18792 158.37888 21.695112 12.521051 50 -1343.5396 363.95099 -42.231049 + 350 0.035 -665.16207 -666.92571 137.81938 21.692103 12.519271 50 -1273.6625 524.73453 -54.046178 + 360 0.036 -665.16274 -666.67543 118.20885 21.689004 12.517514 50 -1115.1514 601.37143 -62.932702 + 370 0.037 -665.1632 -666.5115 105.36258 21.685827 12.515781 50 -886.11568 582.42087 -68.942158 + 380 0.038 -665.16348 -666.47849 102.76116 21.682589 12.514072 50 -608.71321 472.04732 -72.193259 + 390 0.039 -665.1636 -666.57728 110.47178 21.679308 12.512382 50 -304.85697 291.41908 -72.787214 + 400 0.04 -665.16356 -666.76741 125.33244 21.676006 12.510704 50 6.3732307 75.407852 -70.806087 + 410 0.041 -665.16336 -666.98363 142.24457 21.672705 12.50903 50 309.23046 -134.40319 -66.378966 + 420 0.042 -665.16298 -667.15939 156.00935 21.669426 12.507351 50 590.16982 -298.16702 -59.767469 + 430 0.043 -665.16239 -667.24843 163.01313 21.66619 12.50566 50 836.19535 -385.22443 -51.420249 + 440 0.044 -665.16157 -667.23746 162.2204 21.663014 12.503955 50 1033.943 -378.7816 -41.969885 + 450 0.045 -665.1605 -667.14707 155.24066 21.659911 12.502234 50 1170.3399 -277.11556 -32.175503 + 460 0.046 -665.15917 -667.0218 145.55489 21.656891 12.500503 50 1234.9026 -91.620499 -22.833423 + 470 0.047 -665.15761 -666.91366 137.22578 21.65396 12.498768 50 1222.9519 157.31306 -14.680548 + 480 0.048 -665.15585 -666.86462 133.53159 21.651114 12.497041 50 1138.5551 445.2926 -8.3071781 + 490 0.049 -665.15393 -666.89359 135.9458 21.64835 12.495333 50 996.00682 748.51842 -4.0872169 + 500 0.05 -665.15188 -666.99142 143.75058 21.645657 12.493655 50 819.08561 1046.9785 -2.1306918 + 510 0.051 -665.14975 -667.12519 154.36991 21.643022 12.49202 50 637.99022 1325.7112 -2.2650822 + 520 0.052 -665.14756 -667.25 164.29491 21.640432 12.49044 50 484.54509 1574.1916 -4.0528391 + 530 0.053 -665.14531 -667.32459 170.29969 21.637878 12.488923 50 386.77357 1784.4858 -6.8479114 + 540 0.054 -665.143 -667.32552 170.55254 21.635352 12.48748 50 364.14599 1949.2189 -9.8841824 + 550 0.055 -665.14064 -667.25527 165.24765 21.632853 12.486117 50 424.6565 2060.4607 -12.37851 + 560 0.056 -665.13822 -667.14127 156.52756 21.630385 12.484837 50 564.3912 2110.2547 -13.62742 + 570 0.057 -665.13576 -667.0259 147.70502 21.627958 12.483643 50 769.54354 2092.8157 -13.082914 + 580 0.058 -665.13327 -666.95107 142.05154 21.625586 12.482535 50 1020.1218 2007.6508 -10.405617 + 590 0.059 -665.13079 -666.94279 141.59877 21.623287 12.481508 50 1294.1274 1862.3568 -5.5031153 + 600 0.06 -665.12832 -667.00189 146.40928 21.621079 12.480557 50 1570.9478 1673.8456 1.4410957 + 610 0.061 -665.12591 -667.10417 154.59072 21.618982 12.479674 50 1833.1388 1467.2639 9.9561573 + 620 0.062 -665.12355 -667.20973 163.02368 21.617015 12.478851 50 2066.4951 1272.6732 19.310607 + 630 0.063 -665.12128 -667.27744 168.49239 21.615193 12.47808 50 2259.0193 1120.2758 28.59477 + 640 0.064 -665.11911 -667.27898 168.7823 21.613531 12.477355 50 2399.792 1035.3525 36.8539 + 650 0.065 -665.11707 -667.20773 163.37438 21.612037 12.476673 50 2478.6675 1034.0481 43.239368 + 660 0.066 -665.11518 -667.0802 153.55598 21.610718 12.476033 50 2487.2505 1120.8274 47.131883 + 670 0.067 -665.11345 -666.93026 141.97434 21.609573 12.475439 50 2420.9786 1288.0136 48.201717 + 680 0.068 -665.11191 -666.79864 131.80955 21.608598 12.474897 50 2281.6131 1517.4002 46.399066 + 690 0.069 -665.11056 -666.72065 125.82027 21.607784 12.474418 50 2079.2055 1783.5346 41.895586 + 700 0.07 -665.10941 -666.71578 125.5291 21.607116 12.474011 50 1832.7039 2057.9076 35.011051 + 710 0.071 -665.10848 -666.78203 130.77932 21.606577 12.473687 50 1568.7275 2313.0601 26.153491 + 720 0.072 -665.10776 -666.89681 139.80468 21.606148 12.473458 50 1318.5189 2525.6808 15.783637 + 730 0.073 -665.10727 -667.0243 149.80574 21.605812 12.47333 50 1113.5537 2678.1859 4.3967762 + 740 0.074 -665.10701 -667.12698 157.85016 21.605555 12.473311 50 980.633 2758.9123 -7.4930622 + 750 0.075 -665.10697 -667.17729 161.78497 21.605368 12.473404 50 937.45086 2761.5936 -19.376492 + 760 0.076 -665.10714 -667.1654 160.84249 21.605247 12.473609 50 989.5724 2684.9256 -30.776106 + 770 0.077 -665.1075 -667.10061 155.75086 21.605196 12.473922 50 1129.4775 2532.7048 -41.263677 + 780 0.078 -665.10803 -667.00654 148.35835 21.605226 12.474338 50 1337.8663 2314.4556 -50.455407 + 790 0.079 -665.10869 -666.91242 140.9515 21.605349 12.474848 50 1586.9099 2045.9808 -57.988114 + 800 0.08 -665.10946 -666.84375 135.52533 21.605585 12.475441 50 1844.7038 1749.1281 -63.495405 + 810 0.081 -665.11032 -666.81538 133.24173 21.60595 12.476105 50 2079.9601 1450.3113 -66.60795 + 820 0.082 -665.11127 -666.82877 134.21424 21.606461 12.476828 50 2266.0059 1177.7937 -66.990929 + 830 0.083 -665.1123 -666.87353 137.6312 21.607131 12.477599 50 2383.4351 958.19752 -64.411861 + 840 0.084 -665.11343 -666.93214 142.12323 21.607968 12.478409 50 2421.1969 812.91475 -58.816538 + 850 0.085 -665.11467 -666.98597 146.2321 21.608975 12.479253 50 2376.3483 755.06052 -50.389393 + 860 0.086 -665.11603 -667.02075 148.84448 21.610149 12.480128 50 2252.9811 787.43069 -39.585062 + 870 0.087 -665.1175 -667.03045 149.48743 21.611481 12.481034 50 2060.884 901.76342 -27.129117 + 880 0.088 -665.11907 -667.01838 148.42091 21.612958 12.481978 50 1814.3354 1079.4855 -13.988401 + 890 0.089 -665.12073 -666.99552 146.50471 21.614562 12.482966 50 1531.1565 1293.9709 -1.305884 + 900 0.09 -665.12247 -666.97639 144.87389 21.616275 12.484007 50 1231.9005 1514.0741 9.7083525 + 910 0.091 -665.12426 -666.97371 144.52455 21.618074 12.485109 50 938.90089 1708.364 17.929974 + 920 0.092 -665.12609 -666.99389 145.95889 21.61994 12.486281 50 674.90767 1849.2415 22.497207 + 930 0.093 -665.12794 -667.03498 149.02559 21.621853 12.487528 50 461.18604 1916.1468 22.971745 + 940 0.094 -665.12977 -667.08777 153.00718 21.6238 12.488852 50 315.19601 1897.3867 19.43758 + 950 0.095 -665.13156 -667.13925 156.8903 21.62577 12.490254 50 248.20946 1790.5667 12.504818 + 960 0.096 -665.13326 -667.17668 159.68273 21.627757 12.491728 50 263.35912 1601.9528 3.2123256 + 970 0.097 -665.13485 -667.19079 160.6611 21.629764 12.493267 50 354.58496 1345.1489 -7.1487162 + 980 0.098 -665.13628 -667.17758 159.5175 21.631796 12.494862 50 506.7626 1039.346 -17.249179 + 990 0.099 -665.13753 -667.13942 156.43758 21.633864 12.496499 50 697.06054 707.26671 -25.92737 + 1000 0.1 -665.13859 -667.0853 152.12472 21.635982 12.498164 50 897.38498 372.94791 -32.344697 +Loop time of 0.463574 on 1 procs for 1000 steps with 100 atoms + +Performance: 18.638 ns/day, 1.288 hours/ns, 2157.152 timesteps/s +99.0% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.44776 | 0.44776 | 0.44776 | 0.0 | 96.59 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.0023057 | 0.0023057 | 0.0023057 | 0.0 | 0.50 +Output | 0.0015752 | 0.0015752 | 0.0015752 | 0.0 | 0.34 +Modify | 0.010602 | 0.010602 | 0.010602 | 0.0 | 2.29 +Other | | 0.001331 | | | 0.29 + +Nlocal: 100 ave 100 max 100 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 360 ave 360 max 360 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 1800 ave 1800 max 1800 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1800 +Ave neighs/atom = 18 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.4 b/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.4 new file mode 100644 index 0000000000000000000000000000000000000000..c5c9236649e84972c774ac9772da0062e4b3059f --- /dev/null +++ b/examples/USER/misc/extep/log.23Oct17.extep-bn.g++.4 @@ -0,0 +1,180 @@ +LAMMPS (23 Oct 2017) + using 1 OpenMP thread(s) per MPI task +# Initialization +units metal +boundary p p p +atom_style atomic +processors * * 1 + +# System and atom definition +read_data BN.data # read lammps data file + orthogonal box = (0 0 0) to (21.75 12.5574 50) + 2 by 2 by 1 MPI processor grid + reading atoms ... + 100 atoms + +# Neighbor update settings +neighbor 2.0 bin +neigh_modify every 1 +neigh_modify delay 0 +neigh_modify check yes + +# Potential +pair_style extep +pair_coeff * * ../../../../potentials/BN.extep B N +Reading potential file ../../../../potentials/BN.extep with DATE: 2017-11-28 + +# Output +thermo 10 +thermo_style custom step time etotal pe temp lx ly lz pxx pyy pzz +thermo_modify line one format float %14.8g + +# Setup NPT MD run +timestep 0.0001 # ps +velocity all create 300.0 12345 +fix thermos all npt temp 300 300 1.0 x 0 0 1.0 y 0 0 1.0 +run 1000 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.2 + ghost atom cutoff = 4.2 + binsize = 2.1, bins = 11 6 24 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair extep, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.943 | 2.943 | 2.943 Mbytes +Step Time TotEng PotEng Temp Lx Ly Lz Pxx Pyy Pzz + 0 0 -665.11189 -668.95092 300 21.75 12.557368 50 -1638.8315 -1636.7368 321.73163 + 10 0.001 -665.11194 -668.81065 289.03491 21.749944 12.557333 50 -1391.3771 -1841.1723 316.66669 + 20 0.002 -665.1121 -668.4273 259.06599 21.749789 12.557222 50 -1137.0171 -1980.5977 301.79466 + 30 0.003 -665.11237 -667.90117 217.93027 21.749552 12.557029 50 -912.51949 -2055.822 278.00774 + 40 0.004 -665.11278 -667.36471 175.97662 21.74925 12.556752 50 -755.38643 -2078.0669 246.62816 + 50 0.005 -665.11333 -666.94254 142.94321 21.748894 12.556389 50 -694.93153 -2062.1349 209.26356 + 60 0.006 -665.11405 -666.71476 125.08741 21.748487 12.55594 50 -744.6962 -2019.9093 167.70563 + 70 0.007 -665.11494 -666.69555 123.51632 21.748026 12.555408 50 -898.67863 -1956.2845 123.88845 + 80 0.008 -665.116 -666.83408 134.25892 21.7475 12.554796 50 -1132.5952 -1868.738 79.87581 + 90 0.009 -665.1172 -667.03647 149.98053 21.746893 12.554106 50 -1409.6896 -1750.4875 37.821017 + 100 0.01 -665.11853 -667.20002 162.65705 21.746185 12.553344 50 -1689.1599 -1595.9411 -0.14399002 + 110 0.011 -665.11997 -667.24752 166.25742 21.745356 12.552516 50 -1934.6334 -1406.3665 -32.091026 + 120 0.012 -665.12148 -667.15088 158.58671 21.744389 12.55163 50 -2120.4014 -1193.6117 -56.50543 + 130 0.013 -665.12306 -666.93754 141.7922 21.743271 12.550694 50 -2234.0841 -980.32815 -72.45885 + 140 0.014 -665.1247 -666.67903 121.4631 21.741993 12.549719 50 -2275.5656 -796.26701 -79.693692 + 150 0.015 -665.1264 -666.46562 104.65306 21.740553 12.54871 50 -2253.08 -671.5409 -78.603431 + 160 0.016 -665.1282 -666.37541 97.462619 21.738952 12.547674 50 -2178.0108 -628.83531 -70.130423 + 170 0.017 -665.13011 -666.44775 102.96665 21.737195 12.546611 50 -2060.2073 -677.02227 -55.623931 + 180 0.018 -665.13215 -666.67004 120.17784 21.735292 12.54552 50 -1905.36 -808.22824 -36.699042 + 190 0.019 -665.13431 -666.98201 144.38814 21.733253 12.544396 50 -1715.2526 -999.2481 -15.117617 + 200 0.02 -665.13656 -667.29591 168.74214 21.731091 12.543231 50 -1490.6934 -1216.735 7.3107732 + 210 0.021 -665.13885 -667.52511 186.47391 21.728823 12.542015 50 -1235.9283 -1424.4324 28.822782 + 220 0.022 -665.14112 -667.61153 193.0492 21.726467 12.540741 50 -962.70697 -1590.2885 47.801678 + 230 0.023 -665.14332 -667.54317 187.53534 21.724043 12.539402 50 -692.12856 -1691.6537 62.881768 + 240 0.024 -665.1454 -667.35665 172.79772 21.72157 12.537993 50 -453.02755 -1717.6064 73.041858 + 250 0.025 -665.14735 -667.12424 154.48373 21.719064 12.536514 50 -276.81709 -1668.3598 77.670868 + 260 0.026 -665.14918 -666.92939 139.11409 21.716539 12.534967 50 -190.03656 -1552.4049 76.59734 + 270 0.027 -665.15091 -666.83859 131.88391 21.714 12.533357 50 -206.85537 -1382.4915 70.085105 + 280 0.028 -665.15258 -666.87889 134.90214 21.711446 12.53169 50 -324.01795 -1171.7578 58.801327 + 290 0.029 -665.15421 -667.02881 146.49028 21.708869 12.529975 50 -520.0146 -931.26466 43.758636 + 300 0.03 -665.1558 -667.22646 161.81084 21.706255 12.528222 50 -758.87113 -669.74523 26.225956 + 310 0.031 -665.15734 -667.39183 174.61368 21.703587 12.526442 50 -997.42782 -395.56111 7.601897 + 320 0.032 -665.15878 -667.45546 179.47345 21.700849 12.524646 50 -1193.9402 -119.86797 -10.744258 + 330 0.033 -665.16008 -667.38312 173.71901 21.698026 12.522846 50 -1315.6446 140.7451 -27.638433 + 340 0.034 -665.16118 -667.18792 158.37888 21.695112 12.521051 50 -1343.5396 363.95099 -42.231049 + 350 0.035 -665.16207 -666.92571 137.81938 21.692103 12.519271 50 -1273.6625 524.73453 -54.046178 + 360 0.036 -665.16274 -666.67543 118.20885 21.689004 12.517514 50 -1115.1514 601.37143 -62.932702 + 370 0.037 -665.1632 -666.5115 105.36258 21.685827 12.515781 50 -886.11568 582.42087 -68.942158 + 380 0.038 -665.16348 -666.47849 102.76116 21.682589 12.514072 50 -608.71321 472.04732 -72.193259 + 390 0.039 -665.1636 -666.57728 110.47178 21.679308 12.512382 50 -304.85697 291.41908 -72.787214 + 400 0.04 -665.16356 -666.76741 125.33244 21.676006 12.510704 50 6.3732307 75.407852 -70.806087 + 410 0.041 -665.16336 -666.98363 142.24457 21.672705 12.50903 50 309.23046 -134.40319 -66.378966 + 420 0.042 -665.16298 -667.15939 156.00935 21.669426 12.507351 50 590.16982 -298.16702 -59.767469 + 430 0.043 -665.16239 -667.24843 163.01313 21.66619 12.50566 50 836.19535 -385.22443 -51.420249 + 440 0.044 -665.16157 -667.23746 162.2204 21.663014 12.503955 50 1033.943 -378.7816 -41.969885 + 450 0.045 -665.1605 -667.14707 155.24066 21.659911 12.502234 50 1170.3399 -277.11556 -32.175503 + 460 0.046 -665.15917 -667.0218 145.55489 21.656891 12.500503 50 1234.9026 -91.620499 -22.833423 + 470 0.047 -665.15761 -666.91366 137.22578 21.65396 12.498768 50 1222.9519 157.31306 -14.680548 + 480 0.048 -665.15585 -666.86462 133.53159 21.651114 12.497041 50 1138.5551 445.2926 -8.3071781 + 490 0.049 -665.15393 -666.89359 135.9458 21.64835 12.495333 50 996.00682 748.51842 -4.0872169 + 500 0.05 -665.15188 -666.99142 143.75058 21.645657 12.493655 50 819.08561 1046.9785 -2.1306918 + 510 0.051 -665.14975 -667.12519 154.36991 21.643022 12.49202 50 637.99022 1325.7112 -2.2650822 + 520 0.052 -665.14756 -667.25 164.29491 21.640432 12.49044 50 484.54509 1574.1916 -4.0528391 + 530 0.053 -665.14531 -667.32459 170.29969 21.637878 12.488923 50 386.77357 1784.4858 -6.8479114 + 540 0.054 -665.143 -667.32552 170.55254 21.635352 12.48748 50 364.14599 1949.2189 -9.8841824 + 550 0.055 -665.14064 -667.25527 165.24765 21.632853 12.486117 50 424.6565 2060.4607 -12.37851 + 560 0.056 -665.13822 -667.14127 156.52756 21.630385 12.484837 50 564.3912 2110.2547 -13.62742 + 570 0.057 -665.13576 -667.0259 147.70502 21.627958 12.483643 50 769.54354 2092.8157 -13.082914 + 580 0.058 -665.13327 -666.95107 142.05154 21.625586 12.482535 50 1020.1218 2007.6508 -10.405617 + 590 0.059 -665.13079 -666.94279 141.59877 21.623287 12.481508 50 1294.1274 1862.3568 -5.5031153 + 600 0.06 -665.12832 -667.00189 146.40928 21.621079 12.480557 50 1570.9478 1673.8456 1.4410957 + 610 0.061 -665.12591 -667.10417 154.59072 21.618982 12.479674 50 1833.1388 1467.2639 9.9561573 + 620 0.062 -665.12355 -667.20973 163.02368 21.617015 12.478851 50 2066.4951 1272.6732 19.310607 + 630 0.063 -665.12128 -667.27744 168.49239 21.615193 12.47808 50 2259.0193 1120.2758 28.59477 + 640 0.064 -665.11911 -667.27898 168.7823 21.613531 12.477355 50 2399.792 1035.3525 36.8539 + 650 0.065 -665.11707 -667.20773 163.37438 21.612037 12.476673 50 2478.6675 1034.0481 43.239368 + 660 0.066 -665.11518 -667.0802 153.55598 21.610718 12.476033 50 2487.2505 1120.8274 47.131883 + 670 0.067 -665.11345 -666.93026 141.97434 21.609573 12.475439 50 2420.9786 1288.0136 48.201717 + 680 0.068 -665.11191 -666.79864 131.80955 21.608598 12.474897 50 2281.6131 1517.4002 46.399066 + 690 0.069 -665.11056 -666.72065 125.82027 21.607784 12.474418 50 2079.2055 1783.5346 41.895586 + 700 0.07 -665.10941 -666.71578 125.5291 21.607116 12.474011 50 1832.7039 2057.9076 35.011051 + 710 0.071 -665.10848 -666.78203 130.77932 21.606577 12.473687 50 1568.7275 2313.0601 26.153491 + 720 0.072 -665.10776 -666.89681 139.80468 21.606148 12.473458 50 1318.5189 2525.6808 15.783637 + 730 0.073 -665.10727 -667.0243 149.80574 21.605812 12.47333 50 1113.5537 2678.1859 4.3967762 + 740 0.074 -665.10701 -667.12698 157.85016 21.605555 12.473311 50 980.633 2758.9123 -7.4930622 + 750 0.075 -665.10697 -667.17729 161.78497 21.605368 12.473404 50 937.45086 2761.5936 -19.376492 + 760 0.076 -665.10714 -667.1654 160.84249 21.605247 12.473609 50 989.5724 2684.9256 -30.776106 + 770 0.077 -665.1075 -667.10061 155.75086 21.605196 12.473922 50 1129.4775 2532.7048 -41.263677 + 780 0.078 -665.10803 -667.00654 148.35835 21.605226 12.474338 50 1337.8663 2314.4556 -50.455407 + 790 0.079 -665.10869 -666.91242 140.9515 21.605349 12.474848 50 1586.9099 2045.9808 -57.988114 + 800 0.08 -665.10946 -666.84375 135.52533 21.605585 12.475441 50 1844.7038 1749.1281 -63.495405 + 810 0.081 -665.11032 -666.81538 133.24173 21.60595 12.476105 50 2079.9601 1450.3113 -66.60795 + 820 0.082 -665.11127 -666.82877 134.21424 21.606461 12.476828 50 2266.0059 1177.7937 -66.990929 + 830 0.083 -665.1123 -666.87353 137.6312 21.607131 12.477599 50 2383.4351 958.19752 -64.411861 + 840 0.084 -665.11343 -666.93214 142.12323 21.607968 12.478409 50 2421.1969 812.91475 -58.816538 + 850 0.085 -665.11467 -666.98597 146.2321 21.608975 12.479253 50 2376.3483 755.06052 -50.389393 + 860 0.086 -665.11603 -667.02075 148.84448 21.610149 12.480128 50 2252.9811 787.43069 -39.585062 + 870 0.087 -665.1175 -667.03045 149.48743 21.611481 12.481034 50 2060.884 901.76342 -27.129117 + 880 0.088 -665.11907 -667.01838 148.42091 21.612958 12.481978 50 1814.3354 1079.4855 -13.988401 + 890 0.089 -665.12073 -666.99552 146.50471 21.614562 12.482966 50 1531.1565 1293.9709 -1.305884 + 900 0.09 -665.12247 -666.97639 144.87389 21.616275 12.484007 50 1231.9005 1514.0741 9.7083525 + 910 0.091 -665.12426 -666.97371 144.52455 21.618074 12.485109 50 938.90089 1708.364 17.929974 + 920 0.092 -665.12609 -666.99389 145.95889 21.61994 12.486281 50 674.90767 1849.2415 22.497207 + 930 0.093 -665.12794 -667.03498 149.02559 21.621853 12.487528 50 461.18604 1916.1468 22.971745 + 940 0.094 -665.12977 -667.08777 153.00718 21.6238 12.488852 50 315.19601 1897.3867 19.43758 + 950 0.095 -665.13156 -667.13925 156.8903 21.62577 12.490254 50 248.20946 1790.5667 12.504818 + 960 0.096 -665.13326 -667.17668 159.68273 21.627757 12.491728 50 263.35912 1601.9528 3.2123256 + 970 0.097 -665.13485 -667.19079 160.6611 21.629764 12.493267 50 354.58496 1345.1489 -7.1487162 + 980 0.098 -665.13628 -667.17758 159.5175 21.631796 12.494862 50 506.7626 1039.346 -17.249179 + 990 0.099 -665.13753 -667.13942 156.43758 21.633864 12.496499 50 697.06054 707.26671 -25.92737 + 1000 0.1 -665.13859 -667.0853 152.12472 21.635982 12.498164 50 897.38498 372.94791 -32.344697 +Loop time of 0.174508 on 4 procs for 1000 steps with 100 atoms + +Performance: 49.511 ns/day, 0.485 hours/ns, 5730.393 timesteps/s +98.8% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.12409 | 0.12834 | 0.13408 | 1.1 | 73.54 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.016369 | 0.021358 | 0.025324 | 2.7 | 12.24 +Output | 0.0023892 | 0.0025101 | 0.0028272 | 0.4 | 1.44 +Modify | 0.01733 | 0.018302 | 0.018958 | 0.5 | 10.49 +Other | | 0.003995 | | | 2.29 + +Nlocal: 25 ave 26 max 24 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 179 ave 180 max 178 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 450 ave 468 max 432 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 1800 +Ave neighs/atom = 18 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/gcmc/in.gcmc.co2 b/examples/gcmc/in.gcmc.co2 index d11ef72fddc80a732684b8eba32a14c3356c4f30..128f05b489885885fdb68c9953572d71e6d44f76 100644 --- a/examples/gcmc/in.gcmc.co2 +++ b/examples/gcmc/in.gcmc.co2 @@ -67,6 +67,15 @@ variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol & co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +# atom counts + +variable carbon atom "type==1" +variable oxygen atom "type==2" +group carbon dynamic all var carbon +group oxygen dynamic all var oxygen +variable nC equal count(carbon) +variable nO equal count(oxygen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -74,7 +83,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nC v_nO thermo 1000 # run diff --git a/examples/gcmc/in.gcmc.h2o b/examples/gcmc/in.gcmc.h2o index 7ffaafa9753b22e949efa55ecd482c9ffd2b6fdc..2c03b1ab78cc9dad8626ef76241e2dae1ab37955 100644 --- a/examples/gcmc/in.gcmc.h2o +++ b/examples/gcmc/in.gcmc.h2o @@ -72,6 +72,15 @@ variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol & h2omol tfac_insert ${tfac} group h2o shake wshake +# atom counts + +variable oxygen atom "type==1" +variable hydrogen atom "type==2" +group oxygen dynamic all var oxygen +group hydrogen dynamic all var hydrogen +variable nO equal count(oxygen) +variable nH equal count(hydrogen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -79,7 +88,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nO v_nH thermo 1000 # run diff --git a/examples/gcmc/in.gcmc.lj b/examples/gcmc/in.gcmc.lj index fc9afdb7f837d07984516421ff6e4bb8a5cef9ab..3fe78efb252d2274a03b812f0bd972db2666952c 100644 --- a/examples/gcmc/in.gcmc.lj +++ b/examples/gcmc/in.gcmc.lj @@ -33,6 +33,12 @@ mass * 1.0 fix mygcmc all gcmc 1 100 100 1 29494 ${temp} ${mu} ${disp} +# atom count + +variable type1 atom "type==1" +group type1 dynamic all var type1 +variable n1 equal count(type1) + # averaging variable rho equal density @@ -40,10 +46,11 @@ variable p equal press variable nugget equal 1.0e-8 variable lambda equal 1.0 variable muex equal ${mu}-${temp}*ln(density*${lambda}+${nugget}) -fix ave all ave/time 10 100 1000 v_rho v_p v_muex ave one file rho_vs_p.dat +fix ave all ave/time 10 100 1000 v_rho v_p v_muex v_n1 ave one file rho_vs_p.dat variable rhoav equal f_ave[1] variable pav equal f_ave[2] variable muexav equal f_ave[3] +variable n1av equal f_ave[4] # output @@ -51,7 +58,7 @@ variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+${nugget}) variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+${nugget}) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+${nugget}) compute_modify thermo_temp dynamic yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav v_n1av thermo 1000 # run diff --git a/examples/gcmc/log.6Jul17.gcmc.co2.g++.1 b/examples/gcmc/log.23Oct17.gcmc.co2.g++.1 similarity index 81% rename from examples/gcmc/log.6Jul17.gcmc.co2.g++.1 rename to examples/gcmc/log.23Oct17.gcmc.co2.g++.1 index f9e494c43fdcd1ccd557a5366c0a6145daf69be1..e7b7c6afda0f28e4bb4b7c7dd5aaa5b86635d39d 100644 --- a/examples/gcmc/log.6Jul17.gcmc.co2.g++.1 +++ b/examples/gcmc/log.23Oct17.gcmc.co2.g++.1 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # GCMC for CO2 molecular fluid, rigid/small/nvt dynamics # Rigid CO2 TraPPE model @@ -46,6 +46,7 @@ Read molecule co2mol: 0 impropers with 0 types create_atoms 0 box mol co2mol 464563 units box Created 24 atoms + Time spent = 0.00196958 secs # rigid CO2 TraPPE model @@ -87,6 +88,17 @@ fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt +# atom counts + +variable carbon atom "type==1" +variable oxygen atom "type==2" +group carbon dynamic all var carbon +dynamic group carbon defined +group oxygen dynamic all var oxygen +dynamic group oxygen defined +variable nC equal count(carbon) +variable nO equal count(oxygen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -94,7 +106,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nC v_nO thermo 1000 # run @@ -124,45 +136,45 @@ Neighbor list info ... stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 15.62 | 15.62 | 15.62 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc - 0 364.27579 4238.8631 -9.6809388 13.391989 0.5846359 24 0 0 0 0 +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc v_nC v_nO + 0 364.27579 4238.8631 -9.6809388 13.391989 0.5846359 24 0 0 0 0 8 16 WARNING: Using kspace solver on system with no charge (../kspace.cpp:289) - 1000 420.43475 1722.4052 -9.6956123 15.456579 0.5846359 24 0.20879341 0.20713005 0 0 - 2000 302.29516 -547.83641 -22.017674 14.11699 0.73079488 30 0.1742478 0.1678018 0 0 - 3000 316.6934 -1080.2672 -8.2218891 10.069364 0.51155641 21 0.13544917 0.13720634 0 0 - 4000 246.81618 -679.83642 -14.577244 10.29997 0.65771539 27 0.1568939 0.15860229 0 0 - 5000 260.22849 -896.29914 -16.097593 10.859684 0.65771539 27 0.13138744 0.13547049 0 0 - 6000 291.70796 -1521.99 -22.303136 13.622574 0.73079488 30 0.12615476 0.12717694 0 0 - 7000 236.02638 -599.92186 -27.580831 13.367447 0.87695385 36 0.119703 0.12145398 0 0 - 8000 321.45341 688.10577 -10.09204 11.817696 0.5846359 24 0.10917411 0.11032646 0 0 - 9000 502.85382 -302.31056 -0.22330142 0.99927447 0.073079488 3 0.1254105 0.12905828 0 0 - 10000 249.98239 -510.0091 -32.815145 15.399767 0.95003334 39 0.1274504 0.12875623 0 0 - 11000 247.59424 -1129.0274 -25.320205 12.792544 0.80387436 33 0.11739076 0.11916784 0 0 - 12000 0 -20.39554 -0.14872889 -0 0 0 0.1254933 0.12920375 0 0 - 13000 1272.2738 -474.79484 -0.29450485 8.8489483 0.14615898 6 0.13767133 0.14112496 0 0 - 14000 516.54246 -36.296516 -5.0012009 11.291243 0.36539744 15 0.15632744 0.15955377 0 0 - 15000 307.09233 1951.9301 -14.820362 12.815375 0.65771539 27 0.15393544 0.15716192 0 0 - 16000 198.31989 -559.48443 -30.459487 11.231925 0.87695385 36 0.1482565 0.15025652 0 0 - 17000 246.99311 657.85683 -18.579206 11.53442 0.73079488 30 0.14143958 0.14375423 0 0 - 18000 467.13468 167.03738 -1.0945268 5.569759 0.21923846 9 0.13847359 0.14098533 0 0 - 19000 359.54027 -1413.5407 -12.156233 13.217895 0.5846359 24 0.15169146 0.15294205 0 0 - 20000 227.79597 -1204.5652 -23.24144 10.637925 0.73079488 30 0.14917199 0.15022946 0 0 -Loop time of 20.153 on 1 procs for 20000 steps with 30 atoms - -Performance: 85.744 ns/day, 0.280 hours/ns, 992.408 timesteps/s -99.3% CPU use with 1 MPI tasks x 1 OpenMP threads + 1000 420.43475 1722.4052 -9.6956123 15.456579 0.5846359 24 0.20879341 0.20713005 0 0 8 16 + 2000 302.29516 -547.83641 -22.017674 14.11699 0.73079488 30 0.1742478 0.1678018 0 0 10 20 + 3000 316.6934 -1080.2672 -8.2218891 10.069364 0.51155641 21 0.13544917 0.13720634 0 0 7 14 + 4000 246.81618 -679.83642 -14.577244 10.29997 0.65771539 27 0.1568939 0.15860229 0 0 9 18 + 5000 260.22849 -896.29914 -16.097593 10.859684 0.65771539 27 0.13138744 0.13547049 0 0 9 18 + 6000 291.70796 -1521.99 -22.303136 13.622574 0.73079488 30 0.12615476 0.12717694 0 0 10 20 + 7000 236.02638 -599.92186 -27.580831 13.367447 0.87695385 36 0.119703 0.12145398 0 0 12 24 + 8000 321.45341 688.10577 -10.09204 11.817696 0.5846359 24 0.10917411 0.11032646 0 0 8 16 + 9000 502.85382 -302.31056 -0.22330142 0.99927447 0.073079488 3 0.1254105 0.12905828 0 0 1 2 + 10000 249.98239 -510.0091 -32.815145 15.399767 0.95003334 39 0.1274504 0.12875623 0 0 13 26 + 11000 247.59424 -1129.0274 -25.320205 12.792544 0.80387436 33 0.11739076 0.11916784 0 0 11 22 + 12000 0 -20.39554 -0.14872889 -0 0 0 0.1254933 0.12920375 0 0 0 0 + 13000 1272.2738 -474.79484 -0.29450485 8.8489483 0.14615898 6 0.13767133 0.14112496 0 0 2 4 + 14000 516.54246 -36.296516 -5.0012009 11.291243 0.36539744 15 0.15632744 0.15955377 0 0 5 10 + 15000 307.09233 1951.9301 -14.820362 12.815375 0.65771539 27 0.15393544 0.15716192 0 0 9 18 + 16000 198.31989 -559.48443 -30.459487 11.231925 0.87695385 36 0.1482565 0.15025652 0 0 12 24 + 17000 246.99311 657.85683 -18.579206 11.53442 0.73079488 30 0.14143958 0.14375423 0 0 10 20 + 18000 467.13468 167.03738 -1.0945268 5.569759 0.21923846 9 0.13847359 0.14098533 0 0 3 6 + 19000 359.54027 -1413.5407 -12.156233 13.217895 0.5846359 24 0.15169146 0.15294205 0 0 8 16 + 20000 227.79597 -1204.5652 -23.24144 10.637925 0.73079488 30 0.14917199 0.15022946 0 0 10 20 +Loop time of 20.6928 on 1 procs for 20000 steps with 30 atoms + +Performance: 83.507 ns/day, 0.287 hours/ns, 966.519 timesteps/s +99.2% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.5352 | 2.5352 | 2.5352 | 0.0 | 12.58 -Bond | 0.026112 | 0.026112 | 0.026112 | 0.0 | 0.13 -Kspace | 0.25 | 0.25 | 0.25 | 0.0 | 1.24 -Neigh | 0.10364 | 0.10364 | 0.10364 | 0.0 | 0.51 -Comm | 0.22907 | 0.22907 | 0.22907 | 0.0 | 1.14 -Output | 0.0013065 | 0.0013065 | 0.0013065 | 0.0 | 0.01 -Modify | 16.957 | 16.957 | 16.957 | 0.0 | 84.14 -Other | | 0.05061 | | | 0.25 +Pair | 2.5462 | 2.5462 | 2.5462 | 0.0 | 12.30 +Bond | 0.029783 | 0.029783 | 0.029783 | 0.0 | 0.14 +Kspace | 0.26167 | 0.26167 | 0.26167 | 0.0 | 1.26 +Neigh | 0.10705 | 0.10705 | 0.10705 | 0.0 | 0.52 +Comm | 0.23409 | 0.23409 | 0.23409 | 0.0 | 1.13 +Output | 0.0013416 | 0.0013416 | 0.0013416 | 0.0 | 0.01 +Modify | 17.458 | 17.458 | 17.458 | 0.0 | 84.37 +Other | | 0.05433 | | | 0.26 Nlocal: 30 ave 30 max 30 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/gcmc/log.6Jul17.gcmc.co2.g++.4 b/examples/gcmc/log.23Oct17.gcmc.co2.g++.4 similarity index 81% rename from examples/gcmc/log.6Jul17.gcmc.co2.g++.4 rename to examples/gcmc/log.23Oct17.gcmc.co2.g++.4 index 0df25430d26c07b98085c31a2ac162c90f5bd763..b344c7b0685eb0e19deb26e363e9cc43cd292f24 100644 --- a/examples/gcmc/log.6Jul17.gcmc.co2.g++.4 +++ b/examples/gcmc/log.23Oct17.gcmc.co2.g++.4 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # GCMC for CO2 molecular fluid, rigid/small/nvt dynamics # Rigid CO2 TraPPE model @@ -46,6 +46,7 @@ Read molecule co2mol: 0 impropers with 0 types create_atoms 0 box mol co2mol 464563 units box Created 24 atoms + Time spent = 0.00261331 secs # rigid CO2 TraPPE model @@ -87,6 +88,17 @@ fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt +# atom counts + +variable carbon atom "type==1" +variable oxygen atom "type==2" +group carbon dynamic all var carbon +dynamic group carbon defined +group oxygen dynamic all var oxygen +dynamic group oxygen defined +variable nC equal count(carbon) +variable nO equal count(oxygen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -94,7 +106,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nC v_nO thermo 1000 # run @@ -124,45 +136,45 @@ Neighbor list info ... stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 15.41 | 15.41 | 15.41 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc - 0 386.52184 23582.465 -3.2433417 14.209828 0.5846359 24 0 0 0 0 +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc v_nC v_nO + 0 386.52184 23582.465 -3.2433417 14.209828 0.5846359 24 0 0 0 0 8 16 WARNING: Using kspace solver on system with no charge (../kspace.cpp:289) - 1000 335.66829 -3.7743052 -4.6268612 7.3374649 0.36539744 15 0.20601899 0.20787963 0 0 - 2000 459.73529 238.91592 -0.42937831 5.4815343 0.21923846 9 0.30392058 0.30105616 0 0 - 3000 255.47773 -479.67802 -36.850434 15.738299 0.95003334 39 0.22220744 0.2197582 0 0 - 4000 182.70803 -1059.2262 -43.044833 12.163134 1.0231128 42 0.16781689 0.16716177 0 0 - 5000 234.00907 -1821.0444 -46.04795 15.578317 1.0231128 42 0.13498091 0.13704201 0 0 - 6000 163.42759 -774.67294 -49.686261 11.691518 1.0961923 45 0.11401677 0.11296973 0 0 - 7000 171.64616 -355.23516 -49.323434 12.27947 1.0961923 45 0.098302308 0.098552065 0 0 - 8000 251.29791 -905.47863 -37.841209 15.480807 0.95003334 39 0.086856972 0.08638658 0 0 - 9000 143.69498 -849.95393 -49.073188 10.279858 1.0961923 45 0.078261061 0.077955243 0 0 - 10000 239.35727 -1158.1879 -43.562047 15.934355 1.0231128 42 0.070789792 0.070807529 0 0 - 11000 169.51213 -1574.7885 -51.125228 12.126803 1.0961923 45 0.065008734 0.06498871 0 0 - 12000 181.39739 160.11631 -46.850937 12.977068 1.0961923 45 0.059648717 0.059514803 0 0 - 13000 164.14601 -1107.7629 -50.726722 11.742914 1.0961923 45 0.055207333 0.055097701 0 0 - 14000 287.26285 418.51463 -41.664766 19.123497 1.0231128 42 0.051346789 0.051222285 0 0 - 15000 256.94593 -532.36615 -41.651618 17.105257 1.0231128 42 0.047870301 0.047861685 0 0 - 16000 166.92132 151.15933 -39.957018 11.11219 1.0231128 42 0.045205599 0.045042211 0 0 - 17000 163.22452 -1299.8119 -42.677558 10.866089 1.0231128 42 0.043122086 0.042993687 0 0 - 18000 158.01154 475.77329 -48.803162 11.304057 1.0961923 45 0.041016683 0.040647229 0 0 - 19000 138.49297 -1585.1508 -47.517099 9.9077098 1.0961923 45 0.038929287 0.038436764 0 0 - 20000 173.84439 -1362.6301 -53.002743 12.436731 1.0961923 45 0.036973919 0.036523816 0 0 -Loop time of 31.8386 on 4 procs for 20000 steps with 45 atoms - -Performance: 54.274 ns/day, 0.442 hours/ns, 628.168 timesteps/s -98.5% CPU use with 4 MPI tasks x 1 OpenMP threads + 1000 335.66829 -3.7743052 -4.6268612 7.3374649 0.36539744 15 0.20601899 0.20787963 0 0 5 10 + 2000 459.73529 238.91592 -0.42937831 5.4815343 0.21923846 9 0.30392058 0.30105616 0 0 3 6 + 3000 255.47773 -479.67802 -36.850434 15.738299 0.95003334 39 0.22220744 0.2197582 0 0 13 26 + 4000 182.70803 -1059.2262 -43.044833 12.163134 1.0231128 42 0.16781689 0.16716177 0 0 14 28 + 5000 234.00907 -1821.0444 -46.04795 15.578317 1.0231128 42 0.13498091 0.13704201 0 0 14 28 + 6000 163.42759 -774.67294 -49.686261 11.691518 1.0961923 45 0.11401677 0.11296973 0 0 15 30 + 7000 171.64616 -355.23516 -49.323434 12.27947 1.0961923 45 0.098302308 0.098552065 0 0 15 30 + 8000 251.29791 -905.47863 -37.841209 15.480807 0.95003334 39 0.086856972 0.08638658 0 0 13 26 + 9000 143.69498 -849.95393 -49.073188 10.279858 1.0961923 45 0.078261061 0.077955243 0 0 15 30 + 10000 239.35727 -1158.1879 -43.562047 15.934355 1.0231128 42 0.070789792 0.070807529 0 0 14 28 + 11000 169.51213 -1574.7885 -51.125228 12.126803 1.0961923 45 0.065008734 0.06498871 0 0 15 30 + 12000 181.39739 160.11631 -46.850937 12.977068 1.0961923 45 0.059648717 0.059514803 0 0 15 30 + 13000 164.14601 -1107.7629 -50.726722 11.742914 1.0961923 45 0.055207333 0.055097701 0 0 15 30 + 14000 287.26285 418.51463 -41.664766 19.123497 1.0231128 42 0.051346789 0.051222285 0 0 14 28 + 15000 256.94593 -532.36615 -41.651618 17.105257 1.0231128 42 0.047870301 0.047861685 0 0 14 28 + 16000 166.92132 151.15933 -39.957018 11.11219 1.0231128 42 0.045205599 0.045042211 0 0 14 28 + 17000 163.22452 -1299.8119 -42.677558 10.866089 1.0231128 42 0.043122086 0.042993687 0 0 14 28 + 18000 158.01154 475.77329 -48.803162 11.304057 1.0961923 45 0.041016683 0.040647229 0 0 15 30 + 19000 138.49297 -1585.1508 -47.517099 9.9077098 1.0961923 45 0.038929287 0.038436764 0 0 15 30 + 20000 173.84439 -1362.6301 -53.002743 12.436731 1.0961923 45 0.036973919 0.036523816 0 0 15 30 +Loop time of 32.4481 on 4 procs for 20000 steps with 45 atoms + +Performance: 53.254 ns/day, 0.451 hours/ns, 616.369 timesteps/s +98.4% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 1.1546 | 1.6687 | 2.1338 | 29.5 | 5.24 -Bond | 0.019769 | 0.020369 | 0.02132 | 0.4 | 0.06 -Kspace | 0.53392 | 0.99911 | 1.5116 | 37.8 | 3.14 -Neigh | 0.06737 | 0.067842 | 0.068412 | 0.2 | 0.21 -Comm | 1.9408 | 1.9582 | 1.9733 | 1.1 | 6.15 -Output | 0.0019503 | 0.0020472 | 0.0022476 | 0.3 | 0.01 -Modify | 26.974 | 26.99 | 27.001 | 0.2 | 84.77 -Other | | 0.1322 | | | 0.42 +Pair | 1.1687 | 1.6702 | 2.1864 | 30.8 | 5.15 +Bond | 0.018828 | 0.020035 | 0.020975 | 0.6 | 0.06 +Kspace | 0.57506 | 1.0931 | 1.5898 | 37.7 | 3.37 +Neigh | 0.068863 | 0.069524 | 0.070128 | 0.2 | 0.21 +Comm | 2.0735 | 2.0865 | 2.0979 | 0.7 | 6.43 +Output | 0.0025017 | 0.0025966 | 0.0027781 | 0.2 | 0.01 +Modify | 27.335 | 27.344 | 27.363 | 0.2 | 84.27 +Other | | 0.1621 | | | 0.50 Nlocal: 11.25 ave 14 max 8 min Histogram: 1 0 0 0 0 1 1 0 0 1 @@ -177,4 +189,4 @@ Ave special neighs/atom = 2 Neighbor list builds = 20394 Dangerous builds = 0 -Total wall time: 0:00:31 +Total wall time: 0:00:32 diff --git a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 b/examples/gcmc/log.23Oct17.gcmc.h2o.g++.1 similarity index 77% rename from examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 rename to examples/gcmc/log.23Oct17.gcmc.h2o.g++.1 index 3b1606e65d6979747e5f22a920e516f691e052c2..bc7c3af4541e58dea45b6d917d2645a2fb64683c 100644 --- a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 +++ b/examples/gcmc/log.23Oct17.gcmc.h2o.g++.1 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # fix gcmc example with fix shake @@ -51,6 +51,7 @@ Read molecule h2omol: 0 impropers with 0 types create_atoms 0 box mol h2omol 464563 units box Created 24 atoms + Time spent = 0.00201297 secs # rigid SPC/E water model @@ -100,9 +101,9 @@ Per MPI rank memory allocation (min/avg/max) = 11.88 | 11.88 | 11.88 Mbytes Step Temp E_pair E_mol TotEng Press 0 338 -4.1890564 9.2628112e-06 18.98377 739.06991 100 338 -30.182886 0.85607237 -6.1539961 -2535.3207 -Loop time of 0.0525794 on 1 procs for 100 steps with 24 atoms +Loop time of 0.0507543 on 1 procs for 100 steps with 24 atoms -99.4% CPU use with 1 MPI tasks x 1 OpenMP threads +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads Minimization stats: Stopping criterion = max iterations @@ -116,14 +117,14 @@ Minimization stats: MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.044199 | 0.044199 | 0.044199 | 0.0 | 84.06 -Bond | 0.00049019 | 0.00049019 | 0.00049019 | 0.0 | 0.93 -Kspace | 0.0031631 | 0.0031631 | 0.0031631 | 0.0 | 6.02 -Neigh | 0.00046444 | 0.00046444 | 0.00046444 | 0.0 | 0.88 -Comm | 0.0034101 | 0.0034101 | 0.0034101 | 0.0 | 6.49 -Output | 1.9073e-05 | 1.9073e-05 | 1.9073e-05 | 0.0 | 0.04 +Pair | 0.042597 | 0.042597 | 0.042597 | 0.0 | 83.93 +Bond | 0.00047708 | 0.00047708 | 0.00047708 | 0.0 | 0.94 +Kspace | 0.0031135 | 0.0031135 | 0.0031135 | 0.0 | 6.13 +Neigh | 0.00045919 | 0.00045919 | 0.00045919 | 0.0 | 0.90 +Comm | 0.0032997 | 0.0032997 | 0.0032997 | 0.0 | 6.50 +Output | 1.359e-05 | 1.359e-05 | 1.359e-05 | 0.0 | 0.03 Modify | 0 | 0 | 0 | 0.0 | 0.00 -Other | | 0.0008333 | | | 1.58 +Other | | 0.0007946 | | | 1.57 Nlocal: 24 ave 24 max 24 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -164,22 +165,22 @@ Per MPI rank memory allocation (min/avg/max) = 11.63 | 11.63 | 11.63 Mbytes Step Temp E_pair E_mol TotEng Press 0 518.26667 -30.182886 0 -7.0100684 993.1985 1000 326.9865 -62.258445 0 -47.638175 -5.3440813 -Loop time of 0.14263 on 1 procs for 1000 steps with 24 atoms +Loop time of 0.141449 on 1 procs for 1000 steps with 24 atoms -Performance: 605.764 ns/day, 0.040 hours/ns, 7011.155 timesteps/s -99.5% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 610.819 ns/day, 0.039 hours/ns, 7069.663 timesteps/s +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.10849 | 0.10849 | 0.10849 | 0.0 | 76.07 -Bond | 0.00015426 | 0.00015426 | 0.00015426 | 0.0 | 0.11 -Kspace | 0.01205 | 0.01205 | 0.01205 | 0.0 | 8.45 -Neigh | 0.0046577 | 0.0046577 | 0.0046577 | 0.0 | 3.27 -Comm | 0.011531 | 0.011531 | 0.011531 | 0.0 | 8.08 -Output | 1.6212e-05 | 1.6212e-05 | 1.6212e-05 | 0.0 | 0.01 -Modify | 0.0037699 | 0.0037699 | 0.0037699 | 0.0 | 2.64 -Other | | 0.001957 | | | 1.37 +Pair | 0.10788 | 0.10788 | 0.10788 | 0.0 | 76.27 +Bond | 0.00018954 | 0.00018954 | 0.00018954 | 0.0 | 0.13 +Kspace | 0.011867 | 0.011867 | 0.011867 | 0.0 | 8.39 +Neigh | 0.0045254 | 0.0045254 | 0.0045254 | 0.0 | 3.20 +Comm | 0.011277 | 0.011277 | 0.011277 | 0.0 | 7.97 +Output | 1.5497e-05 | 1.5497e-05 | 1.5497e-05 | 0.0 | 0.01 +Modify | 0.00383 | 0.00383 | 0.00383 | 0.0 | 2.71 +Other | | 0.001868 | | | 1.32 Nlocal: 24 ave 24 max 24 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -201,6 +202,17 @@ fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert ${tfac} group h2o shake wshake fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert 1.66666666666667 group h2o shake wshake +# atom counts + +variable oxygen atom "type==1" +variable hydrogen atom "type==2" +group oxygen dynamic all var oxygen +dynamic group oxygen defined +group hydrogen dynamic all var hydrogen +dynamic group hydrogen defined +variable nO equal count(oxygen) +variable nH equal count(hydrogen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -208,7 +220,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nO v_nH thermo 1000 # run @@ -226,44 +238,44 @@ WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) 0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) Per MPI rank memory allocation (min/avg/max) = 11.63 | 11.63 | 11.63 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc - 1000 326.9865 -4.3509713 -62.258445 14.62027 0.23910963 24 0 0 0 0 - 2000 116.99793 -5344.1527 -286.61595 17.088682 0.74721761 75 0.048183096 0.013941446 0 0 - 3000 106.86746 -3920.4926 -361.60598 18.794545 0.89666113 90 0.035637919 0.012768883 0 0 - 4000 75.002668 540.46846 -414.8511 14.531966 0.98632724 99 0.025963651 0.0093451705 0 0 - 5000 79.924788 -2131.1173 -437.21216 15.962121 1.0162159 102 0.019879728 0.0070418993 0 0 - 6000 95.552773 -3647.0233 -438.24276 19.083253 1.0162159 102 0.015753613 0.0056885133 0 0 - 7000 79.501736 -2071.5369 -440.77351 15.877631 1.0162159 102 0.01326216 0.0046915318 0 0 - 8000 62.567091 -3102.9616 -442.21884 12.495541 1.0162159 102 0.011305503 0.0040437885 0 0 - 9000 68.324047 -3812.7866 -440.46835 13.645287 1.0162159 102 0.0099549538 0.0035157329 0 0 - 10000 83.857631 -2158.2659 -444.8183 16.747566 1.0162159 102 0.0088200922 0.0031354281 0 0 - 11000 68.350984 -2084.0789 -440.14081 13.650667 1.0162159 102 0.0081331455 0.0030247424 0 0 - 12000 76.867315 -1585.6723 -443.36199 15.3515 1.0162159 102 0.0073845932 0.0027532534 0 0 - 13000 59.74266 -2211.0211 -446.07791 11.931462 1.0162159 102 0.0067756276 0.0025213898 0 0 - 14000 81.154979 -907.0176 -441.53368 16.207808 1.0162159 102 0.0062527642 0.0023280719 0 0 - 15000 66.814346 -2804.5134 -455.80704 13.7421 1.0461046 105 0.0059590528 0.0021576214 0 0 - 16000 71.42983 -3930.4004 -458.43218 14.691394 1.0461046 105 0.0055547473 0.0020163729 0 0 - 17000 89.624855 -3569.8136 -455.18164 18.433672 1.0461046 105 0.0052173265 0.0018867687 0 0 - 18000 63.519962 -1882.8157 -456.58939 13.064525 1.0461046 105 0.0049082049 0.0017765986 0 0 - 19000 71.872698 -2243.5046 -454.93359 14.782481 1.0461046 105 0.0046439115 0.0016748361 0 0 - 20000 73.660765 -2285.3173 -476.35473 15.589381 1.0759934 108 0.0045124933 0.0015837653 0 0 - 21000 95.675868 987.92089 -475.46736 20.248603 1.0759934 108 0.004285814 0.0015049513 0 0 -Loop time of 226.155 on 1 procs for 20000 steps with 108 atoms - -Performance: 7.641 ns/day, 3.141 hours/ns, 88.435 timesteps/s -99.2% CPU use with 1 MPI tasks x 1 OpenMP threads +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc v_nO v_nH + 1000 326.9865 -4.3509713 -62.258445 14.62027 0.23910963 24 0 0 0 0 8 16 + 2000 116.99793 -5344.1527 -286.61595 17.088682 0.74721761 75 0.048183096 0.013941446 0 0 25 50 + 3000 106.86746 -3920.4926 -361.60598 18.794545 0.89666113 90 0.035637919 0.012768883 0 0 30 60 + 4000 75.002668 540.46846 -414.8511 14.531966 0.98632724 99 0.025963651 0.0093451705 0 0 33 66 + 5000 79.924788 -2131.1173 -437.21216 15.962121 1.0162159 102 0.019879728 0.0070418993 0 0 34 68 + 6000 95.552773 -3647.0233 -438.24276 19.083253 1.0162159 102 0.015753613 0.0056885133 0 0 34 68 + 7000 79.501736 -2071.5369 -440.77351 15.877631 1.0162159 102 0.01326216 0.0046915318 0 0 34 68 + 8000 62.567091 -3102.9616 -442.21884 12.495541 1.0162159 102 0.011305503 0.0040437885 0 0 34 68 + 9000 68.324047 -3812.7866 -440.46835 13.645287 1.0162159 102 0.0099549538 0.0035157329 0 0 34 68 + 10000 83.857631 -2158.2659 -444.8183 16.747566 1.0162159 102 0.0088200922 0.0031354281 0 0 34 68 + 11000 68.350984 -2084.0789 -440.14081 13.650667 1.0162159 102 0.0081331455 0.0030247424 0 0 34 68 + 12000 76.867315 -1585.6723 -443.36199 15.3515 1.0162159 102 0.0073845932 0.0027532534 0 0 34 68 + 13000 59.74266 -2211.0211 -446.07791 11.931462 1.0162159 102 0.0067756276 0.0025213898 0 0 34 68 + 14000 81.154979 -907.0176 -441.53368 16.207808 1.0162159 102 0.0062527642 0.0023280719 0 0 34 68 + 15000 66.814346 -2804.5134 -455.80704 13.7421 1.0461046 105 0.0059590528 0.0021576214 0 0 35 70 + 16000 71.42983 -3930.4004 -458.43218 14.691394 1.0461046 105 0.0055547473 0.0020163729 0 0 35 70 + 17000 89.624855 -3569.8136 -455.18164 18.433672 1.0461046 105 0.0052173265 0.0018867687 0 0 35 70 + 18000 63.519962 -1882.8157 -456.58939 13.064525 1.0461046 105 0.0049082049 0.0017765986 0 0 35 70 + 19000 71.872698 -2243.5046 -454.93359 14.782481 1.0461046 105 0.0046439115 0.0016748361 0 0 35 70 + 20000 73.660765 -2285.3173 -476.35473 15.589381 1.0759934 108 0.0045124933 0.0015837653 0 0 36 72 + 21000 95.675868 987.92089 -475.46736 20.248603 1.0759934 108 0.004285814 0.0015049513 0 0 36 72 +Loop time of 220.662 on 1 procs for 20000 steps with 108 atoms + +Performance: 7.831 ns/day, 3.065 hours/ns, 90.637 timesteps/s +99.6% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 38.053 | 38.053 | 38.053 | 0.0 | 16.83 -Bond | 0.089673 | 0.089673 | 0.089673 | 0.0 | 0.04 -Kspace | 0.92778 | 0.92778 | 0.92778 | 0.0 | 0.41 -Neigh | 1.2619 | 1.2619 | 1.2619 | 0.0 | 0.56 -Comm | 0.97483 | 0.97483 | 0.97483 | 0.0 | 0.43 -Output | 0.0013306 | 0.0013306 | 0.0013306 | 0.0 | 0.00 -Modify | 184.68 | 184.68 | 184.68 | 0.0 | 81.66 -Other | | 0.171 | | | 0.08 +Pair | 37.459 | 37.459 | 37.459 | 0.0 | 16.98 +Bond | 0.087067 | 0.087067 | 0.087067 | 0.0 | 0.04 +Kspace | 0.90234 | 0.90234 | 0.90234 | 0.0 | 0.41 +Neigh | 1.2299 | 1.2299 | 1.2299 | 0.0 | 0.56 +Comm | 0.95437 | 0.95437 | 0.95437 | 0.0 | 0.43 +Output | 0.0010636 | 0.0010636 | 0.0010636 | 0.0 | 0.00 +Modify | 179.85 | 179.85 | 179.85 | 0.0 | 81.51 +Other | | 0.1754 | | | 0.08 Nlocal: 108 ave 108 max 108 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -278,4 +290,4 @@ Ave special neighs/atom = 2 Neighbor list builds = 20439 Dangerous builds = 0 -Total wall time: 0:03:46 +Total wall time: 0:03:40 diff --git a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 b/examples/gcmc/log.23Oct17.gcmc.h2o.g++.4 similarity index 78% rename from examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 rename to examples/gcmc/log.23Oct17.gcmc.h2o.g++.4 index c04b25f45eaf88e4a82ca9edd8b79a0ec9ac934c..4eeab969ddc8c578baf028394c39064f2b57aace 100644 --- a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 +++ b/examples/gcmc/log.23Oct17.gcmc.h2o.g++.4 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # fix gcmc example with fix shake @@ -51,6 +51,7 @@ Read molecule h2omol: 0 impropers with 0 types create_atoms 0 box mol h2omol 464563 units box Created 24 atoms + Time spent = 0.00174451 secs # rigid SPC/E water model @@ -100,9 +101,9 @@ Per MPI rank memory allocation (min/avg/max) = 11.85 | 11.85 | 11.85 Mbytes Step Temp E_pair E_mol TotEng Press 0 338 -4.9610706 9.2628112e-06 18.211756 730.90791 100 338 -15.742442 0.14954269 7.579918 -637.49568 -Loop time of 0.0828406 on 4 procs for 100 steps with 24 atoms +Loop time of 0.0566185 on 4 procs for 100 steps with 24 atoms -98.7% CPU use with 4 MPI tasks x 1 OpenMP threads +98.8% CPU use with 4 MPI tasks x 1 OpenMP threads Minimization stats: Stopping criterion = max iterations @@ -116,14 +117,14 @@ Minimization stats: MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.012844 | 0.025471 | 0.047008 | 8.1 | 30.75 -Bond | 0.00038934 | 0.00046468 | 0.00054336 | 0.0 | 0.56 -Kspace | 0.0061138 | 0.027556 | 0.04014 | 7.8 | 33.26 +Pair | 0.0085177 | 0.016083 | 0.026787 | 5.3 | 28.41 +Bond | 0.00022459 | 0.00031394 | 0.00037575 | 0.0 | 0.55 +Kspace | 0.0049062 | 0.014122 | 0.02044 | 5.0 | 24.94 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.023276 | 0.023636 | 0.023804 | 0.1 | 28.53 -Output | 3.171e-05 | 3.3557e-05 | 3.8147e-05 | 0.0 | 0.04 +Comm | 0.018515 | 0.02086 | 0.023246 | 1.2 | 36.84 +Output | 2.4796e-05 | 2.6047e-05 | 2.9802e-05 | 0.0 | 0.05 Modify | 0 | 0 | 0 | 0.0 | 0.00 -Other | | 0.00568 | | | 6.86 +Other | | 0.005213 | | | 9.21 Nlocal: 6 ave 8 max 3 min Histogram: 1 0 0 0 1 0 0 0 0 2 @@ -164,22 +165,22 @@ Per MPI rank memory allocation (min/avg/max) = 11.6 | 11.6 | 11.6 Mbytes Step Temp E_pair E_mol TotEng Press 0 518.26667 -15.742442 0 7.4303753 -613.0781 1000 369.81793 -54.202686 0 -37.667331 294.98823 -Loop time of 0.199641 on 4 procs for 1000 steps with 24 atoms +Loop time of 0.154891 on 4 procs for 1000 steps with 24 atoms -Performance: 432.777 ns/day, 0.055 hours/ns, 5008.996 timesteps/s -98.5% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 557.810 ns/day, 0.043 hours/ns, 6456.135 timesteps/s +98.3% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.017161 | 0.034988 | 0.05833 | 8.0 | 17.53 -Bond | 0.00017357 | 0.00021374 | 0.00027347 | 0.0 | 0.11 -Kspace | 0.018025 | 0.044624 | 0.065613 | 8.4 | 22.35 -Neigh | 0.0029755 | 0.0033154 | 0.0036366 | 0.6 | 1.66 -Comm | 0.059933 | 0.06537 | 0.070709 | 1.5 | 32.74 -Output | 3.4571e-05 | 3.6657e-05 | 4.22e-05 | 0.0 | 0.02 -Modify | 0.043458 | 0.045628 | 0.04767 | 0.9 | 22.86 -Other | | 0.005465 | | | 2.74 +Pair | 0.0154 | 0.028993 | 0.040525 | 5.5 | 18.72 +Bond | 0.00016999 | 0.0001902 | 0.00023293 | 0.0 | 0.12 +Kspace | 0.019093 | 0.028112 | 0.038976 | 4.3 | 18.15 +Neigh | 0.0020263 | 0.0022184 | 0.002408 | 0.4 | 1.43 +Comm | 0.04947 | 0.053627 | 0.058009 | 1.4 | 34.62 +Output | 2.5749e-05 | 2.7537e-05 | 3.2187e-05 | 0.0 | 0.02 +Modify | 0.035275 | 0.036815 | 0.038425 | 0.7 | 23.77 +Other | | 0.004909 | | | 3.17 Nlocal: 6 ave 8 max 3 min Histogram: 1 0 0 0 0 0 1 0 1 1 @@ -201,6 +202,17 @@ fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert ${tfac} group h2o shake wshake fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert 1.66666666666667 group h2o shake wshake +# atom counts + +variable oxygen atom "type==1" +variable hydrogen atom "type==2" +group oxygen dynamic all var oxygen +dynamic group oxygen defined +group hydrogen dynamic all var hydrogen +dynamic group hydrogen defined +variable nO equal count(oxygen) +variable nH equal count(hydrogen) + # output variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) @@ -208,7 +220,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc v_nO v_nH thermo 1000 # run @@ -226,44 +238,44 @@ WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) 0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) Per MPI rank memory allocation (min/avg/max) = 11.6 | 11.6 | 11.6 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc - 1000 369.81793 295.32434 -54.202686 16.535355 0.23910963 24 0 0 0 0 - 2000 84.544466 -2810.9047 -344.81664 14.364627 0.86677242 87 0.052198354 0.0099581757 0 0 - 3000 75.188527 -3688.256 -425.02228 14.567977 0.98632724 99 0.030546787 0.0049111089 0 0 - 4000 75.019396 -5669.3063 -427.69454 14.535207 0.98632724 99 0.019972039 0.0033375609 0 0 - 5000 90.415371 -2141.7596 -434.65925 17.518218 0.98632724 99 0.014909796 0.002514964 0 0 - 6000 78.212628 -943.75125 -428.80584 15.153904 0.98632724 99 0.01181521 0.0020316119 0 0 - 7000 71.754139 -2028.5122 -435.2139 13.902555 0.98632724 99 0.0099466198 0.0016755471 0 0 - 8000 84.446231 -1969.1657 -428.27313 16.361681 0.98632724 99 0.0084791272 0.0014442102 0 0 - 9000 70.952348 -2476.9812 -446.33824 14.170197 1.0162159 102 0.0077150892 0.0012556189 0 0 - 10000 71.418543 -1875.7083 -443.7214 14.263302 1.0162159 102 0.0068355714 0.0011197957 0 0 - 11000 86.094994 -4508.7581 -444.82687 17.194399 1.0162159 102 0.0061494515 0.0010082475 0 0 - 12000 81.906091 -1547.8105 -442.36719 16.357815 1.0162159 102 0.0055834729 0.00091775114 0 0 - 13000 57.221548 -4607.6222 -448.30939 11.42796 1.0162159 102 0.0051230355 0.00084046326 0 0 - 14000 61.288344 -2518.1779 -445.70636 12.240157 1.0162159 102 0.0047276997 0.00077602396 0 0 - 15000 85.787669 -2407.7111 -443.3834 17.133022 1.0162159 102 0.0043983485 0.00071920715 0 0 - 16000 74.845939 -3288.3403 -445.8247 14.947802 1.0162159 102 0.0042321884 0.00080654918 0 0 - 17000 73.835431 -1926.9566 -445.67476 14.745989 1.0162159 102 0.0039751059 0.00075470749 0 0 - 18000 72.634985 -3997.552 -447.2351 14.506243 1.0162159 102 0.0037395847 0.00071063946 0 0 - 19000 96.776472 -714.44132 -453.65552 19.904587 1.0461046 105 0.0036487876 0.00066993446 0 0 - 20000 75.470786 183.16972 -464.04688 15.522521 1.0461046 105 0.0034630763 0.00063350614 0 0 - 21000 65.658309 -773.41266 -466.27068 13.504331 1.0461046 105 0.003289113 0.00060198052 0 0 -Loop time of 93.8859 on 4 procs for 20000 steps with 105 atoms - -Performance: 18.405 ns/day, 1.304 hours/ns, 213.024 timesteps/s +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc v_nO v_nH + 1000 369.81793 295.32434 -54.202686 16.535355 0.23910963 24 0 0 0 0 8 16 + 2000 84.544466 -2810.9047 -344.81664 14.364627 0.86677242 87 0.052198354 0.0099581757 0 0 29 58 + 3000 75.188527 -3688.256 -425.02228 14.567977 0.98632724 99 0.030546787 0.0049111089 0 0 33 66 + 4000 75.019396 -5669.3063 -427.69454 14.535207 0.98632724 99 0.019972039 0.0033375609 0 0 33 66 + 5000 90.415371 -2141.7596 -434.65925 17.518218 0.98632724 99 0.014909796 0.002514964 0 0 33 66 + 6000 78.212628 -943.75125 -428.80584 15.153904 0.98632724 99 0.01181521 0.0020316119 0 0 33 66 + 7000 71.754139 -2028.5122 -435.2139 13.902555 0.98632724 99 0.0099466198 0.0016755471 0 0 33 66 + 8000 84.446231 -1969.1657 -428.27313 16.361681 0.98632724 99 0.0084791272 0.0014442102 0 0 33 66 + 9000 70.952348 -2476.9812 -446.33824 14.170197 1.0162159 102 0.0077150892 0.0012556189 0 0 34 68 + 10000 71.418543 -1875.7083 -443.7214 14.263302 1.0162159 102 0.0068355714 0.0011197957 0 0 34 68 + 11000 86.094994 -4508.7581 -444.82687 17.194399 1.0162159 102 0.0061494515 0.0010082475 0 0 34 68 + 12000 81.906091 -1547.8105 -442.36719 16.357815 1.0162159 102 0.0055834729 0.00091775114 0 0 34 68 + 13000 57.221548 -4607.6222 -448.30939 11.42796 1.0162159 102 0.0051230355 0.00084046326 0 0 34 68 + 14000 61.288344 -2518.1779 -445.70636 12.240157 1.0162159 102 0.0047276997 0.00077602396 0 0 34 68 + 15000 85.787669 -2407.7111 -443.3834 17.133022 1.0162159 102 0.0043983485 0.00071920715 0 0 34 68 + 16000 74.845939 -3288.3403 -445.8247 14.947802 1.0162159 102 0.0042321884 0.00080654918 0 0 34 68 + 17000 73.835431 -1926.9566 -445.67476 14.745989 1.0162159 102 0.0039751059 0.00075470749 0 0 34 68 + 18000 72.634985 -3997.552 -447.2351 14.506243 1.0162159 102 0.0037395847 0.00071063946 0 0 34 68 + 19000 96.776472 -714.44132 -453.65552 19.904587 1.0461046 105 0.0036487876 0.00066993446 0 0 35 70 + 20000 75.470786 183.16972 -464.04688 15.522521 1.0461046 105 0.0034630763 0.00063350614 0 0 35 70 + 21000 65.658309 -773.41266 -466.27068 13.504331 1.0461046 105 0.003289113 0.00060198052 0 0 35 70 +Loop time of 84.4085 on 4 procs for 20000 steps with 105 atoms + +Performance: 20.472 ns/day, 1.172 hours/ns, 236.943 timesteps/s 98.8% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 6.7882 | 10.264 | 14.758 | 93.2 | 10.93 -Bond | 0.028286 | 0.034218 | 0.039038 | 2.5 | 0.04 -Kspace | 0.57255 | 5.2227 | 8.8493 | 133.8 | 5.56 -Neigh | 0.3635 | 0.36915 | 0.37473 | 0.9 | 0.39 -Comm | 2.9961 | 3.2542 | 3.509 | 11.4 | 3.47 -Output | 0.0011675 | 0.0012342 | 0.001375 | 0.2 | 0.00 -Modify | 74.428 | 74.499 | 74.571 | 0.7 | 79.35 -Other | | 0.2411 | | | 0.26 +Pair | 6.3571 | 9.7574 | 13.984 | 90.7 | 11.56 +Bond | 0.026374 | 0.031321 | 0.035482 | 2.1 | 0.04 +Kspace | 0.57402 | 4.7894 | 8.1754 | 129.0 | 5.67 +Neigh | 0.34952 | 0.34987 | 0.35021 | 0.1 | 0.41 +Comm | 2.4028 | 2.4228 | 2.4372 | 0.9 | 2.87 +Output | 0.0012269 | 0.0012826 | 0.0014355 | 0.2 | 0.00 +Modify | 66.819 | 66.828 | 66.837 | 0.1 | 79.17 +Other | | 0.2281 | | | 0.27 Nlocal: 26.25 ave 31 max 22 min Histogram: 1 0 1 0 0 0 1 0 0 1 @@ -278,4 +290,4 @@ Ave special neighs/atom = 2 Neighbor list builds = 20428 Dangerous builds = 0 -Total wall time: 0:01:34 +Total wall time: 0:01:24 diff --git a/examples/gcmc/log.6Jul17.gcmc.lj.g++.1 b/examples/gcmc/log.23Oct17.gcmc.lj.g++.1 similarity index 76% rename from examples/gcmc/log.6Jul17.gcmc.lj.g++.1 rename to examples/gcmc/log.23Oct17.gcmc.lj.g++.1 index 69fc2ede1cd28681f7b139e8bb2b1a4976d6df89..a38dfeee77897b740bfecfaedd046676a6ad56b3 100644 --- a/examples/gcmc/log.6Jul17.gcmc.lj.g++.1 +++ b/examples/gcmc/log.23Oct17.gcmc.lj.g++.1 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # GCMC for LJ simple fluid, no dynamics # T = 2.0 @@ -43,6 +43,13 @@ fix mygcmc all gcmc 1 100 100 1 29494 2.0 ${mu} ${disp} fix mygcmc all gcmc 1 100 100 1 29494 2.0 -1.25 ${disp} fix mygcmc all gcmc 1 100 100 1 29494 2.0 -1.25 1.0 +# atom count + +variable type1 atom "type==1" +group type1 dynamic all var type1 +dynamic group type1 defined +variable n1 equal count(type1) + # averaging variable rho equal density @@ -54,10 +61,11 @@ variable muex equal -1.25-${temp}*ln(density*${lambda}+${nugget}) variable muex equal -1.25-2.0*ln(density*${lambda}+${nugget}) variable muex equal -1.25-2.0*ln(density*1+${nugget}) variable muex equal -1.25-2.0*ln(density*1+1e-08) -fix ave all ave/time 10 100 1000 v_rho v_p v_muex ave one file rho_vs_p.dat +fix ave all ave/time 10 100 1000 v_rho v_p v_muex v_n1 ave one file rho_vs_p.dat variable rhoav equal f_ave[1] variable pav equal f_ave[2] variable muexav equal f_ave[3] +variable n1av equal f_ave[4] # output @@ -68,7 +76,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+1e-08) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+${nugget}) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+1e-08) compute_modify thermo_temp dynamic yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav v_n1av thermo 1000 # run @@ -87,32 +95,32 @@ Neighbor list info ... stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 0.433 | 0.433 | 0.433 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav - 0 0 0 0 -0 0 0 0 0 0 0 0 0 - 1000 2.4038954 2.1735585 -2.7041368 3.5476844 0.496 62 0.064790036 0.06313096 0.1081294 0.54304 1.4513524 -0.025479219 - 2000 2.0461168 1.1913842 -2.9880181 3.0212194 0.512 64 0.067416408 0.066335853 0.11306166 0.52736 1.3274665 0.034690004 - 3000 1.7930436 1.3788681 -3.2212667 2.6505861 0.552 69 0.067733191 0.066877836 0.1133516 0.5344 1.3834744 0.0070582537 - 4000 1.981449 1.2541054 -2.8222868 2.9217977 0.472 59 0.068546991 0.067856412 0.11442807 0.52504 1.3815629 0.043309657 - 5000 2.0946818 1.0701629 -3.5213291 3.0977688 0.568 71 0.06813743 0.067567891 0.11342906 0.53824 1.4049567 -0.0054539777 - 6000 1.9793484 0.68224187 -3.410211 2.9247088 0.536 67 0.067797628 0.067420108 0.11295333 0.5384 1.401683 -0.0066894359 - 7000 2.1885798 1.6745012 -3.185499 3.2345922 0.544 68 0.068630201 0.068261832 0.11403705 0.5244 1.449239 0.045987399 - 8000 2.2175324 1.5897263 -3.078898 3.2759002 0.528 66 0.068180395 0.067899629 0.11332691 0.53928 1.5488388 -0.01075766 - 9000 1.8610779 1.0396231 -2.923262 2.7465908 0.496 62 0.068346453 0.068028117 0.1134132 0.52912 1.4352871 0.027082544 - 10000 2.1079271 1.1746643 -2.9112062 3.1091925 0.48 60 0.068352878 0.068054948 0.11335434 0.5316 1.4462327 0.018503094 -Loop time of 20.6892 on 1 procs for 10000 steps with 60 atoms - -Performance: 208804.611 tau/day, 483.344 timesteps/s -99.4% CPU use with 1 MPI tasks x 1 OpenMP threads +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav v_n1av + 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 + 1000 2.4038954 2.1735585 -2.7041368 3.5476844 0.496 62 0.064790036 0.06313096 0.1081294 0.54304 1.4513524 -0.025479219 64.98 + 2000 2.0461168 1.1913842 -2.9880181 3.0212194 0.512 64 0.067416408 0.066335853 0.11306166 0.52736 1.3274665 0.034690004 62.97 + 3000 1.7930436 1.3788681 -3.2212667 2.6505861 0.552 69 0.067733191 0.066877836 0.1133516 0.5344 1.3834744 0.0070582537 63.5 + 4000 1.981449 1.2541054 -2.8222868 2.9217977 0.472 59 0.068546991 0.067856412 0.11442807 0.52504 1.3815629 0.043309657 62.17 + 5000 2.0946818 1.0701629 -3.5213291 3.0977688 0.568 71 0.06813743 0.067567891 0.11342906 0.53824 1.4049567 -0.0054539777 64.15 + 6000 1.9793484 0.68224187 -3.410211 2.9247088 0.536 67 0.067797628 0.067420108 0.11295333 0.5384 1.401683 -0.0066894359 64.37 + 7000 2.1885798 1.6745012 -3.185499 3.2345922 0.544 68 0.068630201 0.068261832 0.11403705 0.5244 1.449239 0.045987399 62.33 + 8000 2.2175324 1.5897263 -3.078898 3.2759002 0.528 66 0.068180395 0.067899629 0.11332691 0.53928 1.5488388 -0.01075766 64.29 + 9000 1.8610779 1.0396231 -2.923262 2.7465908 0.496 62 0.068346453 0.068028117 0.1134132 0.52912 1.4352871 0.027082544 62.87 + 10000 2.1079271 1.1746643 -2.9112062 3.1091925 0.48 60 0.068352878 0.068054948 0.11335434 0.5316 1.4462327 0.018503094 63.2 +Loop time of 20.4081 on 1 procs for 10000 steps with 60 atoms + +Performance: 211680.375 tau/day, 490.001 timesteps/s +98.9% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.47227 | 0.47227 | 0.47227 | 0.0 | 2.28 -Neigh | 1.1729 | 1.1729 | 1.1729 | 0.0 | 5.67 -Comm | 0.17133 | 0.17133 | 0.17133 | 0.0 | 0.83 -Output | 0.00028253 | 0.00028253 | 0.00028253 | 0.0 | 0.00 -Modify | 18.852 | 18.852 | 18.852 | 0.0 | 91.12 -Other | | 0.02063 | | | 0.10 +Pair | 0.46484 | 0.46484 | 0.46484 | 0.0 | 2.28 +Neigh | 1.1447 | 1.1447 | 1.1447 | 0.0 | 5.61 +Comm | 0.1696 | 0.1696 | 0.1696 | 0.0 | 0.83 +Output | 0.000319 | 0.000319 | 0.000319 | 0.0 | 0.00 +Modify | 18.607 | 18.607 | 18.607 | 0.0 | 91.17 +Other | | 0.02194 | | | 0.11 Nlocal: 60 ave 60 max 60 min Histogram: 1 0 0 0 0 0 0 0 0 0 diff --git a/examples/gcmc/log.6Jul17.gcmc.lj.g++.4 b/examples/gcmc/log.23Oct17.gcmc.lj.g++.4 similarity index 76% rename from examples/gcmc/log.6Jul17.gcmc.lj.g++.4 rename to examples/gcmc/log.23Oct17.gcmc.lj.g++.4 index 6bd3b3189cfeda01ff29feefff5d861c8d32b888..ea7dc8116febce3c356ca8782599656cf6250986 100644 --- a/examples/gcmc/log.6Jul17.gcmc.lj.g++.4 +++ b/examples/gcmc/log.23Oct17.gcmc.lj.g++.4 @@ -1,4 +1,4 @@ -LAMMPS (6 Jul 2017) +LAMMPS (23 Oct 2017) using 1 OpenMP thread(s) per MPI task # GCMC for LJ simple fluid, no dynamics # T = 2.0 @@ -43,6 +43,13 @@ fix mygcmc all gcmc 1 100 100 1 29494 2.0 ${mu} ${disp} fix mygcmc all gcmc 1 100 100 1 29494 2.0 -1.25 ${disp} fix mygcmc all gcmc 1 100 100 1 29494 2.0 -1.25 1.0 +# atom count + +variable type1 atom "type==1" +group type1 dynamic all var type1 +dynamic group type1 defined +variable n1 equal count(type1) + # averaging variable rho equal density @@ -54,10 +61,11 @@ variable muex equal -1.25-${temp}*ln(density*${lambda}+${nugget}) variable muex equal -1.25-2.0*ln(density*${lambda}+${nugget}) variable muex equal -1.25-2.0*ln(density*1+${nugget}) variable muex equal -1.25-2.0*ln(density*1+1e-08) -fix ave all ave/time 10 100 1000 v_rho v_p v_muex ave one file rho_vs_p.dat +fix ave all ave/time 10 100 1000 v_rho v_p v_muex v_n1 ave one file rho_vs_p.dat variable rhoav equal f_ave[1] variable pav equal f_ave[2] variable muexav equal f_ave[3] +variable n1av equal f_ave[4] # output @@ -68,7 +76,7 @@ variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+1e-08) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+${nugget}) variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+1e-08) compute_modify thermo_temp dynamic yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav v_n1av thermo 1000 # run @@ -87,32 +95,32 @@ Neighbor list info ... stencil: half/bin/3d/newton bin: standard Per MPI rank memory allocation (min/avg/max) = 0.4477 | 0.4477 | 0.4477 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav - 0 0 0 0 -0 0 0 0 0 0 0 0 0 - 1000 1.956397 1.7699101 -2.7889468 2.8864874 0.488 61 0.068894746 0.067229075 0.1141726 0.53288 1.3832798 0.013392866 - 2000 2.040943 0.56060899 -2.8001647 3.0077055 0.456 57 0.069858594 0.068831934 0.11629114 0.5232 1.3587174 0.049995794 - 3000 2.0004866 1.5736515 -3.3098044 2.9572411 0.552 69 0.069594029 0.068727791 0.11592543 0.53096 1.4129434 0.020022578 - 4000 2.1127942 2.642809 -2.8865084 3.1211733 0.528 66 0.070268697 0.069533235 0.11693806 0.52424 1.3444615 0.046884078 - 5000 2.3663648 1.354269 -3.1917346 3.4957662 0.528 66 0.070519633 0.069960064 0.11710321 0.52688 1.3595814 0.036270867 - 6000 1.9224136 0.82756699 -3.1965 2.839257 0.52 65 0.06985018 0.069474645 0.11628632 0.536 1.47062 0.00141549 - 7000 2.0266192 1.5593811 -2.9972341 2.9931606 0.52 65 0.070244693 0.069880791 0.11666541 0.52528 1.3246332 0.040754793 - 8000 1.7790467 1.8680568 -2.8028819 2.6275151 0.52 65 0.070454494 0.070172368 0.11736806 0.524 1.4213649 0.047985191 - 9000 1.7968847 1.3195587 -3.261001 2.6550983 0.536 67 0.069952011 0.069618327 0.11650087 0.53904 1.4624201 -0.01069837 - 10000 2.1566109 1.1015729 -3.4999837 3.1880335 0.552 69 0.069603309 0.069284134 0.11625548 0.53128 1.3587249 0.02075238 -Loop time of 24.9916 on 4 procs for 10000 steps with 69 atoms - -Performance: 172857.936 tau/day, 400.134 timesteps/s -98.2% CPU use with 4 MPI tasks x 1 OpenMP threads +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v_muexav v_n1av + 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 + 1000 1.956397 1.7699101 -2.7889468 2.8864874 0.488 61 0.068894746 0.067229075 0.1141726 0.53288 1.3832798 0.013392866 63.44 + 2000 2.040943 0.56060899 -2.8001647 3.0077055 0.456 57 0.069858594 0.068831934 0.11629114 0.5232 1.3587174 0.049995794 62.19 + 3000 2.0004866 1.5736515 -3.3098044 2.9572411 0.552 69 0.069594029 0.068727791 0.11592543 0.53096 1.4129434 0.020022578 63.23 + 4000 2.1127942 2.642809 -2.8865084 3.1211733 0.528 66 0.070268697 0.069533235 0.11693806 0.52424 1.3444615 0.046884078 62.57 + 5000 2.3663648 1.354269 -3.1917346 3.4957662 0.528 66 0.070519633 0.069960064 0.11710321 0.52688 1.3595814 0.036270867 62.56 + 6000 1.9224136 0.82756699 -3.1965 2.839257 0.52 65 0.06985018 0.069474645 0.11628632 0.536 1.47062 0.00141549 63.76 + 7000 2.0266192 1.5593811 -2.9972341 2.9931606 0.52 65 0.070244693 0.069880791 0.11666541 0.52528 1.3246332 0.040754793 62.2 + 8000 1.7790467 1.8680568 -2.8028819 2.6275151 0.52 65 0.070454494 0.070172368 0.11736806 0.524 1.4213649 0.047985191 62.03 + 9000 1.7968847 1.3195587 -3.261001 2.6550983 0.536 67 0.069952011 0.069618327 0.11650087 0.53904 1.4624201 -0.01069837 64.36 + 10000 2.1566109 1.1015729 -3.4999837 3.1880335 0.552 69 0.069603309 0.069284134 0.11625548 0.53128 1.3587249 0.02075238 63.24 +Loop time of 23.8213 on 4 procs for 10000 steps with 69 atoms + +Performance: 181350.388 tau/day, 419.793 timesteps/s +97.6% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.11696 | 0.12516 | 0.1321 | 1.7 | 0.50 -Neigh | 0.34874 | 0.35644 | 0.36545 | 1.2 | 1.43 -Comm | 0.48531 | 0.51366 | 0.54755 | 3.8 | 2.06 -Output | 0.0005362 | 0.00069767 | 0.00076008 | 0.0 | 0.00 -Modify | 23.956 | 23.972 | 23.988 | 0.3 | 95.92 -Other | | 0.02376 | | | 0.10 +Pair | 0.10935 | 0.11844 | 0.12741 | 2.1 | 0.50 +Neigh | 0.33 | 0.33945 | 0.35091 | 1.6 | 1.42 +Comm | 0.49249 | 0.51745 | 0.53856 | 2.7 | 2.17 +Output | 0.00053334 | 0.0007208 | 0.0007906 | 0.0 | 0.00 +Modify | 22.82 | 22.822 | 22.825 | 0.0 | 95.81 +Other | | 0.02289 | | | 0.10 Nlocal: 17.25 ave 23 max 10 min Histogram: 1 0 0 0 0 0 2 0 0 1 @@ -125,4 +133,4 @@ Total # of neighbors = 2823 Ave neighs/atom = 40.913 Neighbor list builds = 10000 Dangerous builds = 0 -Total wall time: 0:00:24 +Total wall time: 0:00:23 diff --git a/lib/atc/Matrix.h b/lib/atc/Matrix.h index e467128fbc7733af1c7aa551e6a718059194bc59..e806ebd0168c1bbc924a42a8d3ee8d229d4d90a9 100644 --- a/lib/atc/Matrix.h +++ b/lib/atc/Matrix.h @@ -356,7 +356,7 @@ DenseMatrix<T> Matrix<T>::pow(double n) const int sz=this->size(); for(INDEX i=0; i<sz; i++) { double val = R[i]; - R[i] = pow(val,n); + R[i] = std::pow(val,n); } return R; } diff --git a/lib/gpu/lal_lj_expand_ext.cpp b/lib/gpu/lal_lj_expand_ext.cpp index 94a57192b9fcfaf27db05517b7724dc25d4a4e31..a9c791803b11f3ff5343f10cfb433f0dd1d9924e 100644 --- a/lib/gpu/lal_lj_expand_ext.cpp +++ b/lib/gpu/lal_lj_expand_ext.cpp @@ -92,7 +92,7 @@ int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, // --------------------------------------------------------------------------- // Copy updated coeffs from host to device // --------------------------------------------------------------------------- -int lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, +void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double **shift) { int world_me=LJEMF.device->world_me(); diff --git a/lib/latte/Makefile.lammps.linalg b/lib/latte/Makefile.lammps.linalg new file mode 100644 index 0000000000000000000000000000000000000000..af062a87e36a1158bd4630203be8b685d629f0cd --- /dev/null +++ b/lib/latte/Makefile.lammps.linalg @@ -0,0 +1,7 @@ +# Settings that the LAMMPS build will import when this package library is used + +# GNU Fortran settings for use with bundled linalg lib + +latte_SYSINC = +latte_SYSLIB = ../../lib/latte/filelink.o -llatte -llinalg -lgfortran +latte_SYSPATH = -L../../lib/linalg -fopenmp diff --git a/lib/latte/Makefile.lammps.mpi b/lib/latte/Makefile.lammps.mpi index 6017d01535c3cc0ae3ebe7a35ce427aee2c01078..28dd3a614cd88eb6c28a794fef01c032f1e9696d 120000 --- a/lib/latte/Makefile.lammps.mpi +++ b/lib/latte/Makefile.lammps.mpi @@ -1 +1 @@ -Makefile.lammps.gfortran \ No newline at end of file +Makefile.lammps.linalg \ No newline at end of file diff --git a/lib/latte/Makefile.lammps.serial b/lib/latte/Makefile.lammps.serial index 6017d01535c3cc0ae3ebe7a35ce427aee2c01078..28dd3a614cd88eb6c28a794fef01c032f1e9696d 120000 --- a/lib/latte/Makefile.lammps.serial +++ b/lib/latte/Makefile.lammps.serial @@ -1 +1 @@ -Makefile.lammps.gfortran \ No newline at end of file +Makefile.lammps.linalg \ No newline at end of file diff --git a/lib/linalg/dcabs1.f b/lib/linalg/dcabs1.f new file mode 100644 index 0000000000000000000000000000000000000000..f6debb9ac261ffd2987feec1ef8bad9b2ec964bf --- /dev/null +++ b/lib/linalg/dcabs1.f @@ -0,0 +1,58 @@ +*> \brief \b DCABS1 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DCABS1(Z) +* +* .. Scalar Arguments .. +* COMPLEX*16 Z +* .. +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DCABS1 computes absolute value of a double complex number +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level1 +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DCABS1(Z) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 Z +* .. +* .. +* ===================================================================== +* +* .. Intrinsic Functions .. + INTRINSIC ABS,DBLE,DIMAG +* + DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z)) + RETURN + END diff --git a/lib/linalg/dgesv.f b/lib/linalg/dgesv.f new file mode 100644 index 0000000000000000000000000000000000000000..8d47f839dce221867a940cdad64ec390f789c755 --- /dev/null +++ b/lib/linalg/dgesv.f @@ -0,0 +1,179 @@ +*> \brief <b> DGESV computes the solution to system of linear equations A * X = B for GE matrices</b> +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGESV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesv.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesv.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesv.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGESV computes the solution to a real system of linear equations +*> A * X = B, +*> where A is an N-by-N matrix and X and B are N-by-NRHS matrices. +*> +*> The LU decomposition with partial pivoting and row interchanges is +*> used to factor A as +*> A = P * L * U, +*> where P is a permutation matrix, L is unit lower triangular, and U is +*> upper triangular. The factored form of A is then used to solve the +*> system of equations A * X = B. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of linear equations, i.e., the order of the +*> matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the N-by-N coefficient matrix A. +*> On exit, the factors L and U from the factorization +*> A = P*L*U; the unit diagonal elements of L are not stored. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices that define the permutation matrix P; +*> row i of the matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the N-by-NRHS matrix of right hand side matrix B. +*> On exit, if INFO = 0, the N-by-NRHS solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, U(i,i) is exactly zero. The factorization +*> has been completed, but the factor U is exactly +*> singular, so the solution could not be computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleGEsolve +* +* ===================================================================== + SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. External Subroutines .. + EXTERNAL DGETRF, DGETRS, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + IF( N.LT.0 ) THEN + INFO = -1 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -7 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGESV ', -INFO ) + RETURN + END IF +* +* Compute the LU factorization of A. +* + CALL DGETRF( N, N, A, LDA, IPIV, INFO ) + IF( INFO.EQ.0 ) THEN +* +* Solve the system A*X = B, overwriting B with X. +* + CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB, + $ INFO ) + END IF + RETURN +* +* End of DGESV +* + END diff --git a/lib/linalg/dgetrs.f b/lib/linalg/dgetrs.f new file mode 100644 index 0000000000000000000000000000000000000000..02e9832af79bbb45e570db2fe226a5324ea64d39 --- /dev/null +++ b/lib/linalg/dgetrs.f @@ -0,0 +1,225 @@ +*> \brief \b DGETRS +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DGETRS + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgetrs.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgetrs.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgetrs.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TRANS +* INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. +* INTEGER IPIV( * ) +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DGETRS solves a system of linear equations +*> A * X = B or A**T * X = B +*> with a general N-by-N matrix A using the LU factorization computed +*> by DGETRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> Specifies the form of the system of equations: +*> = 'N': A * X = B (No transpose) +*> = 'T': A**T* X = B (Transpose) +*> = 'C': A**T* X = B (Conjugate transpose = Transpose) +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in] NRHS +*> \verbatim +*> NRHS is INTEGER +*> The number of right hand sides, i.e., the number of columns +*> of the matrix B. NRHS >= 0. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> The factors L and U from the factorization A = P*L*U +*> as computed by DGETRF. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] IPIV +*> \verbatim +*> IPIV is INTEGER array, dimension (N) +*> The pivot indices from DGETRF; for 1<=i<=N, row i of the +*> matrix was interchanged with row IPIV(i). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB,NRHS) +*> On entry, the right hand side matrix B. +*> On exit, the solution matrix X. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleGEcomputational +* +* ===================================================================== + SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER TRANS + INTEGER INFO, LDA, LDB, N, NRHS +* .. +* .. Array Arguments .. + INTEGER IPIV( * ) + DOUBLE PRECISION A( LDA, * ), B( LDB, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL NOTRAN +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL DLASWP, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 + NOTRAN = LSAME( TRANS, 'N' ) + IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT. + $ LSAME( TRANS, 'C' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( NRHS.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DGETRS', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. NRHS.EQ.0 ) + $ RETURN +* + IF( NOTRAN ) THEN +* +* Solve A * X = B. +* +* Apply row interchanges to the right hand sides. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 ) +* +* Solve L*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve U*X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N, + $ NRHS, ONE, A, LDA, B, LDB ) + ELSE +* +* Solve A**T * X = B. +* +* Solve U**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS, + $ ONE, A, LDA, B, LDB ) +* +* Solve L**T *X = B, overwriting B with X. +* + CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE, + $ A, LDA, B, LDB ) +* +* Apply row interchanges to the solution vectors. +* + CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 ) + END IF +* + RETURN +* +* End of DGETRS +* + END diff --git a/lib/linalg/dladiv.f b/lib/linalg/dladiv.f new file mode 100644 index 0000000000000000000000000000000000000000..306a6b0020e39a5dd94b0045cc85ba15b33a678e --- /dev/null +++ b/lib/linalg/dladiv.f @@ -0,0 +1,128 @@ +*> \brief \b DLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLADIV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dladiv.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dladiv.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dladiv.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLADIV performs complex division in real arithmetic +*> +*> a + i*b +*> p + i*q = --------- +*> c + i*d +*> +*> The algorithm is due to Robert L. Smith and can be found +*> in D. Knuth, The art of Computer Programming, Vol.2, p.195 +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] A +*> \verbatim +*> A is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] D +*> \verbatim +*> D is DOUBLE PRECISION +*> The scalars a, b, c, and d in the above expression. +*> \endverbatim +*> +*> \param[out] P +*> \verbatim +*> P is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[out] Q +*> \verbatim +*> Q is DOUBLE PRECISION +*> The scalars p and q in the above expression. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + SUBROUTINE DLADIV( A, B, C, D, P, Q ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION A, B, C, D, P, Q +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION E, F +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* + IF( ABS( D ).LT.ABS( C ) ) THEN + E = D / C + F = C + D*E + P = ( A+B*E ) / F + Q = ( B-A*E ) / F + ELSE + E = C / D + F = D + C*E + P = ( B+A*E ) / F + Q = ( -A+B*E ) / F + END IF +* + RETURN +* +* End of DLADIV +* + END diff --git a/lib/linalg/dlapy3.f b/lib/linalg/dlapy3.f new file mode 100644 index 0000000000000000000000000000000000000000..23feecc4478a3ed002b7bdd5ec5c4c00c98603f6 --- /dev/null +++ b/lib/linalg/dlapy3.f @@ -0,0 +1,111 @@ +*> \brief \b DLAPY3 returns sqrt(x2+y2+z2). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DLAPY3 + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlapy3.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlapy3.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlapy3.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* .. Scalar Arguments .. +* DOUBLE PRECISION X, Y, Z +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause +*> unnecessary overflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] Z +*> \verbatim +*> Z is DOUBLE PRECISION +*> X, Y and Z specify the values x, y and z. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup auxOTHERauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + DOUBLE PRECISION X, Y, Z +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION W, XABS, YABS, ZABS +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SQRT +* .. +* .. Executable Statements .. +* + XABS = ABS( X ) + YABS = ABS( Y ) + ZABS = ABS( Z ) + W = MAX( XABS, YABS, ZABS ) + IF( W.EQ.ZERO ) THEN +* W can be zero for max(0,nan,0) +* adding all three entries together will make sure +* NaN will not disappear. + DLAPY3 = XABS + YABS + ZABS + ELSE + DLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+ + $ ( ZABS / W )**2 ) + END IF + RETURN +* +* End of DLAPY3 +* + END diff --git a/lib/linalg/dorg2l.f b/lib/linalg/dorg2l.f new file mode 100644 index 0000000000000000000000000000000000000000..b95fa50fc52e0b4da682052cd46e400b6ad2bcee --- /dev/null +++ b/lib/linalg/dorg2l.f @@ -0,0 +1,198 @@ +*> \brief \b DORG2L generates all or part of the orthogonal matrix Q from a QL factorization determined by sgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORG2L + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorg2l.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorg2l.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorg2l.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORG2L generates an m by n real matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL DLARF, DSCAL, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL DLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL DSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of DORG2L +* + END diff --git a/lib/linalg/dorgql.f b/lib/linalg/dorgql.f new file mode 100644 index 0000000000000000000000000000000000000000..ca4698d799dbb9ef7568a696210891aaece4ad10 --- /dev/null +++ b/lib/linalg/dorgql.f @@ -0,0 +1,296 @@ +*> \brief \b DORGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGQL + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgql.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgql.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgql.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGQL generates an M-by-N real matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by DGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by DGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL DLARFB, DLARFT, DORG2L, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'DORGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'DORGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'DORGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL DORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL DLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL DORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of DORGQL +* + END diff --git a/lib/linalg/dorgtr.f b/lib/linalg/dorgtr.f new file mode 100644 index 0000000000000000000000000000000000000000..06a7b6cc1cdc5bd335ce9e50777a006ebfab3c06 --- /dev/null +++ b/lib/linalg/dorgtr.f @@ -0,0 +1,255 @@ +*> \brief \b DORGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DORGTR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dorgtr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dorgtr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dorgtr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DORGTR generates a real orthogonal matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> DSYTRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from DSYTRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from DSYTRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by DSYTRD. +*> On exit, the N-by-N orthogonal matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is DOUBLE PRECISION array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by DSYTRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N-1). +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleOTHERcomputational +* +* ===================================================================== + SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DORGQL, DORGQR, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'DORGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'DORGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DORGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to DSYTRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL DORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to DSYTRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL DORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of DORGTR +* + END diff --git a/lib/linalg/dsyev.f b/lib/linalg/dsyev.f new file mode 100644 index 0000000000000000000000000000000000000000..64b39ed84783e6eaa8df5220437c433ad3153afe --- /dev/null +++ b/lib/linalg/dsyev.f @@ -0,0 +1,286 @@ +*> \brief <b> DSYEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for SY matrices</b> +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYEV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsyev.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsyev.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsyev.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYEV computes all eigenvalues and, optionally, eigenvectors of a +*> real symmetric matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for DSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleSYeigen +* +* ===================================================================== + SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, DLANSY + EXTERNAL LSAME, ILAENV, DLAMCH, DLANSY +* .. +* .. External Subroutines .. + EXTERNAL DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, DSYTRD, + $ XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 2 + IF( WANTZ ) + $ A( 1, 1 ) = ONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call DSYTRD to reduce symmetric matrix to tridiagonal form. +* + INDE = 1 + INDTAU = INDE + N + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* DORGTR to generate the orthogonal matrix, then call DSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, WORK( INDE ), INFO ) + ELSE + CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ), + $ INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of DSYEV +* + END diff --git a/lib/linalg/dsygv.f b/lib/linalg/dsygv.f new file mode 100644 index 0000000000000000000000000000000000000000..e55631851869147a167a53aeeda1f72247794f1a --- /dev/null +++ b/lib/linalg/dsygv.f @@ -0,0 +1,314 @@ +*> \brief \b DSYGST +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download DSYGV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsygv.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsygv.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsygv.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, +* LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DSYGV computes all the eigenvalues, and optionally, the eigenvectors +*> of a real generalized symmetric-definite eigenproblem, of the form +*> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. +*> Here A and B are assumed to be symmetric and B is also +*> positive definite. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] ITYPE +*> \verbatim +*> ITYPE is INTEGER +*> Specifies the problem type to be solved: +*> = 1: A*x = (lambda)*B*x +*> = 2: A*B*x = (lambda)*x +*> = 3: B*A*x = (lambda)*x +*> \endverbatim +*> +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangles of A and B are stored; +*> = 'L': Lower triangles of A and B are stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrices A and B. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is DOUBLE PRECISION array, dimension (LDA, N) +*> On entry, the symmetric matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> matrix Z of eigenvectors. The eigenvectors are normalized +*> as follows: +*> if ITYPE = 1 or 2, Z**T*B*Z = I; +*> if ITYPE = 3, Z**T*inv(B)*Z = I. +*> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') +*> or the lower triangle (if UPLO='L') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[in,out] B +*> \verbatim +*> B is DOUBLE PRECISION array, dimension (LDB, N) +*> On entry, the symmetric positive definite matrix B. +*> If UPLO = 'U', the leading N-by-N upper triangular part of B +*> contains the upper triangular part of the matrix B. +*> If UPLO = 'L', the leading N-by-N lower triangular part of B +*> contains the lower triangular part of the matrix B. +*> +*> On exit, if INFO <= N, the part of B containing the matrix is +*> overwritten by the triangular factor U or L from the Cholesky +*> factorization B = U**T*U or B = L*L**T. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> The leading dimension of the array B. LDB >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,3*N-1). +*> For optimal efficiency, LWORK >= (NB+2)*N, +*> where NB is the blocksize for DSYTRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: DPOTRF or DSYEV returned an error code: +*> <= N: if INFO = i, DSYEV failed to converge; +*> i off-diagonal elements of an intermediate +*> tridiagonal form did not converge to zero; +*> > N: if INFO = N + i, for 1 <= i <= N, then the leading +*> minor of order i of B is not positive definite. +*> The factorization of B could not be completed and +*> no eigenvalues or eigenvectors were computed. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup doubleSYeigen +* +* ===================================================================== + SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK, + $ LWORK, INFO ) +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, ITYPE, LDA, LDB, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION A( LDA, * ), B( LDB, * ), W( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER, WANTZ + CHARACTER TRANS + INTEGER LWKMIN, LWKOPT, NB, NEIG +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL DPOTRF, DSYEV, DSYGST, DTRMM, DTRSM, XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN + INFO = -1 + ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -2 + ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN + INFO = -3 + ELSE IF( N.LT.0 ) THEN + INFO = -4 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -6 + ELSE IF( LDB.LT.MAX( 1, N ) ) THEN + INFO = -8 + END IF +* + IF( INFO.EQ.0 ) THEN + LWKMIN = MAX( 1, 3*N - 1 ) + NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( LWKMIN, ( NB + 2 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN + INFO = -11 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'DSYGV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* +* Form a Cholesky factorization of B. +* + CALL DPOTRF( UPLO, N, B, LDB, INFO ) + IF( INFO.NE.0 ) THEN + INFO = N + INFO + RETURN + END IF +* +* Transform problem to standard eigenvalue problem and solve. +* + CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO ) + CALL DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO ) +* + IF( WANTZ ) THEN +* +* Backtransform eigenvectors to the original problem. +* + NEIG = N + IF( INFO.GT.0 ) + $ NEIG = INFO - 1 + IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN +* +* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; +* backtransform eigenvectors: x = inv(L)**T*y or inv(U)*y +* + IF( UPPER ) THEN + TRANS = 'N' + ELSE + TRANS = 'T' + END IF +* + CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* For B*A*x=(lambda)*x; +* backtransform eigenvectors: x = L*y or U**T*y +* + IF( UPPER ) THEN + TRANS = 'T' + ELSE + TRANS = 'N' + END IF +* + CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE, + $ B, LDB, A, LDA ) + END IF + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of DSYGV +* + END diff --git a/lib/linalg/dznrm2.f b/lib/linalg/dznrm2.f new file mode 100644 index 0000000000000000000000000000000000000000..b5713a2bfaf0b92dd3e27e8a007eb91130c2195a --- /dev/null +++ b/lib/linalg/dznrm2.f @@ -0,0 +1,119 @@ +*> \brief \b DZNRM2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX) +* +* .. Scalar Arguments .. +* INTEGER INCX,N +* .. +* .. Array Arguments .. +* COMPLEX*16 X(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> DZNRM2 returns the euclidean norm of a vector via the function +*> name, so that +*> +*> DZNRM2 := sqrt( x**H*x ) +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup double_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> -- This version written on 25-October-1982. +*> Modified on 14-October-1993 to inline the call to ZLASSQ. +*> Sven Hammarling, Nag Ltd. +*> \endverbatim +*> +* ===================================================================== + DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX,N +* .. +* .. Array Arguments .. + COMPLEX*16 X(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE,ZERO + PARAMETER (ONE=1.0D+0,ZERO=0.0D+0) +* .. +* .. Local Scalars .. + DOUBLE PRECISION NORM,SCALE,SSQ,TEMP + INTEGER IX +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS,DBLE,DIMAG,SQRT +* .. + IF (N.LT.1 .OR. INCX.LT.1) THEN + NORM = ZERO + ELSE + SCALE = ZERO + SSQ = ONE +* The following loop is equivalent to this call to the LAPACK +* auxiliary routine: +* CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) +* + DO 10 IX = 1,1 + (N-1)*INCX,INCX + IF (DBLE(X(IX)).NE.ZERO) THEN + TEMP = ABS(DBLE(X(IX))) + IF (SCALE.LT.TEMP) THEN + SSQ = ONE + SSQ* (SCALE/TEMP)**2 + SCALE = TEMP + ELSE + SSQ = SSQ + (TEMP/SCALE)**2 + END IF + END IF + IF (DIMAG(X(IX)).NE.ZERO) THEN + TEMP = ABS(DIMAG(X(IX))) + IF (SCALE.LT.TEMP) THEN + SSQ = ONE + SSQ* (SCALE/TEMP)**2 + SCALE = TEMP + ELSE + SSQ = SSQ + (TEMP/SCALE)**2 + END IF + END IF + 10 CONTINUE + NORM = SCALE*SQRT(SSQ) + END IF +* + DZNRM2 = NORM + RETURN +* +* End of DZNRM2. +* + END diff --git a/lib/linalg/ilazlc.f b/lib/linalg/ilazlc.f new file mode 100644 index 0000000000000000000000000000000000000000..718b277dfa6596e95fcbe37972f5af9d1fb79fb3 --- /dev/null +++ b/lib/linalg/ilazlc.f @@ -0,0 +1,118 @@ +*> \brief \b ILAZLC scans a matrix for its last non-zero column. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLC + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlc.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlc.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlc.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLC scans A for its last non-zero column. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLC( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( N.EQ.0 ) THEN + ILAZLC = N + ELSE IF( A(1, N).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLC = N + ELSE +* Now scan each column from the end, returning with the first non-zero. + DO ILAZLC = N, 1, -1 + DO I = 1, M + IF( A(I, ILAZLC).NE.ZERO ) RETURN + END DO + END DO + END IF + RETURN + END diff --git a/lib/linalg/ilazlr.f b/lib/linalg/ilazlr.f new file mode 100644 index 0000000000000000000000000000000000000000..44697214c75b0358a1568e2f46aa3d2b449f7b5c --- /dev/null +++ b/lib/linalg/ilazlr.f @@ -0,0 +1,121 @@ +*> \brief \b ILAZLR scans a matrix for its last non-zero row. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ILAZLR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ilazlr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ilazlr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ilazlr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* .. Scalar Arguments .. +* INTEGER M, N, LDA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ILAZLR scans A for its last non-zero row. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The m by n matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + INTEGER FUNCTION ILAZLR( M, N, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER M, N, LDA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = (0.0D+0, 0.0D+0) ) +* .. +* .. Local Scalars .. + INTEGER I, J +* .. +* .. Executable Statements .. +* +* Quick test for the common case where one corner is non-zero. + IF( M.EQ.0 ) THEN + ILAZLR = M + ELSE IF( A(M, 1).NE.ZERO .OR. A(M, N).NE.ZERO ) THEN + ILAZLR = M + ELSE +* Scan up each column tracking the last zero row seen. + ILAZLR = 0 + DO J = 1, N + I=M + DO WHILE((A(MAX(I,1),J).EQ.ZERO).AND.(I.GE.1)) + I=I-1 + ENDDO + ILAZLR = MAX( ILAZLR, I ) + END DO + END IF + RETURN + END diff --git a/lib/linalg/zaxpy.f b/lib/linalg/zaxpy.f new file mode 100644 index 0000000000000000000000000000000000000000..e6f5e1f6dbfe289ad666ffb6652387be9a808666 --- /dev/null +++ b/lib/linalg/zaxpy.f @@ -0,0 +1,102 @@ +*> \brief \b ZAXPY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY) +* +* .. Scalar Arguments .. +* COMPLEX*16 ZA +* INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. +* COMPLEX*16 ZX(*),ZY(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZAXPY constant times a vector plus a vector. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> jack dongarra, 3/11/78. +*> modified 12/3/93, array(1) declarations changed to array(*) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ZA + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX*16 ZX(*),ZY(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I,IX,IY +* .. +* .. External Functions .. + DOUBLE PRECISION DCABS1 + EXTERNAL DCABS1 +* .. + IF (N.LE.0) RETURN + IF (DCABS1(ZA).EQ.0.0d0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN +* +* code for both increments equal to 1 +* + DO I = 1,N + ZY(I) = ZY(I) + ZA*ZX(I) + END DO + ELSE +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO I = 1,N + ZY(IY) = ZY(IY) + ZA*ZX(IX) + IX = IX + INCX + IY = IY + INCY + END DO + END IF +* + RETURN + END diff --git a/lib/linalg/zcopy.f b/lib/linalg/zcopy.f new file mode 100644 index 0000000000000000000000000000000000000000..baeafd5c3b211b62e3dd415508e861579461fcc9 --- /dev/null +++ b/lib/linalg/zcopy.f @@ -0,0 +1,94 @@ +*> \brief \b ZCOPY +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY) +* +* .. Scalar Arguments .. +* INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. +* COMPLEX*16 ZX(*),ZY(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZCOPY copies a vector, x, to a vector, y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> jack dongarra, linpack, 4/11/78. +*> modified 12/3/93, array(1) declarations changed to array(*) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX*16 ZX(*),ZY(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN +* +* code for both increments equal to 1 +* + DO I = 1,N + ZY(I) = ZX(I) + END DO + ELSE +* +* code for unequal increments or equal increments +* not equal to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO I = 1,N + ZY(IY) = ZX(IX) + IX = IX + INCX + IY = IY + INCY + END DO + END IF + RETURN + END diff --git a/lib/linalg/zgemm.f b/lib/linalg/zgemm.f new file mode 100644 index 0000000000000000000000000000000000000000..f423315508a0de2fc5ce314db47bab240da9d7fa --- /dev/null +++ b/lib/linalg/zgemm.f @@ -0,0 +1,489 @@ +*> \brief \b ZGEMM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA,BETA +* INTEGER K,LDA,LDB,LDC,M,N +* CHARACTER TRANSA,TRANSB +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMM performs one of the matrix-matrix operations +*> +*> C := alpha*op( A )*op( B ) + beta*C, +*> +*> where op( X ) is one of +*> +*> op( X ) = X or op( X ) = X**T or op( X ) = X**H, +*> +*> alpha and beta are scalars, and A, B and C are matrices, with op( A ) +*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANSA +*> \verbatim +*> TRANSA is CHARACTER*1 +*> On entry, TRANSA specifies the form of op( A ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSA = 'N' or 'n', op( A ) = A. +*> +*> TRANSA = 'T' or 't', op( A ) = A**T. +*> +*> TRANSA = 'C' or 'c', op( A ) = A**H. +*> \endverbatim +*> +*> \param[in] TRANSB +*> \verbatim +*> TRANSB is CHARACTER*1 +*> On entry, TRANSB specifies the form of op( B ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSB = 'N' or 'n', op( B ) = B. +*> +*> TRANSB = 'T' or 't', op( B ) = B**T. +*> +*> TRANSB = 'C' or 'c', op( B ) = B**H. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix +*> op( A ) and of the matrix C. M must be at least zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix +*> op( B ) and the number of columns of the matrix C. N must be +*> at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry, K specifies the number of columns of the matrix +*> op( A ) and the number of rows of the matrix op( B ). K must +*> be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> k when TRANSA = 'N' or 'n', and is m otherwise. +*> Before entry with TRANSA = 'N' or 'n', the leading m by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by m part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANSA = 'N' or 'n' then +*> LDA must be at least max( 1, m ), otherwise LDA must be at +*> least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +*> n when TRANSB = 'N' or 'n', and is k otherwise. +*> Before entry with TRANSB = 'N' or 'n', the leading k by n +*> part of the array B must contain the matrix B, otherwise +*> the leading n by k part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANSB = 'N' or 'n' then +*> LDB must be at least max( 1, k ), otherwise LDB must be at +*> least max( 1, n ). +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then C need not be set on input. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> Before entry, the leading m by n part of the array C must +*> contain the matrix C, except when beta is zero, in which +*> case C need not be set on entry. +*> On exit, the array C is overwritten by the m by n matrix +*> ( alpha*op( A )*op( B ) + beta*C ). +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, m ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA,BETA + INTEGER K,LDA,LDB,LDC,M,N + CHARACTER TRANSA,TRANSB +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB + LOGICAL CONJA,CONJB,NOTA,NOTB +* .. +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER (ONE= (1.0D+0,0.0D+0)) + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* +* Set NOTA and NOTB as true if A and B respectively are not +* conjugated or transposed, set CONJA and CONJB as true if A and +* B respectively are to be transposed but not conjugated and set +* NROWA, NCOLA and NROWB as the number of rows and columns of A +* and the number of rows of B respectively. +* + NOTA = LSAME(TRANSA,'N') + NOTB = LSAME(TRANSB,'N') + CONJA = LSAME(TRANSA,'C') + CONJB = LSAME(TRANSB,'C') + IF (NOTA) THEN + NROWA = M + NCOLA = K + ELSE + NROWA = K + NCOLA = M + END IF + IF (NOTB) THEN + NROWB = K + ELSE + NROWB = N + END IF +* +* Test the input parameters. +* + INFO = 0 + IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND. + + (.NOT.LSAME(TRANSA,'T'))) THEN + INFO = 1 + ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND. + + (.NOT.LSAME(TRANSB,'T'))) THEN + INFO = 2 + ELSE IF (M.LT.0) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (K.LT.0) THEN + INFO = 5 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 8 + ELSE IF (LDB.LT.MAX(1,NROWB)) THEN + INFO = 10 + ELSE IF (LDC.LT.MAX(1,M)) THEN + INFO = 13 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZGEMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (BETA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,M + C(I,J) = BETA*C(I,J) + 30 CONTINUE + 40 CONTINUE + END IF + RETURN + END IF +* +* Start the operations. +* + IF (NOTB) THEN + IF (NOTA) THEN +* +* Form C := alpha*A*B + beta*C. +* + DO 90 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 50 I = 1,M + C(I,J) = ZERO + 50 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 60 I = 1,M + C(I,J) = BETA*C(I,J) + 60 CONTINUE + END IF + DO 80 L = 1,K + IF (B(L,J).NE.ZERO) THEN + TEMP = ALPHA*B(L,J) + DO 70 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 70 CONTINUE + END IF + 80 CONTINUE + 90 CONTINUE + ELSE IF (CONJA) THEN +* +* Form C := alpha*A**H*B + beta*C. +* + DO 120 J = 1,N + DO 110 I = 1,M + TEMP = ZERO + DO 100 L = 1,K + TEMP = TEMP + DCONJG(A(L,I))*B(L,J) + 100 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 110 CONTINUE + 120 CONTINUE + ELSE +* +* Form C := alpha*A**T*B + beta*C +* + DO 150 J = 1,N + DO 140 I = 1,M + TEMP = ZERO + DO 130 L = 1,K + TEMP = TEMP + A(L,I)*B(L,J) + 130 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 140 CONTINUE + 150 CONTINUE + END IF + ELSE IF (NOTA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A*B**H + beta*C. +* + DO 200 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 160 I = 1,M + C(I,J) = ZERO + 160 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 170 I = 1,M + C(I,J) = BETA*C(I,J) + 170 CONTINUE + END IF + DO 190 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*DCONJG(B(J,L)) + DO 180 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE +* +* Form C := alpha*A*B**T + beta*C +* + DO 250 J = 1,N + IF (BETA.EQ.ZERO) THEN + DO 210 I = 1,M + C(I,J) = ZERO + 210 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 220 I = 1,M + C(I,J) = BETA*C(I,J) + 220 CONTINUE + END IF + DO 240 L = 1,K + IF (B(J,L).NE.ZERO) THEN + TEMP = ALPHA*B(J,L) + DO 230 I = 1,M + C(I,J) = C(I,J) + TEMP*A(I,L) + 230 CONTINUE + END IF + 240 CONTINUE + 250 CONTINUE + END IF + ELSE IF (CONJA) THEN + IF (CONJB) THEN +* +* Form C := alpha*A**H*B**H + beta*C. +* + DO 280 J = 1,N + DO 270 I = 1,M + TEMP = ZERO + DO 260 L = 1,K + TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L)) + 260 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 270 CONTINUE + 280 CONTINUE + ELSE +* +* Form C := alpha*A**H*B**T + beta*C +* + DO 310 J = 1,N + DO 300 I = 1,M + TEMP = ZERO + DO 290 L = 1,K + TEMP = TEMP + DCONJG(A(L,I))*B(J,L) + 290 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 300 CONTINUE + 310 CONTINUE + END IF + ELSE + IF (CONJB) THEN +* +* Form C := alpha*A**T*B**H + beta*C +* + DO 340 J = 1,N + DO 330 I = 1,M + TEMP = ZERO + DO 320 L = 1,K + TEMP = TEMP + A(L,I)*DCONJG(B(J,L)) + 320 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 330 CONTINUE + 340 CONTINUE + ELSE +* +* Form C := alpha*A**T*B**T + beta*C +* + DO 370 J = 1,N + DO 360 I = 1,M + TEMP = ZERO + DO 350 L = 1,K + TEMP = TEMP + A(L,I)*B(J,L) + 350 CONTINUE + IF (BETA.EQ.ZERO) THEN + C(I,J) = ALPHA*TEMP + ELSE + C(I,J) = ALPHA*TEMP + BETA*C(I,J) + END IF + 360 CONTINUE + 370 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMM . +* + END diff --git a/lib/linalg/zgemv.f b/lib/linalg/zgemv.f new file mode 100644 index 0000000000000000000000000000000000000000..4e174c956c9ee8cac85b1d5d765e92f838c4d777 --- /dev/null +++ b/lib/linalg/zgemv.f @@ -0,0 +1,354 @@ +*> \brief \b ZGEMV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA,BETA +* INTEGER INCX,INCY,LDA,M,N +* CHARACTER TRANS +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGEMV performs one of the matrix-vector operations +*> +*> y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y, or +*> +*> y := alpha*A**H*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are vectors and A is an +*> m by n matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' y := alpha*A*x + beta*y. +*> +*> TRANS = 'T' or 't' y := alpha*A**T*x + beta*y. +*> +*> TRANS = 'C' or 'c' y := alpha*A**H*x + beta*y. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array of DIMENSION at least +*> ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. +*> Before entry, the incremented array X must contain the +*> vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array of DIMENSION at least +*> ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' +*> and at least +*> ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. +*> Before entry with BETA non-zero, the incremented array Y +*> must contain the vector y. On exit, Y is overwritten by the +*> updated vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> The vector and matrix arguments are not referenced when N = 0, or M = 0 +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA,BETA + INTEGER INCX,INCY,LDA,M,N + CHARACTER TRANS +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER (ONE= (1.0D+0,0.0D+0)) + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY + LOGICAL NOCONJ +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 1 + ELSE IF (M.LT.0) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + ELSE IF (INCY.EQ.0) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZGEMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. + + ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* + NOCONJ = LSAME(TRANS,'T') +* +* Set LENX and LENY, the lengths of the vectors x and y, and set +* up the start points in X and Y. +* + IF (LSAME(TRANS,'N')) THEN + LENX = N + LENY = M + ELSE + LENX = M + LENY = N + END IF + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (LENX-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (LENY-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,LENY + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,LENY + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,LENY + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,LENY + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(TRANS,'N')) THEN +* +* Form y := alpha*A*x + y. +* + JX = KX + IF (INCY.EQ.1) THEN + DO 60 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + DO 50 I = 1,M + Y(I) = Y(I) + TEMP*A(I,J) + 50 CONTINUE + END IF + JX = JX + INCX + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = ALPHA*X(JX) + IY = KY + DO 70 I = 1,M + Y(IY) = Y(IY) + TEMP*A(I,J) + IY = IY + INCY + 70 CONTINUE + END IF + JX = JX + INCX + 80 CONTINUE + END IF + ELSE +* +* Form y := alpha*A**T*x + y or y := alpha*A**H*x + y. +* + JY = KY + IF (INCX.EQ.1) THEN + DO 110 J = 1,N + TEMP = ZERO + IF (NOCONJ) THEN + DO 90 I = 1,M + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + ELSE + DO 100 I = 1,M + TEMP = TEMP + DCONJG(A(I,J))*X(I) + 100 CONTINUE + END IF + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 110 CONTINUE + ELSE + DO 140 J = 1,N + TEMP = ZERO + IX = KX + IF (NOCONJ) THEN + DO 120 I = 1,M + TEMP = TEMP + A(I,J)*X(IX) + IX = IX + INCX + 120 CONTINUE + ELSE + DO 130 I = 1,M + TEMP = TEMP + DCONJG(A(I,J))*X(IX) + IX = IX + INCX + 130 CONTINUE + END IF + Y(JY) = Y(JY) + ALPHA*TEMP + JY = JY + INCY + 140 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZGEMV . +* + END diff --git a/lib/linalg/zgerc.f b/lib/linalg/zgerc.f new file mode 100644 index 0000000000000000000000000000000000000000..accfeafc053ad42c844281de2739d62148d1a602 --- /dev/null +++ b/lib/linalg/zgerc.f @@ -0,0 +1,227 @@ +*> \brief \b ZGERC +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA +* INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZGERC performs the rank 1 operation +*> +*> A := alpha*x*y**H + A, +*> +*> where alpha is a scalar, x is an m element vector, y is an n element +*> vector and A is an m by n matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of the matrix A. +*> M must be at least zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array of dimension at least +*> ( 1 + ( m - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the m +*> element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> Before entry, the leading m by n part of the array A must +*> contain the matrix of coefficients. On exit, A is +*> overwritten by the updated matrix. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, m ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX,INCY,LDA,M,N +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I,INFO,IX,J,JY,KX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (M.LT.0) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,M)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZGERC ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (INCY.GT.0) THEN + JY = 1 + ELSE + JY = 1 - (N-1)*INCY + END IF + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*DCONJG(Y(JY)) + DO 10 I = 1,M + A(I,J) = A(I,J) + X(I)*TEMP + 10 CONTINUE + END IF + JY = JY + INCY + 20 CONTINUE + ELSE + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (M-1)*INCX + END IF + DO 40 J = 1,N + IF (Y(JY).NE.ZERO) THEN + TEMP = ALPHA*DCONJG(Y(JY)) + IX = KX + DO 30 I = 1,M + A(I,J) = A(I,J) + X(IX)*TEMP + IX = IX + INCX + 30 CONTINUE + END IF + JY = JY + INCY + 40 CONTINUE + END IF +* + RETURN +* +* End of ZGERC . +* + END diff --git a/lib/linalg/zheev.f b/lib/linalg/zheev.f new file mode 100644 index 0000000000000000000000000000000000000000..adba990f0a9d396198cf99711d38364e90f4e514 --- /dev/null +++ b/lib/linalg/zheev.f @@ -0,0 +1,298 @@ +*> \brief <b> ZHEEV computes the eigenvalues and, optionally, the left and/or right eigenvectors for HE matrices</b> +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHEEV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zheev.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zheev.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zheev.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, +* INFO ) +* +* .. Scalar Arguments .. +* CHARACTER JOBZ, UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION RWORK( * ), W( * ) +* COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEEV computes all eigenvalues and, optionally, eigenvectors of a +*> complex Hermitian matrix A. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] JOBZ +*> \verbatim +*> JOBZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only; +*> = 'V': Compute eigenvalues and eigenvectors. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA, N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the +*> leading N-by-N upper triangular part of A contains the +*> upper triangular part of the matrix A. If UPLO = 'L', +*> the leading N-by-N lower triangular part of A contains +*> the lower triangular part of the matrix A. +*> On exit, if JOBZ = 'V', then if INFO = 0, A contains the +*> orthonormal eigenvectors of the matrix A. +*> If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') +*> or the upper triangle (if UPLO='U') of A, including the +*> diagonal, is destroyed. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is DOUBLE PRECISION array, dimension (N) +*> If INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The length of the array WORK. LWORK >= max(1,2*N-1). +*> For optimal efficiency, LWORK >= (NB+1)*N, +*> where NB is the blocksize for ZHETRD returned by ILAENV. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] RWORK +*> \verbatim +*> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2)) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: if INFO = i, the algorithm failed to converge; i +*> off-diagonal elements of an intermediate tridiagonal +*> form did not converge to zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16HEeigen +* +* ===================================================================== + SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, + $ INFO ) +* +* -- LAPACK driver routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER JOBZ, UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION RWORK( * ), W( * ) + COMPLEX*16 A( LDA, * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D0, 0.0D0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LOWER, LQUERY, WANTZ + INTEGER IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE, + $ LLWORK, LWKOPT, NB + DOUBLE PRECISION ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, + $ SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + DOUBLE PRECISION DLAMCH, ZLANHE + EXTERNAL LSAME, ILAENV, DLAMCH, ZLANHE +* .. +* .. External Subroutines .. + EXTERNAL DSCAL, DSTERF, XERBLA, ZHETRD, ZLASCL, ZSTEQR, + $ ZUNGTR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + WANTZ = LSAME( JOBZ, 'V' ) + LOWER = LSAME( UPLO, 'L' ) + LQUERY = ( LWORK.EQ.-1 ) +* + INFO = 0 + IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN + INFO = -1 + ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN + INFO = -2 + ELSE IF( N.LT.0 ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = MAX( 1, ( NB+1 )*N ) + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) + $ INFO = -8 + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHEEV ', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + RETURN + END IF +* + IF( N.EQ.1 ) THEN + W( 1 ) = A( 1, 1 ) + WORK( 1 ) = 1 + IF( WANTZ ) + $ A( 1, 1 ) = CONE + RETURN + END IF +* +* Get machine constants. +* + SAFMIN = DLAMCH( 'Safe minimum' ) + EPS = DLAMCH( 'Precision' ) + SMLNUM = SAFMIN / EPS + BIGNUM = ONE / SMLNUM + RMIN = SQRT( SMLNUM ) + RMAX = SQRT( BIGNUM ) +* +* Scale matrix to allowable range, if necessary. +* + ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK ) + ISCALE = 0 + IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN + ISCALE = 1 + SIGMA = RMIN / ANRM + ELSE IF( ANRM.GT.RMAX ) THEN + ISCALE = 1 + SIGMA = RMAX / ANRM + END IF + IF( ISCALE.EQ.1 ) + $ CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO ) +* +* Call ZHETRD to reduce Hermitian matrix to tridiagonal form. +* + INDE = 1 + INDTAU = 1 + INDWRK = INDTAU + N + LLWORK = LWORK - INDWRK + 1 + CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ), + $ WORK( INDWRK ), LLWORK, IINFO ) +* +* For eigenvalues only, call DSTERF. For eigenvectors, first call +* ZUNGTR to generate the unitary matrix, then call ZSTEQR. +* + IF( .NOT.WANTZ ) THEN + CALL DSTERF( N, W, RWORK( INDE ), INFO ) + ELSE + CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ), + $ LLWORK, IINFO ) + INDWRK = INDE + N + CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA, + $ RWORK( INDWRK ), INFO ) + END IF +* +* If matrix was scaled, then rescale eigenvalues appropriately. +* + IF( ISCALE.EQ.1 ) THEN + IF( INFO.EQ.0 ) THEN + IMAX = N + ELSE + IMAX = INFO - 1 + END IF + CALL DSCAL( IMAX, ONE / SIGMA, W, 1 ) + END IF +* +* Set WORK(1) to optimal complex workspace size. +* + WORK( 1 ) = LWKOPT +* + RETURN +* +* End of ZHEEV +* + END diff --git a/lib/linalg/zhemv.f b/lib/linalg/zhemv.f new file mode 100644 index 0000000000000000000000000000000000000000..34216fbfff8a12b8d4c18cbdb2a7aa70a2d275e3 --- /dev/null +++ b/lib/linalg/zhemv.f @@ -0,0 +1,337 @@ +*> \brief \b ZHEMV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA,BETA +* INTEGER INCX,INCY,LDA,N +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHEMV performs the matrix-vector operation +*> +*> y := alpha*A*x + beta*y, +*> +*> where alpha and beta are scalars, x and y are n element vectors and +*> A is an n by n hermitian matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the hermitian matrix and the strictly +*> lower triangular part of A is not referenced. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the hermitian matrix and the strictly +*> upper triangular part of A is not referenced. +*> Note that the imaginary parts of the diagonal elements need +*> not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the n +*> element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> On entry, BETA specifies the scalar beta. When BETA is +*> supplied as zero then Y need not be set on input. +*> \endverbatim +*> +*> \param[in,out] Y +*> \verbatim +*> Y is COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. On exit, Y is overwritten by the updated +*> vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> The vector and matrix arguments are not referenced when N = 0, or M = 0 +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA,BETA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER (ONE= (1.0D+0,0.0D+0)) + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE,DCONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 5 + ELSE IF (INCX.EQ.0) THEN + INFO = 7 + ELSE IF (INCY.EQ.0) THEN + INFO = 10 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZHEMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN +* +* Set up the start points in X and Y. +* + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* +* First form y := beta*y. +* + IF (BETA.NE.ONE) THEN + IF (INCY.EQ.1) THEN + IF (BETA.EQ.ZERO) THEN + DO 10 I = 1,N + Y(I) = ZERO + 10 CONTINUE + ELSE + DO 20 I = 1,N + Y(I) = BETA*Y(I) + 20 CONTINUE + END IF + ELSE + IY = KY + IF (BETA.EQ.ZERO) THEN + DO 30 I = 1,N + Y(IY) = ZERO + IY = IY + INCY + 30 CONTINUE + ELSE + DO 40 I = 1,N + Y(IY) = BETA*Y(IY) + IY = IY + INCY + 40 CONTINUE + END IF + END IF + END IF + IF (ALPHA.EQ.ZERO) RETURN + IF (LSAME(UPLO,'U')) THEN +* +* Form y when A is stored in upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + DO 50 I = 1,J - 1 + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I) + 50 CONTINUE + Y(J) = Y(J) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2 + 60 CONTINUE + ELSE + JX = KX + JY = KY + DO 80 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + IX = KX + IY = KY + DO 70 I = 1,J - 1 + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX) + IX = IX + INCX + IY = IY + INCY + 70 CONTINUE + Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + ELSE +* +* Form y when A is stored in lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 100 J = 1,N + TEMP1 = ALPHA*X(J) + TEMP2 = ZERO + Y(J) = Y(J) + TEMP1*DBLE(A(J,J)) + DO 90 I = J + 1,N + Y(I) = Y(I) + TEMP1*A(I,J) + TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I) + 90 CONTINUE + Y(J) = Y(J) + ALPHA*TEMP2 + 100 CONTINUE + ELSE + JX = KX + JY = KY + DO 120 J = 1,N + TEMP1 = ALPHA*X(JX) + TEMP2 = ZERO + Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J)) + IX = JX + IY = JY + DO 110 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + Y(IY) = Y(IY) + TEMP1*A(I,J) + TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX) + 110 CONTINUE + Y(JY) = Y(JY) + ALPHA*TEMP2 + JX = JX + INCX + JY = JY + INCY + 120 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHEMV . +* + END diff --git a/lib/linalg/zher2.f b/lib/linalg/zher2.f new file mode 100644 index 0000000000000000000000000000000000000000..e2a02c3c68fb3d705aa2c734d4e21fc722908def --- /dev/null +++ b/lib/linalg/zher2.f @@ -0,0 +1,317 @@ +*> \brief \b ZHER2 +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA +* INTEGER INCX,INCY,LDA,N +* CHARACTER UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHER2 performs the hermitian rank 2 operation +*> +*> A := alpha*x*y**H + conjg( alpha )*y*x**H + A, +*> +*> where alpha is a scalar, x and y are n element vectors and A is an n +*> by n hermitian matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array A is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of A +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of A +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the n +*> element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCY ) ). +*> Before entry, the incremented array Y must contain the n +*> element vector y. +*> \endverbatim +*> +*> \param[in] INCY +*> \verbatim +*> INCY is INTEGER +*> On entry, INCY specifies the increment for the elements of +*> Y. INCY must not be zero. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular part of the hermitian matrix and the strictly +*> lower triangular part of A is not referenced. On exit, the +*> upper triangular part of the array A is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular part of the hermitian matrix and the strictly +*> upper triangular part of A is not referenced. On exit, the +*> lower triangular part of the array A is overwritten by the +*> lower triangular part of the updated matrix. +*> Note that the imaginary parts of the diagonal elements need +*> not be set, they are assumed to be zero, and on exit they +*> are set to zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER INCX,INCY,LDA,N + CHARACTER UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),X(*),Y(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP1,TEMP2 + INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE,DCONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (N.LT.0) THEN + INFO = 2 + ELSE IF (INCX.EQ.0) THEN + INFO = 5 + ELSE IF (INCY.EQ.0) THEN + INFO = 7 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 9 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZHER2 ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN +* +* Set up the start points in X and Y if the increments are not both +* unity. +* + IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN + IF (INCX.GT.0) THEN + KX = 1 + ELSE + KX = 1 - (N-1)*INCX + END IF + IF (INCY.GT.0) THEN + KY = 1 + ELSE + KY = 1 - (N-1)*INCY + END IF + JX = KX + JY = KY + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through the triangular part +* of A. +* + IF (LSAME(UPLO,'U')) THEN +* +* Form A when A is stored in the upper triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 20 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(Y(J)) + TEMP2 = DCONJG(ALPHA*X(J)) + DO 10 I = 1,J - 1 + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 10 CONTINUE + A(J,J) = DBLE(A(J,J)) + + + DBLE(X(J)*TEMP1+Y(J)*TEMP2) + ELSE + A(J,J) = DBLE(A(J,J)) + END IF + 20 CONTINUE + ELSE + DO 40 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(Y(JY)) + TEMP2 = DCONJG(ALPHA*X(JX)) + IX = KX + IY = KY + DO 30 I = 1,J - 1 + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + IX = IX + INCX + IY = IY + INCY + 30 CONTINUE + A(J,J) = DBLE(A(J,J)) + + + DBLE(X(JX)*TEMP1+Y(JY)*TEMP2) + ELSE + A(J,J) = DBLE(A(J,J)) + END IF + JX = JX + INCX + JY = JY + INCY + 40 CONTINUE + END IF + ELSE +* +* Form A when A is stored in the lower triangle. +* + IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN + DO 60 J = 1,N + IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(Y(J)) + TEMP2 = DCONJG(ALPHA*X(J)) + A(J,J) = DBLE(A(J,J)) + + + DBLE(X(J)*TEMP1+Y(J)*TEMP2) + DO 50 I = J + 1,N + A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2 + 50 CONTINUE + ELSE + A(J,J) = DBLE(A(J,J)) + END IF + 60 CONTINUE + ELSE + DO 80 J = 1,N + IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(Y(JY)) + TEMP2 = DCONJG(ALPHA*X(JX)) + A(J,J) = DBLE(A(J,J)) + + + DBLE(X(JX)*TEMP1+Y(JY)*TEMP2) + IX = JX + IY = JY + DO 70 I = J + 1,N + IX = IX + INCX + IY = IY + INCY + A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2 + 70 CONTINUE + ELSE + A(J,J) = DBLE(A(J,J)) + END IF + JX = JX + INCX + JY = JY + INCY + 80 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2 . +* + END diff --git a/lib/linalg/zher2k.f b/lib/linalg/zher2k.f new file mode 100644 index 0000000000000000000000000000000000000000..0b91bd2cbbf09f79d782ac7b1b05313ca55c9f7e --- /dev/null +++ b/lib/linalg/zher2k.f @@ -0,0 +1,443 @@ +*> \brief \b ZHER2K +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA +* DOUBLE PRECISION BETA +* INTEGER K,LDA,LDB,LDC,N +* CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHER2K performs one of the hermitian rank 2k operations +*> +*> C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C, +*> +*> or +*> +*> C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C, +*> +*> where alpha and beta are scalars with beta real, C is an n by n +*> hermitian matrix and A and B are n by k matrices in the first case +*> and k by n matrices in the second case. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the upper or lower +*> triangular part of the array C is to be referenced as +*> follows: +*> +*> UPLO = 'U' or 'u' Only the upper triangular part of C +*> is to be referenced. +*> +*> UPLO = 'L' or 'l' Only the lower triangular part of C +*> is to be referenced. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' C := alpha*A*B**H + +*> conjg( alpha )*B*A**H + +*> beta*C. +*> +*> TRANS = 'C' or 'c' C := alpha*A**H*B + +*> conjg( alpha )*B**H*A + +*> beta*C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix C. N must be +*> at least zero. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> On entry with TRANS = 'N' or 'n', K specifies the number +*> of columns of the matrices A and B, and on entry with +*> TRANS = 'C' or 'c', K specifies the number of rows of the +*> matrices A and B. K must be at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 . +*> On entry, ALPHA specifies the scalar alpha. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is +*> k when TRANS = 'N' or 'n', and is n otherwise. +*> Before entry with TRANS = 'N' or 'n', the leading n by k +*> part of the array A must contain the matrix A, otherwise +*> the leading k by n part of the array A must contain the +*> matrix A. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDA must be at least max( 1, n ), otherwise LDA must +*> be at least max( 1, k ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is +*> k when TRANS = 'N' or 'n', and is n otherwise. +*> Before entry with TRANS = 'N' or 'n', the leading n by k +*> part of the array B must contain the matrix B, otherwise +*> the leading k by n part of the array B must contain the +*> matrix B. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. When TRANS = 'N' or 'n' +*> then LDB must be at least max( 1, n ), otherwise LDB must +*> be at least max( 1, k ). +*> Unchanged on exit. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is DOUBLE PRECISION . +*> On entry, BETA specifies the scalar beta. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array of DIMENSION ( LDC, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array C must contain the upper +*> triangular part of the hermitian matrix and the strictly +*> lower triangular part of C is not referenced. On exit, the +*> upper triangular part of the array C is overwritten by the +*> upper triangular part of the updated matrix. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array C must contain the lower +*> triangular part of the hermitian matrix and the strictly +*> upper triangular part of C is not referenced. On exit, the +*> lower triangular part of the array C is overwritten by the +*> lower triangular part of the updated matrix. +*> Note that the imaginary parts of the diagonal elements need +*> not be set, they are assumed to be zero, and on exit they +*> are set to zero. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> On entry, LDC specifies the first dimension of C as declared +*> in the calling (sub) program. LDC must be at least +*> max( 1, n ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> +*> -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. +*> Ed Anderson, Cray Research Inc. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + DOUBLE PRECISION BETA + INTEGER K,LDA,LDB,LDC,N + CHARACTER TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE,DCONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP1,TEMP2 + INTEGER I,INFO,J,L,NROWA + LOGICAL UPPER +* .. +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER (ONE=1.0D+0) + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* +* Test the input parameters. +* + IF (LSAME(TRANS,'N')) THEN + NROWA = N + ELSE + NROWA = K + END IF + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 1 + ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND. + + (.NOT.LSAME(TRANS,'C'))) THEN + INFO = 2 + ELSE IF (N.LT.0) THEN + INFO = 3 + ELSE IF (K.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 7 + ELSE IF (LDB.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDC.LT.MAX(1,N)) THEN + INFO = 12 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZHER2K',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR. + + (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + IF (UPPER) THEN + IF (BETA.EQ.DBLE(ZERO)) THEN + DO 20 J = 1,N + DO 10 I = 1,J + C(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + DO 40 J = 1,N + DO 30 I = 1,J - 1 + C(I,J) = BETA*C(I,J) + 30 CONTINUE + C(J,J) = BETA*DBLE(C(J,J)) + 40 CONTINUE + END IF + ELSE + IF (BETA.EQ.DBLE(ZERO)) THEN + DO 60 J = 1,N + DO 50 I = J,N + C(I,J) = ZERO + 50 CONTINUE + 60 CONTINUE + ELSE + DO 80 J = 1,N + C(J,J) = BETA*DBLE(C(J,J)) + DO 70 I = J + 1,N + C(I,J) = BETA*C(I,J) + 70 CONTINUE + 80 CONTINUE + END IF + END IF + RETURN + END IF +* +* Start the operations. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form C := alpha*A*B**H + conjg( alpha )*B*A**H + +* C. +* + IF (UPPER) THEN + DO 130 J = 1,N + IF (BETA.EQ.DBLE(ZERO)) THEN + DO 90 I = 1,J + C(I,J) = ZERO + 90 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 100 I = 1,J - 1 + C(I,J) = BETA*C(I,J) + 100 CONTINUE + C(J,J) = BETA*DBLE(C(J,J)) + ELSE + C(J,J) = DBLE(C(J,J)) + END IF + DO 120 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(B(J,L)) + TEMP2 = DCONJG(ALPHA*A(J,L)) + DO 110 I = 1,J - 1 + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 110 CONTINUE + C(J,J) = DBLE(C(J,J)) + + + DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2) + END IF + 120 CONTINUE + 130 CONTINUE + ELSE + DO 180 J = 1,N + IF (BETA.EQ.DBLE(ZERO)) THEN + DO 140 I = J,N + C(I,J) = ZERO + 140 CONTINUE + ELSE IF (BETA.NE.ONE) THEN + DO 150 I = J + 1,N + C(I,J) = BETA*C(I,J) + 150 CONTINUE + C(J,J) = BETA*DBLE(C(J,J)) + ELSE + C(J,J) = DBLE(C(J,J)) + END IF + DO 170 L = 1,K + IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN + TEMP1 = ALPHA*DCONJG(B(J,L)) + TEMP2 = DCONJG(ALPHA*A(J,L)) + DO 160 I = J + 1,N + C(I,J) = C(I,J) + A(I,L)*TEMP1 + + + B(I,L)*TEMP2 + 160 CONTINUE + C(J,J) = DBLE(C(J,J)) + + + DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2) + END IF + 170 CONTINUE + 180 CONTINUE + END IF + ELSE +* +* Form C := alpha*A**H*B + conjg( alpha )*B**H*A + +* C. +* + IF (UPPER) THEN + DO 210 J = 1,N + DO 200 I = 1,J + TEMP1 = ZERO + TEMP2 = ZERO + DO 190 L = 1,K + TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J) + TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J) + 190 CONTINUE + IF (I.EQ.J) THEN + IF (BETA.EQ.DBLE(ZERO)) THEN + C(J,J) = DBLE(ALPHA*TEMP1+ + + DCONJG(ALPHA)*TEMP2) + ELSE + C(J,J) = BETA*DBLE(C(J,J)) + + + DBLE(ALPHA*TEMP1+ + + DCONJG(ALPHA)*TEMP2) + END IF + ELSE + IF (BETA.EQ.DBLE(ZERO)) THEN + C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + DCONJG(ALPHA)*TEMP2 + END IF + END IF + 200 CONTINUE + 210 CONTINUE + ELSE + DO 240 J = 1,N + DO 230 I = J,N + TEMP1 = ZERO + TEMP2 = ZERO + DO 220 L = 1,K + TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J) + TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J) + 220 CONTINUE + IF (I.EQ.J) THEN + IF (BETA.EQ.DBLE(ZERO)) THEN + C(J,J) = DBLE(ALPHA*TEMP1+ + + DCONJG(ALPHA)*TEMP2) + ELSE + C(J,J) = BETA*DBLE(C(J,J)) + + + DBLE(ALPHA*TEMP1+ + + DCONJG(ALPHA)*TEMP2) + END IF + ELSE + IF (BETA.EQ.DBLE(ZERO)) THEN + C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2 + ELSE + C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 + + + DCONJG(ALPHA)*TEMP2 + END IF + END IF + 230 CONTINUE + 240 CONTINUE + END IF + END IF +* + RETURN +* +* End of ZHER2K. +* + END diff --git a/lib/linalg/zhetd2.f b/lib/linalg/zhetd2.f new file mode 100644 index 0000000000000000000000000000000000000000..dd8f9cf0145642ee5db89462d087c668bb6a6bf7 --- /dev/null +++ b/lib/linalg/zhetd2.f @@ -0,0 +1,334 @@ +*> \brief \b ZHETD2 reduces a Hermitian matrix to real symmetric tridiagonal form by an unitary similarity transformation (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETD2 + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetd2.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetd2.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetd2.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETD2 reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO, HALF + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL UPPER + INTEGER I + COMPLEX*16 ALPHA, TAUI +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZAXPY, ZHEMV, ZHER2, ZLARFG +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MAX, MIN +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U') + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETD2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A +* + A( N, N ) = DBLE( A( N, N ) ) + DO 10 I = N - 1, 1, -1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(1:i-1,i+1) +* + ALPHA = A( I, I+1 ) + CALL ZLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(1:i,1:i) +* + A( I, I+1 ) = ONE +* +* Compute x := tau * A * v storing x in TAU(1:i) +* + CALL ZHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO, + $ TAU, 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, A( 1, I+1 ), 1 ) + CALL ZAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A, + $ LDA ) +* + ELSE + A( I, I ) = DBLE( A( I, I ) ) + END IF + A( I, I+1 ) = E( I ) + D( I+1 ) = A( I+1, I+1 ) + TAU( I ) = TAUI + 10 CONTINUE + D( 1 ) = A( 1, 1 ) + ELSE +* +* Reduce the lower triangle of A +* + A( 1, 1 ) = DBLE( A( 1, 1 ) ) + DO 20 I = 1, N - 1 +* +* Generate elementary reflector H(i) = I - tau * v * v**H +* to annihilate A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI ) + E( I ) = ALPHA +* + IF( TAUI.NE.ZERO ) THEN +* +* Apply H(i) from both sides to A(i+1:n,i+1:n) +* + A( I+1, I ) = ONE +* +* Compute x := tau * A * v storing y in TAU(i:n-1) +* + CALL ZHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, TAU( I ), 1 ) +* +* Compute w := x - 1/2 * tau * (x**H * v) * v +* + ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, A( I+1, I ), + $ 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 ) +* +* Apply the transformation as a rank-2 update: +* A := A - v * w**H - w * v**H +* + CALL ZHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1, + $ A( I+1, I+1 ), LDA ) +* + ELSE + A( I+1, I+1 ) = DBLE( A( I+1, I+1 ) ) + END IF + A( I+1, I ) = E( I ) + D( I ) = A( I, I ) + TAU( I ) = TAUI + 20 CONTINUE + D( N ) = A( N, N ) + END IF +* + RETURN +* +* End of ZHETD2 +* + END diff --git a/lib/linalg/zhetrd.f b/lib/linalg/zhetrd.f new file mode 100644 index 0000000000000000000000000000000000000000..c6074846379f79f0ff837209d81e2d0fc28cecb2 --- /dev/null +++ b/lib/linalg/zhetrd.f @@ -0,0 +1,378 @@ +*> \brief \b ZHETRD +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZHETRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZHETRD reduces a complex Hermitian matrix A to real symmetric +*> tridiagonal form T by a unitary similarity transformation: +*> Q**H * A * Q = T. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A is stored; +*> = 'L': Lower triangle of A is stored. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> N-by-N upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading N-by-N lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit, if UPLO = 'U', the diagonal and first superdiagonal +*> of A are overwritten by the corresponding elements of the +*> tridiagonal matrix T, and the elements above the first +*> superdiagonal, with the array TAU, represent the unitary +*> matrix Q as a product of elementary reflectors; if UPLO +*> = 'L', the diagonal and first subdiagonal of A are over- +*> written by the corresponding elements of the tridiagonal +*> matrix T, and the elements below the first subdiagonal, with +*> the array TAU, represent the unitary matrix Q as a product +*> of elementary reflectors. See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> The diagonal elements of the tridiagonal matrix T: +*> D(i) = A(i,i). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> The off-diagonal elements of the tridiagonal matrix T: +*> E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors (see Further +*> Details). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= 1. +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16HEcomputational +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n-1) . . . H(2) H(1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in +*> A(1:i-1,i+1), and tau in TAU(i). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(n-1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), +*> and tau in TAU(i). +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( d e v2 v3 v4 ) ( d ) +*> ( d e v3 v4 ) ( e d ) +*> ( d e v4 ) ( v1 e d ) +*> ( d e ) ( v1 v2 e d ) +*> ( d ) ( v1 v2 v3 e d ) +*> +*> where d and e denote diagonal and off-diagonal elements of T, and vi +*> denotes an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE + PARAMETER ( ONE = 1.0D+0 ) + COMPLEX*16 CONE + PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB, + $ NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZHER2K, ZHETD2, ZLATRD +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + UPPER = LSAME( UPLO, 'U' ) + LQUERY = ( LWORK.EQ.-1 ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN + INFO = -9 + END IF +* + IF( INFO.EQ.0 ) THEN +* +* Determine the block size. +* + NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 ) + LWKOPT = N*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZHETRD', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NX = N + IWS = 1 + IF( NB.GT.1 .AND. NB.LT.N ) THEN +* +* Determine when to cross over from blocked to unblocked code +* (last block is always handled by unblocked code). +* + NX = MAX( NB, ILAENV( 3, 'ZHETRD', UPLO, N, -1, -1, -1 ) ) + IF( NX.LT.N ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: determine the +* minimum value of NB, and reduce NB or force use of +* unblocked code by setting NX = N. +* + NB = MAX( LWORK / LDWORK, 1 ) + NBMIN = ILAENV( 2, 'ZHETRD', UPLO, N, -1, -1, -1 ) + IF( NB.LT.NBMIN ) + $ NX = N + END IF + ELSE + NX = N + END IF + ELSE + NB = 1 + END IF +* + IF( UPPER ) THEN +* +* Reduce the upper triangle of A. +* Columns 1:kk are handled by the unblocked method. +* + KK = N - ( ( N-NX+NB-1 ) / NB )*NB + DO 20 I = N - NB + 1, KK + 1, -NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK, + $ LDWORK ) +* +* Update the unreduced submatrix A(1:i-1,1:i-1), using an +* update of the form: A := A - V*W**H - W*V**H +* + CALL ZHER2K( UPLO, 'No transpose', I-1, NB, -CONE, + $ A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA ) +* +* Copy superdiagonal elements back into A, and diagonal +* elements into D +* + DO 10 J = I, I + NB - 1 + A( J-1, J ) = E( J-1 ) + D( J ) = A( J, J ) + 10 CONTINUE + 20 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO ) + ELSE +* +* Reduce the lower triangle of A +* + DO 40 I = 1, N - NX, NB +* +* Reduce columns i:i+nb-1 to tridiagonal form and form the +* matrix W which is needed to update the unreduced part of +* the matrix +* + CALL ZLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ), + $ TAU( I ), WORK, LDWORK ) +* +* Update the unreduced submatrix A(i+nb:n,i+nb:n), using +* an update of the form: A := A - V*W**H - W*V**H +* + CALL ZHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE, + $ A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE, + $ A( I+NB, I+NB ), LDA ) +* +* Copy subdiagonal elements back into A, and diagonal +* elements into D +* + DO 30 J = I, I + NB - 1 + A( J+1, J ) = E( J ) + D( J ) = A( J, J ) + 30 CONTINUE + 40 CONTINUE +* +* Use unblocked code to reduce the last or only block +* + CALL ZHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ), + $ TAU( I ), IINFO ) + END IF +* + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZHETRD +* + END diff --git a/lib/linalg/zlacgv.f b/lib/linalg/zlacgv.f new file mode 100644 index 0000000000000000000000000000000000000000..315c4de5ce103048eeab7d103a20a8978de13005 --- /dev/null +++ b/lib/linalg/zlacgv.f @@ -0,0 +1,116 @@ +*> \brief \b ZLACGV conjugates a complex vector. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLACGV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlacgv.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlacgv.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlacgv.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLACGV( N, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLACGV conjugates a complex vector of length N. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The length of the vector X. N >= 0. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-1)*abs(INCX)) +*> On entry, the vector of length N to be conjugated. +*> On exit, X is overwritten with conjg(X). +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The spacing between successive elements of X. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLACGV( N, X, INCX ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INCX, N +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, IOFF +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* + IF( INCX.EQ.1 ) THEN + DO 10 I = 1, N + X( I ) = DCONJG( X( I ) ) + 10 CONTINUE + ELSE + IOFF = 1 + IF( INCX.LT.0 ) + $ IOFF = 1 - ( N-1 )*INCX + DO 20 I = 1, N + X( IOFF ) = DCONJG( X( IOFF ) ) + IOFF = IOFF + INCX + 20 CONTINUE + END IF + RETURN +* +* End of ZLACGV +* + END diff --git a/lib/linalg/zladiv.f b/lib/linalg/zladiv.f new file mode 100644 index 0000000000000000000000000000000000000000..8f01fe3e63b2296c728d402a7d776f40b2c27539 --- /dev/null +++ b/lib/linalg/zladiv.f @@ -0,0 +1,97 @@ +*> \brief \b ZLADIV performs complex division in real arithmetic, avoiding unnecessary overflow. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLADIV + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zladiv.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zladiv.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zladiv.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* .. Scalar Arguments .. +* COMPLEX*16 X, Y +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLADIV := X / Y, where X and Y are complex. The computation of X / Y +*> will not overflow on an intermediary step unless the results +*> overflows. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 +*> \endverbatim +*> +*> \param[in] Y +*> \verbatim +*> Y is COMPLEX*16 +*> The complex scalars X and Y. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + COMPLEX*16 FUNCTION ZLADIV( X, Y ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + COMPLEX*16 X, Y +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + DOUBLE PRECISION ZI, ZR +* .. +* .. External Subroutines .. + EXTERNAL DLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, DCMPLX, DIMAG +* .. +* .. Executable Statements .. +* + CALL DLADIV( DBLE( X ), DIMAG( X ), DBLE( Y ), DIMAG( Y ), ZR, + $ ZI ) + ZLADIV = DCMPLX( ZR, ZI ) +* + RETURN +* +* End of ZLADIV +* + END diff --git a/lib/linalg/zlanhe.f b/lib/linalg/zlanhe.f new file mode 100644 index 0000000000000000000000000000000000000000..3093a151afe516731e2b2ccfb438407b0da3dbce --- /dev/null +++ b/lib/linalg/zlanhe.f @@ -0,0 +1,258 @@ +*> \brief \b ZLANHE returns the value of the 1-norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a complex Hermitian matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLANHE + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlanhe.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlanhe.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlanhe.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER NORM, UPLO +* INTEGER LDA, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION WORK( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLANHE returns the value of the one norm, or the Frobenius norm, or +*> the infinity norm, or the element of largest absolute value of a +*> complex hermitian matrix A. +*> \endverbatim +*> +*> \return ZLANHE +*> \verbatim +*> +*> ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm' +*> ( +*> ( norm1(A), NORM = '1', 'O' or 'o' +*> ( +*> ( normI(A), NORM = 'I' or 'i' +*> ( +*> ( normF(A), NORM = 'F', 'f', 'E' or 'e' +*> +*> where norm1 denotes the one norm of a matrix (maximum column sum), +*> normI denotes the infinity norm of a matrix (maximum row sum) and +*> normF denotes the Frobenius norm of a matrix (square root of sum of +*> squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] NORM +*> \verbatim +*> NORM is CHARACTER*1 +*> Specifies the value to be returned in ZLANHE as described +*> above. +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> hermitian matrix A is to be referenced. +*> = 'U': Upper triangular part of A is referenced +*> = 'L': Lower triangular part of A is referenced +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. N >= 0. When N = 0, ZLANHE is +*> set to zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The hermitian matrix A. If UPLO = 'U', the leading n by n +*> upper triangular part of A contains the upper triangular part +*> of the matrix A, and the strictly lower triangular part of A +*> is not referenced. If UPLO = 'L', the leading n by n lower +*> triangular part of A contains the lower triangular part of +*> the matrix A, and the strictly upper triangular part of A is +*> not referenced. Note that the imaginary parts of the diagonal +*> elements need not be set and are assumed to be zero. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(N,1). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)), +*> where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, +*> WORK is not referenced. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16HEauxiliary +* +* ===================================================================== + DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER NORM, UPLO + INTEGER LDA, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION WORK( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, J + DOUBLE PRECISION ABSA, SCALE, SUM, VALUE +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + EXTERNAL LSAME, DISNAN +* .. +* .. External Subroutines .. + EXTERNAL ZLASSQ +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, SQRT +* .. +* .. Executable Statements .. +* + IF( N.EQ.0 ) THEN + VALUE = ZERO + ELSE IF( LSAME( NORM, 'M' ) ) THEN +* +* Find max(abs(A(i,j))). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 20 J = 1, N + DO 10 I = 1, J - 1 + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 10 CONTINUE + SUM = ABS( DBLE( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 20 CONTINUE + ELSE + DO 40 J = 1, N + SUM = ABS( DBLE( A( J, J ) ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + DO 30 I = J + 1, N + SUM = ABS( A( I, J ) ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 30 CONTINUE + 40 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR. + $ ( NORM.EQ.'1' ) ) THEN +* +* Find normI(A) ( = norm1(A), since A is hermitian). +* + VALUE = ZERO + IF( LSAME( UPLO, 'U' ) ) THEN + DO 60 J = 1, N + SUM = ZERO + DO 50 I = 1, J - 1 + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 50 CONTINUE + WORK( J ) = SUM + ABS( DBLE( A( J, J ) ) ) + 60 CONTINUE + DO 70 I = 1, N + SUM = WORK( I ) + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 70 CONTINUE + ELSE + DO 80 I = 1, N + WORK( I ) = ZERO + 80 CONTINUE + DO 100 J = 1, N + SUM = WORK( J ) + ABS( DBLE( A( J, J ) ) ) + DO 90 I = J + 1, N + ABSA = ABS( A( I, J ) ) + SUM = SUM + ABSA + WORK( I ) = WORK( I ) + ABSA + 90 CONTINUE + IF( VALUE .LT. SUM .OR. DISNAN( SUM ) ) VALUE = SUM + 100 CONTINUE + END IF + ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN +* +* Find normF(A). +* + SCALE = ZERO + SUM = ONE + IF( LSAME( UPLO, 'U' ) ) THEN + DO 110 J = 2, N + CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM ) + 110 CONTINUE + ELSE + DO 120 J = 1, N - 1 + CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM ) + 120 CONTINUE + END IF + SUM = 2*SUM + DO 130 I = 1, N + IF( DBLE( A( I, I ) ).NE.ZERO ) THEN + ABSA = ABS( DBLE( A( I, I ) ) ) + IF( SCALE.LT.ABSA ) THEN + SUM = ONE + SUM*( SCALE / ABSA )**2 + SCALE = ABSA + ELSE + SUM = SUM + ( ABSA / SCALE )**2 + END IF + END IF + 130 CONTINUE + VALUE = SCALE*SQRT( SUM ) + END IF +* + ZLANHE = VALUE + RETURN +* +* End of ZLANHE +* + END diff --git a/lib/linalg/zlarf.f b/lib/linalg/zlarf.f new file mode 100644 index 0000000000000000000000000000000000000000..f51e1d73831544937bfb8f5f66c83bbb0edf6a8e --- /dev/null +++ b/lib/linalg/zlarf.f @@ -0,0 +1,232 @@ +*> \brief \b ZLARF applies an elementary reflector to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARF + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarf.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarf.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarf.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* .. Scalar Arguments .. +* CHARACTER SIDE +* INTEGER INCV, LDC, M, N +* COMPLEX*16 TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARF applies a complex elementary reflector H to a complex M-by-N +*> matrix C, from either the left or the right. H is represented in the +*> form +*> +*> H = I - tau * v * v**H +*> +*> where tau is a complex scalar and v is a complex vector. +*> +*> If tau = 0, then H is taken to be the unit matrix. +*> +*> To apply H**H, supply conjg(tau) instead +*> tau. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': form H * C +*> = 'R': form C * H +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (1 + (M-1)*abs(INCV)) if SIDE = 'L' +*> or (1 + (N-1)*abs(INCV)) if SIDE = 'R' +*> The vector v in the representation of H. V is not used if +*> TAU = 0. +*> \endverbatim +*> +*> \param[in] INCV +*> \verbatim +*> INCV is INTEGER +*> The increment between elements of v. INCV <> 0. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau in the representation of H. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by the matrix H * C if SIDE = 'L', +*> or C * H if SIDE = 'R'. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension +*> (N) if SIDE = 'L' +*> or (M) if SIDE = 'R' +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER SIDE + INTEGER INCV, LDC, M, N + COMPLEX*16 TAU +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), V( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL APPLYLEFT + INTEGER I, LASTV, LASTC +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZGERC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. Executable Statements .. +* + APPLYLEFT = LSAME( SIDE, 'L' ) + LASTV = 0 + LASTC = 0 + IF( TAU.NE.ZERO ) THEN +* Set up variables for scanning V. LASTV begins pointing to the end +* of V. + IF( APPLYLEFT ) THEN + LASTV = M + ELSE + LASTV = N + END IF + IF( INCV.GT.0 ) THEN + I = 1 + (LASTV-1) * INCV + ELSE + I = 1 + END IF +* Look for the last non-zero row in V. + DO WHILE( LASTV.GT.0 .AND. V( I ).EQ.ZERO ) + LASTV = LASTV - 1 + I = I - INCV + END DO + IF( APPLYLEFT ) THEN +* Scan for the last non-zero column in C(1:lastv,:). + LASTC = ILAZLC(LASTV, N, C, LDC) + ELSE +* Scan for the last non-zero row in C(:,1:lastv). + LASTC = ILAZLR(M, LASTV, C, LDC) + END IF + END IF +* Note that lastc.eq.0 renders the BLAS operations null; no special +* case is needed at this level. + IF( APPLYLEFT ) THEN +* +* Form H * C +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastv,1:lastc)**H * v(1:lastv,1) +* + CALL ZGEMV( 'Conjugate transpose', LASTV, LASTC, ONE, + $ C, LDC, V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)**H +* + CALL ZGERC( LASTV, LASTC, -TAU, V, INCV, WORK, 1, C, LDC ) + END IF + ELSE +* +* Form C * H +* + IF( LASTV.GT.0 ) THEN +* +* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) +* + CALL ZGEMV( 'No transpose', LASTC, LASTV, ONE, C, LDC, + $ V, INCV, ZERO, WORK, 1 ) +* +* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)**H +* + CALL ZGERC( LASTC, LASTV, -TAU, WORK, 1, V, INCV, C, LDC ) + END IF + END IF + RETURN +* +* End of ZLARF +* + END diff --git a/lib/linalg/zlarfb.f b/lib/linalg/zlarfb.f new file mode 100644 index 0000000000000000000000000000000000000000..99490f5827ffad2e19c79cf4da285dd6e4f8b681 --- /dev/null +++ b/lib/linalg/zlarfb.f @@ -0,0 +1,769 @@ +*> \brief \b ZLARFB applies a block reflector or its conjugate-transpose to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFB + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfb.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfb.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfb.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, +* T, LDT, C, LDC, WORK, LDWORK ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, SIDE, STOREV, TRANS +* INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), +* $ WORK( LDWORK, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFB applies a complex block reflector H or its transpose H**H to a +*> complex M-by-N matrix C, from either the left or the right. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> = 'L': apply H or H**H from the Left +*> = 'R': apply H or H**H from the Right +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> = 'N': apply H (No transpose) +*> = 'C': apply H**H (Conjugate transpose) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Indicates how H is formed from a product of elementary +*> reflectors +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Indicates how the vectors which define the elementary +*> reflectors are stored: +*> = 'C': Columnwise +*> = 'R': Rowwise +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix C. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix C. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the matrix T (= the number of elementary +*> reflectors whose product defines the block reflector). +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,M) if STOREV = 'R' and SIDE = 'L' +*> (LDV,N) if STOREV = 'R' and SIDE = 'R' +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M); +*> if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N); +*> if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The triangular K-by-K matrix T in the representation of the +*> block reflector. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +*> +*> \param[in,out] C +*> \verbatim +*> C is COMPLEX*16 array, dimension (LDC,N) +*> On entry, the M-by-N matrix C. +*> On exit, C is overwritten by H*C or H**H*C or C*H or C*H**H. +*> \endverbatim +*> +*> \param[in] LDC +*> \verbatim +*> LDC is INTEGER +*> The leading dimension of the array C. LDC >= max(1,M). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (LDWORK,K) +*> \endverbatim +*> +*> \param[in] LDWORK +*> \verbatim +*> LDWORK is INTEGER +*> The leading dimension of the array WORK. +*> If SIDE = 'L', LDWORK >= max(1,N); +*> if SIDE = 'R', LDWORK >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored; the corresponding +*> array elements are modified but restored on exit. The rest of the +*> array is not used. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV, + $ T, LDT, C, LDC, WORK, LDWORK ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, SIDE, STOREV, TRANS + INTEGER K, LDC, LDT, LDV, LDWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 C( LDC, * ), T( LDT, * ), V( LDV, * ), + $ WORK( LDWORK, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + CHARACTER TRANST + INTEGER I, J, LASTV, LASTC +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAZLR, ILAZLC + EXTERNAL LSAME, ILAZLR, ILAZLC +* .. +* .. External Subroutines .. + EXTERNAL ZCOPY, ZGEMM, ZLACGV, ZTRMM +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( M.LE.0 .OR. N.LE.0 ) + $ RETURN +* + IF( LSAME( TRANS, 'N' ) ) THEN + TRANST = 'C' + ELSE + TRANST = 'N' + END IF +* + IF( LSAME( STOREV, 'C' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 ) (first K rows) +* ( V2 ) +* where V1 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLR( M, K, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C1**H +* + DO 10 J = 1, K + CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 10 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H *V2 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, LASTV-K, ONE, C( K+1, 1 ), LDC, + $ V( K+1, 1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C2 := C2 - V2 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTV-K, LASTC, K, + $ -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 30 J = 1, K + DO 20 I = 1, LASTC + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILAZLR( N, K, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C1 +* + DO 40 J = 1, K + CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 40 CONTINUE +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, K, LASTV-K, + $ ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 60 J = 1, K + DO 50 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 50 CONTINUE + 60 CONTINUE + END IF +* + ELSE +* +* Let V = ( V1 ) +* ( V2 ) (last K rows) +* where V2 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTC = ILAZLC( M, N, C, LDC ) +* +* W := C**H * V = (C1**H * V1 + C2**H * V2) (stored in WORK) +* +* W := C2**H +* + DO 70 J = 1, K + CALL ZCOPY( LASTC, C( M-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 70 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( M-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H*V1 +* + CALL ZGEMM( 'Conjugate transpose', 'No transpose', + $ LASTC, K, M-K, + $ ONE, C, LDC, V, LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1 * W**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ M-K, LASTC, K, + $ -ONE, V, LDV, WORK, LDWORK, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( M-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 90 J = 1, K + DO 80 I = 1, LASTC + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTC = ILAZLR( M, N, C, LDC ) +* +* W := C * V = (C1*V1 + C2*V2) (stored in WORK) +* +* W := C2 +* + DO 100 J = 1, K + CALL ZCOPY( LASTC, C( 1, N-K+J ), 1, + $ WORK( 1, J ), 1 ) + 100 CONTINUE +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( N-K+1, 1 ), LDV, + $ WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, K, N-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V**H +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( N-K+1, 1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W +* + DO 120 J = 1, K + DO 110 I = 1, LASTC + C( I, N-K+J ) = C( I, N-K+J ) + $ - WORK( I, J ) + 110 CONTINUE + 120 CONTINUE + END IF + END IF +* + ELSE IF( LSAME( STOREV, 'R' ) ) THEN +* + IF( LSAME( DIRECT, 'F' ) ) THEN +* +* Let V = ( V1 V2 ) (V1: first K columns) +* where V1 is unit upper triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTV = MAX( K, ILAZLC( K, M, V, LDV ) ) + LASTC = ILAZLC( LASTV, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C1**H +* + DO 130 J = 1, K + CALL ZCOPY( LASTC, C( J, 1 ), LDC, WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 130 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2**H*V2**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, LASTV-K, + $ ONE, C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, + $ ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - V2**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTV-K, LASTC, K, + $ -ONE, V( 1, K+1 ), LDV, WORK, LDWORK, + $ ONE, C( K+1, 1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W**H +* + DO 150 J = 1, K + DO 140 I = 1, LASTC + C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) ) + 140 CONTINUE + 150 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTV = MAX( K, ILAZLC( K, N, V, LDV ) ) + LASTC = ILAZLR( M, LASTV, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C1 +* + DO 160 J = 1, K + CALL ZCOPY( LASTC, C( 1, J ), 1, WORK( 1, J ), 1 ) + 160 CONTINUE +* +* W := W * V1**H +* + CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V, LDV, WORK, LDWORK ) + IF( LASTV.GT.K ) THEN +* +* W := W + C2 * V2**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, K, LASTV-K, ONE, C( 1, K+1 ), LDC, + $ V( 1, K+1 ), LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( LASTV.GT.K ) THEN +* +* C2 := C2 - W * V2 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, LASTV-K, K, + $ -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, + $ ONE, C( 1, K+1 ), LDC ) + END IF +* +* W := W * V1 +* + CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', + $ LASTC, K, ONE, V, LDV, WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 180 J = 1, K + DO 170 I = 1, LASTC + C( I, J ) = C( I, J ) - WORK( I, J ) + 170 CONTINUE + 180 CONTINUE +* + END IF +* + ELSE +* +* Let V = ( V1 V2 ) (V2: last K columns) +* where V2 is unit lower triangular. +* + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form H * C or H**H * C where C = ( C1 ) +* ( C2 ) +* + LASTC = ILAZLC( M, N, C, LDC ) +* +* W := C**H * V**H = (C1**H * V1**H + C2**H * V2**H) (stored in WORK) +* +* W := C2**H +* + DO 190 J = 1, K + CALL ZCOPY( LASTC, C( M-K+J, 1 ), LDC, + $ WORK( 1, J ), 1 ) + CALL ZLACGV( LASTC, WORK( 1, J ), 1 ) + 190 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, M-K+1 ), LDV, + $ WORK, LDWORK ) + IF( M.GT.K ) THEN +* +* W := W + C1**H * V1**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', LASTC, K, M-K, + $ ONE, C, LDC, V, LDV, ONE, WORK, LDWORK ) + END IF +* +* W := W * T**H or W * T +* + CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - V**H * W**H +* + IF( M.GT.K ) THEN +* +* C1 := C1 - V1**H * W**H +* + CALL ZGEMM( 'Conjugate transpose', + $ 'Conjugate transpose', M-K, LASTC, K, + $ -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, M-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C2 := C2 - W**H +* + DO 210 J = 1, K + DO 200 I = 1, LASTC + C( M-K+J, I ) = C( M-K+J, I ) - + $ DCONJG( WORK( I, J ) ) + 200 CONTINUE + 210 CONTINUE +* + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form C * H or C * H**H where C = ( C1 C2 ) +* + LASTC = ILAZLR( M, N, C, LDC ) +* +* W := C * V**H = (C1*V1**H + C2*V2**H) (stored in WORK) +* +* W := C2 +* + DO 220 J = 1, K + CALL ZCOPY( LASTC, C( 1, N-K+J ), 1, + $ WORK( 1, J ), 1 ) + 220 CONTINUE +* +* W := W * V2**H +* + CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose', + $ 'Unit', LASTC, K, ONE, V( 1, N-K+1 ), LDV, + $ WORK, LDWORK ) + IF( N.GT.K ) THEN +* +* W := W + C1 * V1**H +* + CALL ZGEMM( 'No transpose', 'Conjugate transpose', + $ LASTC, K, N-K, ONE, C, LDC, V, LDV, ONE, + $ WORK, LDWORK ) + END IF +* +* W := W * T or W * T**H +* + CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', + $ LASTC, K, ONE, T, LDT, WORK, LDWORK ) +* +* C := C - W * V +* + IF( N.GT.K ) THEN +* +* C1 := C1 - W * V1 +* + CALL ZGEMM( 'No transpose', 'No transpose', + $ LASTC, N-K, K, -ONE, WORK, LDWORK, V, LDV, + $ ONE, C, LDC ) + END IF +* +* W := W * V2 +* + CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', + $ LASTC, K, ONE, V( 1, N-K+1 ), LDV, + $ WORK, LDWORK ) +* +* C1 := C1 - W +* + DO 240 J = 1, K + DO 230 I = 1, LASTC + C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J ) + 230 CONTINUE + 240 CONTINUE +* + END IF +* + END IF + END IF +* + RETURN +* +* End of ZLARFB +* + END diff --git a/lib/linalg/zlarfg.f b/lib/linalg/zlarfg.f new file mode 100644 index 0000000000000000000000000000000000000000..e37c683fc9acbf85612ff8fb338c2fa3a64944e3 --- /dev/null +++ b/lib/linalg/zlarfg.f @@ -0,0 +1,203 @@ +*> \brief \b ZLARFG generates an elementary reflector (Householder matrix). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFG + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarfg.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarfg.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarfg.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFG generates a complex elementary reflector H of order n, such +*> that +*> +*> H**H * ( alpha ) = ( beta ), H**H * H = I. +*> ( x ) ( 0 ) +*> +*> where alpha and beta are scalars, with beta real, and x is an +*> (n-1)-element complex vector. H is represented in the form +*> +*> H = I - tau * ( 1 ) * ( 1 v**H ) , +*> ( v ) +*> +*> where tau is a complex scalar and v is a complex (n-1)-element +*> vector. Note that H is not hermitian. +*> +*> If the elements of x are all zero and alpha is real, then tau = 0 +*> and H is taken to be the unit matrix. +*> +*> Otherwise 1 <= real(tau) <= 2 and abs(tau-1) <= 1 . +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the elementary reflector. +*> \endverbatim +*> +*> \param[in,out] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, the value alpha. +*> On exit, it is overwritten with the value beta. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-2)*abs(INCX)) +*> On entry, the vector x. +*> On exit, it is overwritten with the vector v. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between elements of X. INCX > 0. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 +*> The value tau. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 ALPHA, TAU +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER J, KNT + DOUBLE PRECISION ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH, DLAPY3, DZNRM2 + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, DLAPY3, DZNRM2, ZLADIV +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DCMPLX, DIMAG, SIGN +* .. +* .. External Subroutines .. + EXTERNAL ZDSCAL, ZSCAL +* .. +* .. Executable Statements .. +* + IF( N.LE.0 ) THEN + TAU = ZERO + RETURN + END IF +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHR = DBLE( ALPHA ) + ALPHI = DIMAG( ALPHA ) +* + IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN +* +* H = I +* + TAU = ZERO + ELSE +* +* general case +* + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' ) + RSAFMN = ONE / SAFMIN +* + KNT = 0 + IF( ABS( BETA ).LT.SAFMIN ) THEN +* +* XNORM, BETA may be inaccurate; scale X and recompute them +* + 10 CONTINUE + KNT = KNT + 1 + CALL ZDSCAL( N-1, RSAFMN, X, INCX ) + BETA = BETA*RSAFMN + ALPHI = ALPHI*RSAFMN + ALPHR = ALPHR*RSAFMN + IF( ABS( BETA ).LT.SAFMIN ) + $ GO TO 10 +* +* New BETA is at most 1, at least SAFMIN +* + XNORM = DZNRM2( N-1, X, INCX ) + ALPHA = DCMPLX( ALPHR, ALPHI ) + BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR ) + END IF + TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA ) + ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA ) + CALL ZSCAL( N-1, ALPHA, X, INCX ) +* +* If ALPHA is subnormal, it may lose relative accuracy +* + DO 20 J = 1, KNT + BETA = BETA*SAFMIN + 20 CONTINUE + ALPHA = BETA + END IF +* + RETURN +* +* End of ZLARFG +* + END diff --git a/lib/linalg/zlarft.f b/lib/linalg/zlarft.f new file mode 100644 index 0000000000000000000000000000000000000000..2278d11d2b3d89098c2f3296766a3e0ecd6ea485 --- /dev/null +++ b/lib/linalg/zlarft.f @@ -0,0 +1,327 @@ +*> \brief \b ZLARFT forms the triangular factor T of a block reflector H = I - vtvH +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLARFT + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlarft.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlarft.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlarft.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, STOREV +* INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. +* COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLARFT forms the triangular factor T of a complex block reflector H +*> of order n, which is defined as a product of k elementary reflectors. +*> +*> If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; +*> +*> If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. +*> +*> If STOREV = 'C', the vector which defines the elementary reflector +*> H(i) is stored in the i-th column of the array V, and +*> +*> H = I - V * T * V**H +*> +*> If STOREV = 'R', the vector which defines the elementary reflector +*> H(i) is stored in the i-th row of the array V, and +*> +*> H = I - V**H * T * V +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies the order in which the elementary reflectors are +*> multiplied to form the block reflector: +*> = 'F': H = H(1) H(2) . . . H(k) (Forward) +*> = 'B': H = H(k) . . . H(2) H(1) (Backward) +*> \endverbatim +*> +*> \param[in] STOREV +*> \verbatim +*> STOREV is CHARACTER*1 +*> Specifies how the vectors which define the elementary +*> reflectors are stored (see also Further Details): +*> = 'C': columnwise +*> = 'R': rowwise +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the block reflector H. N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The order of the triangular factor T (= the number of +*> elementary reflectors). K >= 1. +*> \endverbatim +*> +*> \param[in] V +*> \verbatim +*> V is COMPLEX*16 array, dimension +*> (LDV,K) if STOREV = 'C' +*> (LDV,N) if STOREV = 'R' +*> The matrix V. See further details. +*> \endverbatim +*> +*> \param[in] LDV +*> \verbatim +*> LDV is INTEGER +*> The leading dimension of the array V. +*> If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i). +*> \endverbatim +*> +*> \param[out] T +*> \verbatim +*> T is COMPLEX*16 array, dimension (LDT,K) +*> The k by k triangular factor T of the block reflector. +*> If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is +*> lower triangular. The rest of the array is not used. +*> \endverbatim +*> +*> \param[in] LDT +*> \verbatim +*> LDT is INTEGER +*> The leading dimension of the array T. LDT >= K. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> The shape of the matrix V and the storage of the vectors which define +*> the H(i) is best illustrated by the following example with n = 5 and +*> k = 3. The elements equal to 1 are not stored. +*> +*> DIRECT = 'F' and STOREV = 'C': DIRECT = 'F' and STOREV = 'R': +*> +*> V = ( 1 ) V = ( 1 v1 v1 v1 v1 ) +*> ( v1 1 ) ( 1 v2 v2 v2 ) +*> ( v1 v2 1 ) ( 1 v3 v3 ) +*> ( v1 v2 v3 ) +*> ( v1 v2 v3 ) +*> +*> DIRECT = 'B' and STOREV = 'C': DIRECT = 'B' and STOREV = 'R': +*> +*> V = ( v1 v2 v3 ) V = ( v1 v1 1 ) +*> ( v1 v2 v3 ) ( v2 v2 v2 1 ) +*> ( 1 v2 v3 ) ( v3 v3 v3 v3 1 ) +*> ( 1 v3 ) +*> ( 1 ) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, STOREV + INTEGER K, LDT, LDV, N +* .. +* .. Array Arguments .. + COMPLEX*16 T( LDT, * ), TAU( * ), V( LDV, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, PREVLASTV, LASTV +* .. +* .. External Subroutines .. + EXTERNAL ZGEMV, ZLACGV, ZTRMV +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( LSAME( DIRECT, 'F' ) ) THEN + PREVLASTV = N + DO I = 1, K + PREVLASTV = MAX( PREVLASTV, I ) + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = 1, I + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * CONJG( V( I , J ) ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)**H * V(i:j,i) +* + CALL ZGEMV( 'Conjugate transpose', J-I, I-1, + $ -TAU( I ), V( I+1, 1 ), LDV, + $ V( I+1, I ), 1, ONE, T( 1, I ), 1 ) + ELSE +* Skip any trailing zeros. + DO LASTV = N, I+1, -1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = 1, I-1 + T( J, I ) = -TAU( I ) * V( J , I ) + END DO + J = MIN( LASTV, PREVLASTV ) +* +* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)**H +* + CALL ZGEMM( 'N', 'C', I-1, 1, J-I, -TAU( I ), + $ V( 1, I+1 ), LDV, V( I, I+1 ), LDV, + $ ONE, T( 1, I ), LDT ) + END IF +* +* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) +* + CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, + $ LDT, T( 1, I ), 1 ) + T( I, I ) = TAU( I ) + IF( I.GT.1 ) THEN + PREVLASTV = MAX( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + END DO + ELSE + PREVLASTV = 1 + DO I = K, 1, -1 + IF( TAU( I ).EQ.ZERO ) THEN +* +* H(i) = I +* + DO J = I, K + T( J, I ) = ZERO + END DO + ELSE +* +* general case +* + IF( I.LT.K ) THEN + IF( LSAME( STOREV, 'C' ) ) THEN +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( LASTV, I ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * CONJG( V( N-K+I , J ) ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(j:n-k+i,i+1:k)**H * V(j:n-k+i,i) +* + CALL ZGEMV( 'Conjugate transpose', N-K+I-J, K-I, + $ -TAU( I ), V( J, I+1 ), LDV, V( J, I ), + $ 1, ONE, T( I+1, I ), 1 ) + ELSE +* Skip any leading zeros. + DO LASTV = 1, I-1 + IF( V( I, LASTV ).NE.ZERO ) EXIT + END DO + DO J = I+1, K + T( J, I ) = -TAU( I ) * V( J, N-K+I ) + END DO + J = MAX( LASTV, PREVLASTV ) +* +* T(i+1:k,i) = -tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)**H +* + CALL ZGEMM( 'N', 'C', K-I, 1, N-K+I-J, -TAU( I ), + $ V( I+1, J ), LDV, V( I, J ), LDV, + $ ONE, T( I+1, I ), LDT ) + END IF +* +* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) +* + CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I, + $ T( I+1, I+1 ), LDT, T( I+1, I ), 1 ) + IF( I.GT.1 ) THEN + PREVLASTV = MIN( PREVLASTV, LASTV ) + ELSE + PREVLASTV = LASTV + END IF + END IF + T( I, I ) = TAU( I ) + END IF + END DO + END IF + RETURN +* +* End of ZLARFT +* + END diff --git a/lib/linalg/zlascl.f b/lib/linalg/zlascl.f new file mode 100644 index 0000000000000000000000000000000000000000..51a4f0f61494c3507dde135c77a2efa21c1d8053 --- /dev/null +++ b/lib/linalg/zlascl.f @@ -0,0 +1,364 @@ +*> \brief \b ZLASCL multiplies a general rectangular matrix by a real scalar defined as cto/cfrom. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASCL + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlascl.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlascl.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlascl.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER TYPE +* INTEGER INFO, KL, KU, LDA, M, N +* DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASCL multiplies the M by N complex matrix A by the real scalar +*> CTO/CFROM. This is done without over/underflow as long as the final +*> result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that +*> A may be full, upper triangular, lower triangular, upper Hessenberg, +*> or banded. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] TYPE +*> \verbatim +*> TYPE is CHARACTER*1 +*> TYPE indices the storage type of the input matrix. +*> = 'G': A is a full matrix. +*> = 'L': A is a lower triangular matrix. +*> = 'U': A is an upper triangular matrix. +*> = 'H': A is an upper Hessenberg matrix. +*> = 'B': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the lower +*> half stored. +*> = 'Q': A is a symmetric band matrix with lower bandwidth KL +*> and upper bandwidth KU and with the only the upper +*> half stored. +*> = 'Z': A is a band matrix with lower bandwidth KL and upper +*> bandwidth KU. See ZGBTRF for storage details. +*> \endverbatim +*> +*> \param[in] KL +*> \verbatim +*> KL is INTEGER +*> The lower bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] KU +*> \verbatim +*> KU is INTEGER +*> The upper bandwidth of A. Referenced only if TYPE = 'B', +*> 'Q' or 'Z'. +*> \endverbatim +*> +*> \param[in] CFROM +*> \verbatim +*> CFROM is DOUBLE PRECISION +*> \endverbatim +*> +*> \param[in] CTO +*> \verbatim +*> CTO is DOUBLE PRECISION +*> +*> The matrix A is multiplied by CTO/CFROM. A(I,J) is computed +*> without over/underflow if the final result CTO*A(I,J)/CFROM +*> can be represented without over/underflow. CFROM must be +*> nonzero. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The matrix to be multiplied by CTO/CFROM. See TYPE for the +*> storage type. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> 0 - successful exit +*> <0 - if INFO = -i, the i-th argument had an illegal value. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER TYPE + INTEGER INFO, KL, KU, LDA, M, N + DOUBLE PRECISION CFROM, CTO +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) +* .. +* .. Local Scalars .. + LOGICAL DONE + INTEGER I, ITYPE, J, K1, K2, K3, K4 + DOUBLE PRECISION BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM +* .. +* .. External Functions .. + LOGICAL LSAME, DISNAN + DOUBLE PRECISION DLAMCH + EXTERNAL LSAME, DLAMCH, DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, MIN +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 +* + IF( LSAME( TYPE, 'G' ) ) THEN + ITYPE = 0 + ELSE IF( LSAME( TYPE, 'L' ) ) THEN + ITYPE = 1 + ELSE IF( LSAME( TYPE, 'U' ) ) THEN + ITYPE = 2 + ELSE IF( LSAME( TYPE, 'H' ) ) THEN + ITYPE = 3 + ELSE IF( LSAME( TYPE, 'B' ) ) THEN + ITYPE = 4 + ELSE IF( LSAME( TYPE, 'Q' ) ) THEN + ITYPE = 5 + ELSE IF( LSAME( TYPE, 'Z' ) ) THEN + ITYPE = 6 + ELSE + ITYPE = -1 + END IF +* + IF( ITYPE.EQ.-1 ) THEN + INFO = -1 + ELSE IF( CFROM.EQ.ZERO .OR. DISNAN(CFROM) ) THEN + INFO = -4 + ELSE IF( DISNAN(CTO) ) THEN + INFO = -5 + ELSE IF( M.LT.0 ) THEN + INFO = -6 + ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR. + $ ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN + INFO = -7 + ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN + INFO = -9 + ELSE IF( ITYPE.GE.4 ) THEN + IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN + INFO = -2 + ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR. + $ ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) ) + $ THEN + INFO = -3 + ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR. + $ ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR. + $ ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN + INFO = -9 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASCL', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 .OR. M.EQ.0 ) + $ RETURN +* +* Get machine parameters +* + SMLNUM = DLAMCH( 'S' ) + BIGNUM = ONE / SMLNUM +* + CFROMC = CFROM + CTOC = CTO +* + 10 CONTINUE + CFROM1 = CFROMC*SMLNUM + IF( CFROM1.EQ.CFROMC ) THEN +! CFROMC is an inf. Multiply by a correctly signed zero for +! finite CTOC, or a NaN if CTOC is infinite. + MUL = CTOC / CFROMC + DONE = .TRUE. + CTO1 = CTOC + ELSE + CTO1 = CTOC / BIGNUM + IF( CTO1.EQ.CTOC ) THEN +! CTOC is either 0 or an inf. In both cases, CTOC itself +! serves as the correct multiplication factor. + MUL = CTOC + DONE = .TRUE. + CFROMC = ONE + ELSE IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN + MUL = SMLNUM + DONE = .FALSE. + CFROMC = CFROM1 + ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN + MUL = BIGNUM + DONE = .FALSE. + CTOC = CTO1 + ELSE + MUL = CTOC / CFROMC + DONE = .TRUE. + END IF + END IF +* + IF( ITYPE.EQ.0 ) THEN +* +* Full matrix +* + DO 30 J = 1, N + DO 20 I = 1, M + A( I, J ) = A( I, J )*MUL + 20 CONTINUE + 30 CONTINUE +* + ELSE IF( ITYPE.EQ.1 ) THEN +* +* Lower triangular matrix +* + DO 50 J = 1, N + DO 40 I = J, M + A( I, J ) = A( I, J )*MUL + 40 CONTINUE + 50 CONTINUE +* + ELSE IF( ITYPE.EQ.2 ) THEN +* +* Upper triangular matrix +* + DO 70 J = 1, N + DO 60 I = 1, MIN( J, M ) + A( I, J ) = A( I, J )*MUL + 60 CONTINUE + 70 CONTINUE +* + ELSE IF( ITYPE.EQ.3 ) THEN +* +* Upper Hessenberg matrix +* + DO 90 J = 1, N + DO 80 I = 1, MIN( J+1, M ) + A( I, J ) = A( I, J )*MUL + 80 CONTINUE + 90 CONTINUE +* + ELSE IF( ITYPE.EQ.4 ) THEN +* +* Lower half of a symmetric band matrix +* + K3 = KL + 1 + K4 = N + 1 + DO 110 J = 1, N + DO 100 I = 1, MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 100 CONTINUE + 110 CONTINUE +* + ELSE IF( ITYPE.EQ.5 ) THEN +* +* Upper half of a symmetric band matrix +* + K1 = KU + 2 + K3 = KU + 1 + DO 130 J = 1, N + DO 120 I = MAX( K1-J, 1 ), K3 + A( I, J ) = A( I, J )*MUL + 120 CONTINUE + 130 CONTINUE +* + ELSE IF( ITYPE.EQ.6 ) THEN +* +* Band matrix +* + K1 = KL + KU + 2 + K2 = KL + 1 + K3 = 2*KL + KU + 1 + K4 = KL + KU + 1 + M + DO 150 J = 1, N + DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J ) + A( I, J ) = A( I, J )*MUL + 140 CONTINUE + 150 CONTINUE +* + END IF +* + IF( .NOT.DONE ) + $ GO TO 10 +* + RETURN +* +* End of ZLASCL +* + END diff --git a/lib/linalg/zlaset.f b/lib/linalg/zlaset.f new file mode 100644 index 0000000000000000000000000000000000000000..11f82361b741c14ee49356042759aadddc4f3b34 --- /dev/null +++ b/lib/linalg/zlaset.f @@ -0,0 +1,184 @@ +*> \brief \b ZLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASET + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlaset.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlaset.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlaset.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, M, N +* COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASET initializes a 2-D array A to BETA on the diagonal and +*> ALPHA on the offdiagonals. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies the part of the matrix A to be set. +*> = 'U': Upper triangular part is set. The lower triangle +*> is unchanged. +*> = 'L': Lower triangular part is set. The upper triangle +*> is unchanged. +*> Otherwise: All of the matrix A is set. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of A. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of A. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> All the offdiagonal array elements are set to ALPHA. +*> \endverbatim +*> +*> \param[in] BETA +*> \verbatim +*> BETA is COMPLEX*16 +*> All the diagonal array elements are set to BETA. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the m by n matrix A. +*> On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j; +*> A(i,i) = BETA , 1 <= i <= min(m,n) +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, M, N + COMPLEX*16 ALPHA, BETA +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + INTEGER I, J +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. Intrinsic Functions .. + INTRINSIC MIN +* .. +* .. Executable Statements .. +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Set the diagonal to BETA and the strictly upper triangular +* part of the array to ALPHA. +* + DO 20 J = 2, N + DO 10 I = 1, MIN( J-1, M ) + A( I, J ) = ALPHA + 10 CONTINUE + 20 CONTINUE + DO 30 I = 1, MIN( N, M ) + A( I, I ) = BETA + 30 CONTINUE +* + ELSE IF( LSAME( UPLO, 'L' ) ) THEN +* +* Set the diagonal to BETA and the strictly lower triangular +* part of the array to ALPHA. +* + DO 50 J = 1, MIN( M, N ) + DO 40 I = J + 1, M + A( I, J ) = ALPHA + 40 CONTINUE + 50 CONTINUE + DO 60 I = 1, MIN( N, M ) + A( I, I ) = BETA + 60 CONTINUE +* + ELSE +* +* Set the array to BETA on the diagonal and ALPHA on the +* offdiagonal. +* + DO 80 J = 1, N + DO 70 I = 1, M + A( I, J ) = ALPHA + 70 CONTINUE + 80 CONTINUE + DO 90 I = 1, MIN( M, N ) + A( I, I ) = BETA + 90 CONTINUE + END IF +* + RETURN +* +* End of ZLASET +* + END diff --git a/lib/linalg/zlasr.f b/lib/linalg/zlasr.f new file mode 100644 index 0000000000000000000000000000000000000000..5243d8304a953a7d9b47cfdaa41be83b2646907a --- /dev/null +++ b/lib/linalg/zlasr.f @@ -0,0 +1,439 @@ +*> \brief \b ZLASR applies a sequence of plane rotations to a general rectangular matrix. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlasr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlasr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlasr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* .. Scalar Arguments .. +* CHARACTER DIRECT, PIVOT, SIDE +* INTEGER LDA, M, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION C( * ), S( * ) +* COMPLEX*16 A( LDA, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASR applies a sequence of real plane rotations to a complex matrix +*> A, from either the left or the right. +*> +*> When SIDE = 'L', the transformation takes the form +*> +*> A := P*A +*> +*> and when SIDE = 'R', the transformation takes the form +*> +*> A := A*P**T +*> +*> where P is an orthogonal matrix consisting of a sequence of z plane +*> rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', +*> and P**T is the transpose of P. +*> +*> When DIRECT = 'F' (Forward sequence), then +*> +*> P = P(z-1) * ... * P(2) * P(1) +*> +*> and when DIRECT = 'B' (Backward sequence), then +*> +*> P = P(1) * P(2) * ... * P(z-1) +*> +*> where P(k) is a plane rotation matrix defined by the 2-by-2 rotation +*> +*> R(k) = ( c(k) s(k) ) +*> = ( -s(k) c(k) ). +*> +*> When PIVOT = 'V' (Variable pivot), the rotation is performed +*> for the plane (k,k+1), i.e., P(k) has the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears as a rank-2 modification to the identity matrix in +*> rows and columns k and k+1. +*> +*> When PIVOT = 'T' (Top pivot), the rotation is performed for the +*> plane (1,k+1), so P(k) has the form +*> +*> P(k) = ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> +*> where R(k) appears in rows and columns 1 and k+1. +*> +*> Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is +*> performed for the plane (k,z), giving P(k) the form +*> +*> P(k) = ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( c(k) s(k) ) +*> ( 1 ) +*> ( ... ) +*> ( 1 ) +*> ( -s(k) c(k) ) +*> +*> where R(k) appears in rows and columns k and z. The rotations are +*> performed without ever forming P(k) explicitly. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> Specifies whether the plane rotation matrix P is applied to +*> A on the left or the right. +*> = 'L': Left, compute A := P*A +*> = 'R': Right, compute A:= A*P**T +*> \endverbatim +*> +*> \param[in] PIVOT +*> \verbatim +*> PIVOT is CHARACTER*1 +*> Specifies the plane for which P(k) is a plane rotation +*> matrix. +*> = 'V': Variable pivot, the plane (k,k+1) +*> = 'T': Top pivot, the plane (1,k+1) +*> = 'B': Bottom pivot, the plane (k,z) +*> \endverbatim +*> +*> \param[in] DIRECT +*> \verbatim +*> DIRECT is CHARACTER*1 +*> Specifies whether P is a forward or backward sequence of +*> plane rotations. +*> = 'F': Forward, P = P(z-1)*...*P(2)*P(1) +*> = 'B': Backward, P = P(1)*P(2)*...*P(z-1) +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix A. If m <= 1, an immediate +*> return is effected. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix A. If n <= 1, an +*> immediate return is effected. +*> \endverbatim +*> +*> \param[in] C +*> \verbatim +*> C is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The cosines c(k) of the plane rotations. +*> \endverbatim +*> +*> \param[in] S +*> \verbatim +*> S is DOUBLE PRECISION array, dimension +*> (M-1) if SIDE = 'L' +*> (N-1) if SIDE = 'R' +*> The sines s(k) of the plane rotations. The 2-by-2 plane +*> rotation part of the matrix P(k), R(k), has the form +*> R(k) = ( c(k) s(k) ) +*> ( -s(k) c(k) ). +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> The M-by-N matrix A. On exit, A is overwritten by P*A if +*> SIDE = 'R' or by A*P**T if SIDE = 'L'. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,M). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER DIRECT, PIVOT, SIDE + INTEGER LDA, M, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION C( * ), S( * ) + COMPLEX*16 A( LDA, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ONE, ZERO + PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER I, INFO, J + DOUBLE PRECISION CTEMP, STEMP + COMPLEX*16 TEMP +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Executable Statements .. +* +* Test the input parameters +* + INFO = 0 + IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN + INFO = 1 + ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT, + $ 'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN + INFO = 2 + ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) ) + $ THEN + INFO = 3 + ELSE IF( M.LT.0 ) THEN + INFO = 4 + ELSE IF( N.LT.0 ) THEN + INFO = 5 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = 9 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZLASR ', INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) ) + $ RETURN + IF( LSAME( SIDE, 'L' ) ) THEN +* +* Form P * A +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 20 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 10 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 10 CONTINUE + END IF + 20 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 40 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 30 I = 1, N + TEMP = A( J+1, I ) + A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I ) + A( J, I ) = STEMP*TEMP + CTEMP*A( J, I ) + 30 CONTINUE + END IF + 40 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 60 J = 2, M + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 50 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 50 CONTINUE + END IF + 60 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 80 J = M, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 70 I = 1, N + TEMP = A( J, I ) + A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I ) + A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I ) + 70 CONTINUE + END IF + 80 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 100 J = 1, M - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 90 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 90 CONTINUE + END IF + 100 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 120 J = M - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 110 I = 1, N + TEMP = A( J, I ) + A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP + A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP + 110 CONTINUE + END IF + 120 CONTINUE + END IF + END IF + ELSE IF( LSAME( SIDE, 'R' ) ) THEN +* +* Form A * P**T +* + IF( LSAME( PIVOT, 'V' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 140 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 130 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 130 CONTINUE + END IF + 140 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 160 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 150 I = 1, M + TEMP = A( I, J+1 ) + A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J ) + A( I, J ) = STEMP*TEMP + CTEMP*A( I, J ) + 150 CONTINUE + END IF + 160 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'T' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 180 J = 2, N + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 170 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 170 CONTINUE + END IF + 180 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 200 J = N, 2, -1 + CTEMP = C( J-1 ) + STEMP = S( J-1 ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 190 I = 1, M + TEMP = A( I, J ) + A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 ) + A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 ) + 190 CONTINUE + END IF + 200 CONTINUE + END IF + ELSE IF( LSAME( PIVOT, 'B' ) ) THEN + IF( LSAME( DIRECT, 'F' ) ) THEN + DO 220 J = 1, N - 1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 210 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 210 CONTINUE + END IF + 220 CONTINUE + ELSE IF( LSAME( DIRECT, 'B' ) ) THEN + DO 240 J = N - 1, 1, -1 + CTEMP = C( J ) + STEMP = S( J ) + IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN + DO 230 I = 1, M + TEMP = A( I, J ) + A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP + A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP + 230 CONTINUE + END IF + 240 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZLASR +* + END diff --git a/lib/linalg/zlassq.f b/lib/linalg/zlassq.f new file mode 100644 index 0000000000000000000000000000000000000000..5b7e66c30bd421e41b836e4262903dc952022712 --- /dev/null +++ b/lib/linalg/zlassq.f @@ -0,0 +1,168 @@ +*> \brief \b ZLASSQ updates a sum of squares represented in scaled form. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLASSQ + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlassq.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlassq.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlassq.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLASSQ returns the values scl and ssq such that +*> +*> ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq, +*> +*> where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is +*> assumed to be at least unity and the value of ssq will then satisfy +*> +*> 1.0 .le. ssq .le. ( sumsq + 2*n ). +*> +*> scale is assumed to be non-negative and scl returns the value +*> +*> scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ), +*> i +*> +*> scale and sumsq must be supplied in SCALE and SUMSQ respectively. +*> SCALE and SUMSQ are overwritten by scl and ssq respectively. +*> +*> The routine makes only one pass through the vector X. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of elements to be used from the vector X. +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is COMPLEX*16 array, dimension (N) +*> The vector x as described above. +*> x( i ) = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> INCX > 0. +*> \endverbatim +*> +*> \param[in,out] SCALE +*> \verbatim +*> SCALE is DOUBLE PRECISION +*> On entry, the value scale in the equation above. +*> On exit, SCALE is overwritten with the value scl . +*> \endverbatim +*> +*> \param[in,out] SUMSQ +*> \verbatim +*> SUMSQ is DOUBLE PRECISION +*> On entry, the value sumsq in the equation above. +*> On exit, SUMSQ is overwritten with the value ssq . +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INCX, N + DOUBLE PRECISION SCALE, SUMSQ +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO + PARAMETER ( ZERO = 0.0D+0 ) +* .. +* .. Local Scalars .. + INTEGER IX + DOUBLE PRECISION TEMP1 +* .. +* .. External Functions .. + LOGICAL DISNAN + EXTERNAL DISNAN +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, DBLE, DIMAG +* .. +* .. Executable Statements .. +* + IF( N.GT.0 ) THEN + DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX + TEMP1 = ABS( DBLE( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.DISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + TEMP1 = ABS( DIMAG( X( IX ) ) ) + IF( TEMP1.GT.ZERO.OR.DISNAN( TEMP1 ) ) THEN + IF( SCALE.LT.TEMP1 ) THEN + SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2 + SCALE = TEMP1 + ELSE + SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2 + END IF + END IF + 10 CONTINUE + END IF +* + RETURN +* +* End of ZLASSQ +* + END diff --git a/lib/linalg/zlatrd.f b/lib/linalg/zlatrd.f new file mode 100644 index 0000000000000000000000000000000000000000..619d7280c482270f6117235e941bf529e347ebc0 --- /dev/null +++ b/lib/linalg/zlatrd.f @@ -0,0 +1,358 @@ +*> \brief \b ZLATRD reduces the first nb rows and columns of a symmetric/Hermitian matrix A to real tridiagonal form by an unitary similarity transformation. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZLATRD + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlatrd.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlatrd.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlatrd.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. +* DOUBLE PRECISION E( * ) +* COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to +*> Hermitian tridiagonal form by a unitary similarity +*> transformation Q**H * A * Q, and returns the matrices V and W which are +*> needed to apply the transformation to the unreduced part of A. +*> +*> If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a +*> matrix, of which the upper triangle is supplied; +*> if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a +*> matrix, of which the lower triangle is supplied. +*> +*> This is an auxiliary routine called by ZHETRD. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> Specifies whether the upper or lower triangular part of the +*> Hermitian matrix A is stored: +*> = 'U': Upper triangular +*> = 'L': Lower triangular +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix A. +*> \endverbatim +*> +*> \param[in] NB +*> \verbatim +*> NB is INTEGER +*> The number of rows and columns to be reduced. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the Hermitian matrix A. If UPLO = 'U', the leading +*> n-by-n upper triangular part of A contains the upper +*> triangular part of the matrix A, and the strictly lower +*> triangular part of A is not referenced. If UPLO = 'L', the +*> leading n-by-n lower triangular part of A contains the lower +*> triangular part of the matrix A, and the strictly upper +*> triangular part of A is not referenced. +*> On exit: +*> if UPLO = 'U', the last NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements above the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors; +*> if UPLO = 'L', the first NB columns have been reduced to +*> tridiagonal form, with the diagonal elements overwriting +*> the diagonal elements of A; the elements below the diagonal +*> with the array TAU, represent the unitary matrix Q as a +*> product of elementary reflectors. +*> See Further Details. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= max(1,N). +*> \endverbatim +*> +*> \param[out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal +*> elements of the last NB columns of the reduced matrix; +*> if UPLO = 'L', E(1:nb) contains the subdiagonal elements of +*> the first NB columns of the reduced matrix. +*> \endverbatim +*> +*> \param[out] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> The scalar factors of the elementary reflectors, stored in +*> TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. +*> See Further Details. +*> \endverbatim +*> +*> \param[out] W +*> \verbatim +*> W is COMPLEX*16 array, dimension (LDW,NB) +*> The n-by-nb matrix W required to update the unreduced part +*> of A. +*> \endverbatim +*> +*> \param[in] LDW +*> \verbatim +*> LDW is INTEGER +*> The leading dimension of the array W. LDW >= max(1,N). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERauxiliary +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> If UPLO = 'U', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(n) H(n-1) . . . H(n-nb+1). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), +*> and tau in TAU(i-1). +*> +*> If UPLO = 'L', the matrix Q is represented as a product of elementary +*> reflectors +*> +*> Q = H(1) H(2) . . . H(nb). +*> +*> Each H(i) has the form +*> +*> H(i) = I - tau * v * v**H +*> +*> where tau is a complex scalar, and v is a complex vector with +*> v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), +*> and tau in TAU(i). +*> +*> The elements of the vectors v together form the n-by-nb matrix V +*> which is needed, with W, to apply the transformation to the unreduced +*> part of the matrix, using a Hermitian rank-2k update of the form: +*> A := A - V*W**H - W*V**H. +*> +*> The contents of A on exit are illustrated by the following examples +*> with n = 5 and nb = 2: +*> +*> if UPLO = 'U': if UPLO = 'L': +*> +*> ( a a a v4 v5 ) ( d ) +*> ( a a v4 v5 ) ( 1 d ) +*> ( a 1 v5 ) ( v1 1 a ) +*> ( d 1 ) ( v1 v2 a a ) +*> ( d ) ( v1 v2 a a a ) +*> +*> where d denotes a diagonal element of the reduced matrix, a denotes +*> an element of the original matrix that is unchanged, and vi denotes +*> an element of the vector defining H(i). +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW ) +* +* -- LAPACK auxiliary routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER LDA, LDW, N, NB +* .. +* .. Array Arguments .. + DOUBLE PRECISION E( * ) + COMPLEX*16 A( LDA, * ), TAU( * ), W( LDW, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE, HALF + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ), + $ HALF = ( 0.5D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, IW + COMPLEX*16 ALPHA +* .. +* .. External Subroutines .. + EXTERNAL ZAXPY, ZGEMV, ZHEMV, ZLACGV, ZLARFG, ZSCAL +* .. +* .. External Functions .. + LOGICAL LSAME + COMPLEX*16 ZDOTC + EXTERNAL LSAME, ZDOTC +* .. +* .. Intrinsic Functions .. + INTRINSIC DBLE, MIN +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* + IF( LSAME( UPLO, 'U' ) ) THEN +* +* Reduce last NB columns of upper triangle +* + DO 10 I = N, N - NB + 1, -1 + IW = I - N + NB + IF( I.LT.N ) THEN +* +* Update A(1:i,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ), + $ LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, W( I, IW+1 ), LDW ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + CALL ZGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ), + $ LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + END IF + IF( I.GT.1 ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(1:i-2,i) +* + ALPHA = A( I-1, I ) + CALL ZLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) ) + E( I-1 ) = ALPHA + A( I-1, I ) = ONE +* +* Compute W(1:i-1,i) +* + CALL ZHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1, + $ ZERO, W( 1, IW ), 1 ) + IF( I.LT.N ) THEN + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE, + $ A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO, + $ W( I+1, IW ), 1 ) + CALL ZGEMV( 'No transpose', I-1, N-I, -ONE, + $ W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE, + $ W( 1, IW ), 1 ) + END IF + CALL ZSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 ) + ALPHA = -HALF*TAU( I-1 )*ZDOTC( I-1, W( 1, IW ), 1, + $ A( 1, I ), 1 ) + CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 ) + END IF +* + 10 CONTINUE + ELSE +* +* Reduce first NB columns of lower triangle +* + DO 20 I = 1, NB +* +* Update A(i:n,i) +* + A( I, I ) = DBLE( A( I, I ) ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ), + $ LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, W( I, 1 ), LDW ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ), + $ LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 ) + CALL ZLACGV( I-1, A( I, 1 ), LDA ) + A( I, I ) = DBLE( A( I, I ) ) + IF( I.LT.N ) THEN +* +* Generate elementary reflector H(i) to annihilate +* A(i+2:n,i) +* + ALPHA = A( I+1, I ) + CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, + $ TAU( I ) ) + E( I ) = ALPHA + A( I+1, I ) = ONE +* +* Compute W(i+1:n,i) +* + CALL ZHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA, + $ A( I+1, I ), 1, ZERO, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ), + $ LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE, + $ A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO, + $ W( 1, I ), 1 ) + CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ), + $ LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 ) + CALL ZSCAL( N-I, TAU( I ), W( I+1, I ), 1 ) + ALPHA = -HALF*TAU( I )*ZDOTC( N-I, W( I+1, I ), 1, + $ A( I+1, I ), 1 ) + CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 ) + END IF +* + 20 CONTINUE + END IF +* + RETURN +* +* End of ZLATRD +* + END diff --git a/lib/linalg/zsteqr.f b/lib/linalg/zsteqr.f new file mode 100644 index 0000000000000000000000000000000000000000..33af78e854425201ba713272e0532406d325ad8b --- /dev/null +++ b/lib/linalg/zsteqr.f @@ -0,0 +1,576 @@ +*> \brief \b ZSTEQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZSTEQR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsteqr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsteqr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsteqr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER COMPZ +* INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. +* DOUBLE PRECISION D( * ), E( * ), WORK( * ) +* COMPLEX*16 Z( LDZ, * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a +*> symmetric tridiagonal matrix using the implicit QL or QR method. +*> The eigenvectors of a full or band complex Hermitian matrix can also +*> be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this +*> matrix to tridiagonal form. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] COMPZ +*> \verbatim +*> COMPZ is CHARACTER*1 +*> = 'N': Compute eigenvalues only. +*> = 'V': Compute eigenvalues and eigenvectors of the original +*> Hermitian matrix. On entry, Z must contain the +*> unitary matrix used to reduce the original matrix +*> to tridiagonal form. +*> = 'I': Compute eigenvalues and eigenvectors of the +*> tridiagonal matrix. Z is initialized to the identity +*> matrix. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix. N >= 0. +*> \endverbatim +*> +*> \param[in,out] D +*> \verbatim +*> D is DOUBLE PRECISION array, dimension (N) +*> On entry, the diagonal elements of the tridiagonal matrix. +*> On exit, if INFO = 0, the eigenvalues in ascending order. +*> \endverbatim +*> +*> \param[in,out] E +*> \verbatim +*> E is DOUBLE PRECISION array, dimension (N-1) +*> On entry, the (n-1) subdiagonal elements of the tridiagonal +*> matrix. +*> On exit, E has been destroyed. +*> \endverbatim +*> +*> \param[in,out] Z +*> \verbatim +*> Z is COMPLEX*16 array, dimension (LDZ, N) +*> On entry, if COMPZ = 'V', then Z contains the unitary +*> matrix used in the reduction to tridiagonal form. +*> On exit, if INFO = 0, then if COMPZ = 'V', Z contains the +*> orthonormal eigenvectors of the original Hermitian matrix, +*> and if COMPZ = 'I', Z contains the orthonormal eigenvectors +*> of the symmetric tridiagonal matrix. +*> If COMPZ = 'N', then Z is not referenced. +*> \endverbatim +*> +*> \param[in] LDZ +*> \verbatim +*> LDZ is INTEGER +*> The leading dimension of the array Z. LDZ >= 1, and if +*> eigenvectors are desired, then LDZ >= max(1,N). +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is DOUBLE PRECISION array, dimension (max(1,2*N-2)) +*> If COMPZ = 'N', then WORK is not referenced. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> > 0: the algorithm has failed to find all the eigenvalues in +*> a total of 30*N iterations; if INFO = i, then i +*> elements of E have not converged to zero; on exit, D +*> and E contain the elements of a symmetric tridiagonal +*> matrix which is unitarily similar to the original +*> matrix. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER COMPZ + INTEGER INFO, LDZ, N +* .. +* .. Array Arguments .. + DOUBLE PRECISION D( * ), E( * ), WORK( * ) + COMPLEX*16 Z( LDZ, * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE, TWO, THREE + PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0, + $ THREE = 3.0D0 ) + COMPLEX*16 CZERO, CONE + PARAMETER ( CZERO = ( 0.0D0, 0.0D0 ), + $ CONE = ( 1.0D0, 0.0D0 ) ) + INTEGER MAXIT + PARAMETER ( MAXIT = 30 ) +* .. +* .. Local Scalars .. + INTEGER I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND, + $ LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1, + $ NM1, NMAXIT + DOUBLE PRECISION ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2, + $ S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST +* .. +* .. External Functions .. + LOGICAL LSAME + DOUBLE PRECISION DLAMCH, DLANST, DLAPY2 + EXTERNAL LSAME, DLAMCH, DLANST, DLAPY2 +* .. +* .. External Subroutines .. + EXTERNAL DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA, + $ ZLASET, ZLASR, ZSWAP +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS, MAX, SIGN, SQRT +* .. +* .. Executable Statements .. +* +* Test the input parameters. +* + INFO = 0 +* + IF( LSAME( COMPZ, 'N' ) ) THEN + ICOMPZ = 0 + ELSE IF( LSAME( COMPZ, 'V' ) ) THEN + ICOMPZ = 1 + ELSE IF( LSAME( COMPZ, 'I' ) ) THEN + ICOMPZ = 2 + ELSE + ICOMPZ = -1 + END IF + IF( ICOMPZ.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1, + $ N ) ) ) THEN + INFO = -6 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZSTEQR', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) + $ RETURN +* + IF( N.EQ.1 ) THEN + IF( ICOMPZ.EQ.2 ) + $ Z( 1, 1 ) = CONE + RETURN + END IF +* +* Determine the unit roundoff and over/underflow thresholds. +* + EPS = DLAMCH( 'E' ) + EPS2 = EPS**2 + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + SSFMAX = SQRT( SAFMAX ) / THREE + SSFMIN = SQRT( SAFMIN ) / EPS2 +* +* Compute the eigenvalues and eigenvectors of the tridiagonal +* matrix. +* + IF( ICOMPZ.EQ.2 ) + $ CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ ) +* + NMAXIT = N*MAXIT + JTOT = 0 +* +* Determine where the matrix splits and choose QL or QR iteration +* for each block, according to whether top or bottom diagonal +* element is smaller. +* + L1 = 1 + NM1 = N - 1 +* + 10 CONTINUE + IF( L1.GT.N ) + $ GO TO 160 + IF( L1.GT.1 ) + $ E( L1-1 ) = ZERO + IF( L1.LE.NM1 ) THEN + DO 20 M = L1, NM1 + TST = ABS( E( M ) ) + IF( TST.EQ.ZERO ) + $ GO TO 30 + IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+ + $ 1 ) ) ) )*EPS ) THEN + E( M ) = ZERO + GO TO 30 + END IF + 20 CONTINUE + END IF + M = N +* + 30 CONTINUE + L = L1 + LSV = L + LEND = M + LENDSV = LEND + L1 = M + 1 + IF( LEND.EQ.L ) + $ GO TO 10 +* +* Scale submatrix in rows and columns L to LEND +* + ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) ) + ISCALE = 0 + IF( ANORM.EQ.ZERO ) + $ GO TO 10 + IF( ANORM.GT.SSFMAX ) THEN + ISCALE = 1 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N, + $ INFO ) + ELSE IF( ANORM.LT.SSFMIN ) THEN + ISCALE = 2 + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N, + $ INFO ) + CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N, + $ INFO ) + END IF +* +* Choose between QL and QR iteration +* + IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN + LEND = LSV + L = LENDSV + END IF +* + IF( LEND.GT.L ) THEN +* +* QL Iteration +* +* Look for small subdiagonal element. +* + 40 CONTINUE + IF( L.NE.LEND ) THEN + LENDM1 = LEND - 1 + DO 50 M = L, LENDM1 + TST = ABS( E( M ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+ + $ SAFMIN )GO TO 60 + 50 CONTINUE + END IF +* + M = LEND +* + 60 CONTINUE + IF( M.LT.LEND ) + $ E( M ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 80 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L+1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S ) + WORK( L ) = C + WORK( N-1+L ) = S + CALL ZLASR( 'R', 'V', 'B', N, 2, WORK( L ), + $ WORK( N-1+L ), Z( 1, L ), LDZ ) + ELSE + CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 ) + END IF + D( L ) = RT1 + D( L+1 ) = RT2 + E( L ) = ZERO + L = L + 2 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L+1 )-P ) / ( TWO*E( L ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + MM1 = M - 1 + DO 70 I = MM1, L, -1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M-1 ) + $ E( I+1 ) = R + G = D( I+1 ) - P + R = ( D( I )-G )*S + TWO*C*B + P = S*R + D( I+1 ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = -S + END IF +* + 70 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = M - L + 1 + CALL ZLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ), + $ Z( 1, L ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( L ) = G + GO TO 40 +* +* Eigenvalue found. +* + 80 CONTINUE + D( L ) = P +* + L = L + 1 + IF( L.LE.LEND ) + $ GO TO 40 + GO TO 140 +* + ELSE +* +* QR Iteration +* +* Look for small superdiagonal element. +* + 90 CONTINUE + IF( L.NE.LEND ) THEN + LENDP1 = LEND + 1 + DO 100 M = L, LENDP1, -1 + TST = ABS( E( M-1 ) )**2 + IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+ + $ SAFMIN )GO TO 110 + 100 CONTINUE + END IF +* + M = LEND +* + 110 CONTINUE + IF( M.GT.LEND ) + $ E( M-1 ) = ZERO + P = D( L ) + IF( M.EQ.L ) + $ GO TO 130 +* +* If remaining matrix is 2-by-2, use DLAE2 or SLAEV2 +* to compute its eigensystem. +* + IF( M.EQ.L-1 ) THEN + IF( ICOMPZ.GT.0 ) THEN + CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S ) + WORK( M ) = C + WORK( N-1+M ) = S + CALL ZLASR( 'R', 'V', 'F', N, 2, WORK( M ), + $ WORK( N-1+M ), Z( 1, L-1 ), LDZ ) + ELSE + CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 ) + END IF + D( L-1 ) = RT1 + D( L ) = RT2 + E( L-1 ) = ZERO + L = L - 2 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 + END IF +* + IF( JTOT.EQ.NMAXIT ) + $ GO TO 140 + JTOT = JTOT + 1 +* +* Form shift. +* + G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) ) + R = DLAPY2( G, ONE ) + G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) ) +* + S = ONE + C = ONE + P = ZERO +* +* Inner loop +* + LM1 = L - 1 + DO 120 I = M, LM1 + F = S*E( I ) + B = C*E( I ) + CALL DLARTG( G, F, C, S, R ) + IF( I.NE.M ) + $ E( I-1 ) = R + G = D( I ) - P + R = ( D( I+1 )-G )*S + TWO*C*B + P = S*R + D( I ) = G + P + G = C*R - B +* +* If eigenvectors are desired, then save rotations. +* + IF( ICOMPZ.GT.0 ) THEN + WORK( I ) = C + WORK( N-1+I ) = S + END IF +* + 120 CONTINUE +* +* If eigenvectors are desired, then apply saved rotations. +* + IF( ICOMPZ.GT.0 ) THEN + MM = L - M + 1 + CALL ZLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ), + $ Z( 1, M ), LDZ ) + END IF +* + D( L ) = D( L ) - P + E( LM1 ) = G + GO TO 90 +* +* Eigenvalue found. +* + 130 CONTINUE + D( L ) = P +* + L = L - 1 + IF( L.GE.LEND ) + $ GO TO 90 + GO TO 140 +* + END IF +* +* Undo scaling if necessary +* + 140 CONTINUE + IF( ISCALE.EQ.1 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + ELSE IF( ISCALE.EQ.2 ) THEN + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1, + $ D( LSV ), N, INFO ) + CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ), + $ N, INFO ) + END IF +* +* Check for no convergence to an eigenvalue after a total +* of N*MAXIT iterations. +* + IF( JTOT.EQ.NMAXIT ) THEN + DO 150 I = 1, N - 1 + IF( E( I ).NE.ZERO ) + $ INFO = INFO + 1 + 150 CONTINUE + RETURN + END IF + GO TO 10 +* +* Order eigenvalues and eigenvectors. +* + 160 CONTINUE + IF( ICOMPZ.EQ.0 ) THEN +* +* Use Quick Sort +* + CALL DLASRT( 'I', N, D, INFO ) +* + ELSE +* +* Use Selection Sort to minimize swaps of eigenvectors +* + DO 180 II = 2, N + I = II - 1 + K = I + P = D( I ) + DO 170 J = II, N + IF( D( J ).LT.P ) THEN + K = J + P = D( J ) + END IF + 170 CONTINUE + IF( K.NE.I ) THEN + D( K ) = D( I ) + D( I ) = P + CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 ) + END IF + 180 CONTINUE + END IF + RETURN +* +* End of ZSTEQR +* + END diff --git a/lib/linalg/zswap.f b/lib/linalg/zswap.f new file mode 100644 index 0000000000000000000000000000000000000000..ca2f34721192c53ee11ec4e0dc05a722e5e4cfe8 --- /dev/null +++ b/lib/linalg/zswap.f @@ -0,0 +1,98 @@ +*> \brief \b ZSWAP +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY) +* +* .. Scalar Arguments .. +* INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. +* COMPLEX*16 ZX(*),ZY(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZSWAP interchanges two vectors. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level1 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> jack dongarra, 3/11/78. +*> modified 12/3/93, array(1) declarations changed to array(*) +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY) +* +* -- Reference BLAS level1 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX,INCY,N +* .. +* .. Array Arguments .. + COMPLEX*16 ZX(*),ZY(*) +* .. +* +* ===================================================================== +* +* .. Local Scalars .. + COMPLEX*16 ZTEMP + INTEGER I,IX,IY +* .. + IF (N.LE.0) RETURN + IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN +* +* code for both increments equal to 1 + DO I = 1,N + ZTEMP = ZX(I) + ZX(I) = ZY(I) + ZY(I) = ZTEMP + END DO + ELSE +* +* code for unequal increments or equal increments not equal +* to 1 +* + IX = 1 + IY = 1 + IF (INCX.LT.0) IX = (-N+1)*INCX + 1 + IF (INCY.LT.0) IY = (-N+1)*INCY + 1 + DO I = 1,N + ZTEMP = ZX(IX) + ZX(IX) = ZY(IY) + ZY(IY) = ZTEMP + IX = IX + INCX + IY = IY + INCY + END DO + END IF + RETURN + END diff --git a/lib/linalg/ztrmm.f b/lib/linalg/ztrmm.f new file mode 100644 index 0000000000000000000000000000000000000000..ba7aead68b5df58348242a8497ded795e1215168 --- /dev/null +++ b/lib/linalg/ztrmm.f @@ -0,0 +1,452 @@ +*> \brief \b ZTRMM +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* +* .. Scalar Arguments .. +* COMPLEX*16 ALPHA +* INTEGER LDA,LDB,M,N +* CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),B(LDB,*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRMM performs one of the matrix-matrix operations +*> +*> B := alpha*op( A )*B, or B := alpha*B*op( A ) +*> +*> where alpha is a scalar, B is an m by n matrix, A is a unit, or +*> non-unit, upper or lower triangular matrix and op( A ) is one of +*> +*> op( A ) = A or op( A ) = A**T or op( A ) = A**H. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] SIDE +*> \verbatim +*> SIDE is CHARACTER*1 +*> On entry, SIDE specifies whether op( A ) multiplies B from +*> the left or right as follows: +*> +*> SIDE = 'L' or 'l' B := alpha*op( A )*B. +*> +*> SIDE = 'R' or 'r' B := alpha*B*op( A ). +*> \endverbatim +*> +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the matrix A is an upper or +*> lower triangular matrix as follows: +*> +*> UPLO = 'U' or 'u' A is an upper triangular matrix. +*> +*> UPLO = 'L' or 'l' A is a lower triangular matrix. +*> \endverbatim +*> +*> \param[in] TRANSA +*> \verbatim +*> TRANSA is CHARACTER*1 +*> On entry, TRANSA specifies the form of op( A ) to be used in +*> the matrix multiplication as follows: +*> +*> TRANSA = 'N' or 'n' op( A ) = A. +*> +*> TRANSA = 'T' or 't' op( A ) = A**T. +*> +*> TRANSA = 'C' or 'c' op( A ) = A**H. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not A is unit triangular +*> as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> \endverbatim +*> +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> On entry, M specifies the number of rows of B. M must be at +*> least zero. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the number of columns of B. N must be +*> at least zero. +*> \endverbatim +*> +*> \param[in] ALPHA +*> \verbatim +*> ALPHA is COMPLEX*16 +*> On entry, ALPHA specifies the scalar alpha. When alpha is +*> zero then A is not referenced and B need not be set before +*> entry. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m +*> when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. +*> Before entry with UPLO = 'U' or 'u', the leading k by k +*> upper triangular part of the array A must contain the upper +*> triangular matrix and the strictly lower triangular part of +*> A is not referenced. +*> Before entry with UPLO = 'L' or 'l', the leading k by k +*> lower triangular part of the array A must contain the lower +*> triangular matrix and the strictly upper triangular part of +*> A is not referenced. +*> Note that when DIAG = 'U' or 'u', the diagonal elements of +*> A are not referenced either, but are assumed to be unity. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. When SIDE = 'L' or 'l' then +*> LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' +*> then LDA must be at least max( 1, n ). +*> \endverbatim +*> +*> \param[in] B +*> \verbatim +*> B is (input/output) COMPLEX*16 array of DIMENSION ( LDB, n ). +*> Before entry, the leading m by n part of the array B must +*> contain the matrix B, and on exit is overwritten by the +*> transformed matrix. +*> \endverbatim +*> +*> \param[in] LDB +*> \verbatim +*> LDB is INTEGER +*> On entry, LDB specifies the first dimension of B as declared +*> in the calling (sub) program. LDB must be at least +*> max( 1, m ). +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level3 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 3 Blas routine. +*> +*> -- Written on 8-February-1989. +*> Jack Dongarra, Argonne National Laboratory. +*> Iain Duff, AERE Harwell. +*> Jeremy Du Croz, Numerical Algorithms Group Ltd. +*> Sven Hammarling, Numerical Algorithms Group Ltd. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB) +* +* -- Reference BLAS level3 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + COMPLEX*16 ALPHA + INTEGER LDA,LDB,M,N + CHARACTER DIAG,SIDE,TRANSA,UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),B(LDB,*) +* .. +* +* ===================================================================== +* +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG,MAX +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I,INFO,J,K,NROWA + LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER +* .. +* .. Parameters .. + COMPLEX*16 ONE + PARAMETER (ONE= (1.0D+0,0.0D+0)) + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* +* Test the input parameters. +* + LSIDE = LSAME(SIDE,'L') + IF (LSIDE) THEN + NROWA = M + ELSE + NROWA = N + END IF + NOCONJ = LSAME(TRANSA,'T') + NOUNIT = LSAME(DIAG,'N') + UPPER = LSAME(UPLO,'U') +* + INFO = 0 + IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN + INFO = 1 + ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN + INFO = 2 + ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND. + + (.NOT.LSAME(TRANSA,'T')) .AND. + + (.NOT.LSAME(TRANSA,'C'))) THEN + INFO = 3 + ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN + INFO = 4 + ELSE IF (M.LT.0) THEN + INFO = 5 + ELSE IF (N.LT.0) THEN + INFO = 6 + ELSE IF (LDA.LT.MAX(1,NROWA)) THEN + INFO = 9 + ELSE IF (LDB.LT.MAX(1,M)) THEN + INFO = 11 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZTRMM ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (M.EQ.0 .OR. N.EQ.0) RETURN +* +* And when alpha.eq.zero. +* + IF (ALPHA.EQ.ZERO) THEN + DO 20 J = 1,N + DO 10 I = 1,M + B(I,J) = ZERO + 10 CONTINUE + 20 CONTINUE + RETURN + END IF +* +* Start the operations. +* + IF (LSIDE) THEN + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*A*B. +* + IF (UPPER) THEN + DO 50 J = 1,N + DO 40 K = 1,M + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + DO 30 I = 1,K - 1 + B(I,J) = B(I,J) + TEMP*A(I,K) + 30 CONTINUE + IF (NOUNIT) TEMP = TEMP*A(K,K) + B(K,J) = TEMP + END IF + 40 CONTINUE + 50 CONTINUE + ELSE + DO 80 J = 1,N + DO 70 K = M,1,-1 + IF (B(K,J).NE.ZERO) THEN + TEMP = ALPHA*B(K,J) + B(K,J) = TEMP + IF (NOUNIT) B(K,J) = B(K,J)*A(K,K) + DO 60 I = K + 1,M + B(I,J) = B(I,J) + TEMP*A(I,K) + 60 CONTINUE + END IF + 70 CONTINUE + 80 CONTINUE + END IF + ELSE +* +* Form B := alpha*A**T*B or B := alpha*A**H*B. +* + IF (UPPER) THEN + DO 120 J = 1,N + DO 110 I = M,1,-1 + TEMP = B(I,J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 90 K = 1,I - 1 + TEMP = TEMP + A(K,I)*B(K,J) + 90 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I)) + DO 100 K = 1,I - 1 + TEMP = TEMP + DCONJG(A(K,I))*B(K,J) + 100 CONTINUE + END IF + B(I,J) = ALPHA*TEMP + 110 CONTINUE + 120 CONTINUE + ELSE + DO 160 J = 1,N + DO 150 I = 1,M + TEMP = B(I,J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(I,I) + DO 130 K = I + 1,M + TEMP = TEMP + A(K,I)*B(K,J) + 130 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I)) + DO 140 K = I + 1,M + TEMP = TEMP + DCONJG(A(K,I))*B(K,J) + 140 CONTINUE + END IF + B(I,J) = ALPHA*TEMP + 150 CONTINUE + 160 CONTINUE + END IF + END IF + ELSE + IF (LSAME(TRANSA,'N')) THEN +* +* Form B := alpha*B*A. +* + IF (UPPER) THEN + DO 200 J = N,1,-1 + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 170 I = 1,M + B(I,J) = TEMP*B(I,J) + 170 CONTINUE + DO 190 K = 1,J - 1 + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 180 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 180 CONTINUE + END IF + 190 CONTINUE + 200 CONTINUE + ELSE + DO 240 J = 1,N + TEMP = ALPHA + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 210 I = 1,M + B(I,J) = TEMP*B(I,J) + 210 CONTINUE + DO 230 K = J + 1,N + IF (A(K,J).NE.ZERO) THEN + TEMP = ALPHA*A(K,J) + DO 220 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 220 CONTINUE + END IF + 230 CONTINUE + 240 CONTINUE + END IF + ELSE +* +* Form B := alpha*B*A**T or B := alpha*B*A**H. +* + IF (UPPER) THEN + DO 280 K = 1,N + DO 260 J = 1,K - 1 + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = ALPHA*A(J,K) + ELSE + TEMP = ALPHA*DCONJG(A(J,K)) + END IF + DO 250 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 250 CONTINUE + END IF + 260 CONTINUE + TEMP = ALPHA + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = TEMP*A(K,K) + ELSE + TEMP = TEMP*DCONJG(A(K,K)) + END IF + END IF + IF (TEMP.NE.ONE) THEN + DO 270 I = 1,M + B(I,K) = TEMP*B(I,K) + 270 CONTINUE + END IF + 280 CONTINUE + ELSE + DO 320 K = N,1,-1 + DO 300 J = K + 1,N + IF (A(J,K).NE.ZERO) THEN + IF (NOCONJ) THEN + TEMP = ALPHA*A(J,K) + ELSE + TEMP = ALPHA*DCONJG(A(J,K)) + END IF + DO 290 I = 1,M + B(I,J) = B(I,J) + TEMP*B(I,K) + 290 CONTINUE + END IF + 300 CONTINUE + TEMP = ALPHA + IF (NOUNIT) THEN + IF (NOCONJ) THEN + TEMP = TEMP*A(K,K) + ELSE + TEMP = TEMP*DCONJG(A(K,K)) + END IF + END IF + IF (TEMP.NE.ONE) THEN + DO 310 I = 1,M + B(I,K) = TEMP*B(I,K) + 310 CONTINUE + END IF + 320 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMM . +* + END diff --git a/lib/linalg/ztrmv.f b/lib/linalg/ztrmv.f new file mode 100644 index 0000000000000000000000000000000000000000..8d7974a059112c0604fd63890060bf17a1ff446c --- /dev/null +++ b/lib/linalg/ztrmv.f @@ -0,0 +1,373 @@ +*> \brief \b ZTRMV +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +* Definition: +* =========== +* +* SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* +* .. Scalar Arguments .. +* INTEGER INCX,LDA,N +* CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. +* COMPLEX*16 A(LDA,*),X(*) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZTRMV performs one of the matrix-vector operations +*> +*> x := A*x, or x := A**T*x, or x := A**H*x, +*> +*> where x is an n element vector and A is an n by n unit, or non-unit, +*> upper or lower triangular matrix. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> On entry, UPLO specifies whether the matrix is an upper or +*> lower triangular matrix as follows: +*> +*> UPLO = 'U' or 'u' A is an upper triangular matrix. +*> +*> UPLO = 'L' or 'l' A is a lower triangular matrix. +*> \endverbatim +*> +*> \param[in] TRANS +*> \verbatim +*> TRANS is CHARACTER*1 +*> On entry, TRANS specifies the operation to be performed as +*> follows: +*> +*> TRANS = 'N' or 'n' x := A*x. +*> +*> TRANS = 'T' or 't' x := A**T*x. +*> +*> TRANS = 'C' or 'c' x := A**H*x. +*> \endverbatim +*> +*> \param[in] DIAG +*> \verbatim +*> DIAG is CHARACTER*1 +*> On entry, DIAG specifies whether or not A is unit +*> triangular as follows: +*> +*> DIAG = 'U' or 'u' A is assumed to be unit triangular. +*> +*> DIAG = 'N' or 'n' A is not assumed to be unit +*> triangular. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> On entry, N specifies the order of the matrix A. +*> N must be at least zero. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 array of DIMENSION ( LDA, n ). +*> Before entry with UPLO = 'U' or 'u', the leading n by n +*> upper triangular part of the array A must contain the upper +*> triangular matrix and the strictly lower triangular part of +*> A is not referenced. +*> Before entry with UPLO = 'L' or 'l', the leading n by n +*> lower triangular part of the array A must contain the lower +*> triangular matrix and the strictly upper triangular part of +*> A is not referenced. +*> Note that when DIAG = 'U' or 'u', the diagonal elements of +*> A are not referenced either, but are assumed to be unity. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> On entry, LDA specifies the first dimension of A as declared +*> in the calling (sub) program. LDA must be at least +*> max( 1, n ). +*> \endverbatim +*> +*> \param[in] X +*> \verbatim +*> X is (input/output) COMPLEX*16 array of dimension at least +*> ( 1 + ( n - 1 )*abs( INCX ) ). +*> Before entry, the incremented array X must contain the n +*> element vector x. On exit, X is overwritten with the +*> tranformed vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> On entry, INCX specifies the increment for the elements of +*> X. INCX must not be zero. +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16_blas_level2 +* +*> \par Further Details: +* ===================== +*> +*> \verbatim +*> +*> Level 2 Blas routine. +*> The vector and matrix arguments are not referenced when N = 0, or M = 0 +*> +*> -- Written on 22-October-1986. +*> Jack Dongarra, Argonne National Lab. +*> Jeremy Du Croz, Nag Central Office. +*> Sven Hammarling, Nag Central Office. +*> Richard Hanson, Sandia National Labs. +*> \endverbatim +*> +* ===================================================================== + SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX) +* +* -- Reference BLAS level2 routine (version 3.4.0) -- +* -- Reference BLAS is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INCX,LDA,N + CHARACTER DIAG,TRANS,UPLO +* .. +* .. Array Arguments .. + COMPLEX*16 A(LDA,*),X(*) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER (ZERO= (0.0D+0,0.0D+0)) +* .. +* .. Local Scalars .. + COMPLEX*16 TEMP + INTEGER I,INFO,IX,J,JX,KX + LOGICAL NOCONJ,NOUNIT +* .. +* .. External Functions .. + LOGICAL LSAME + EXTERNAL LSAME +* .. +* .. External Subroutines .. + EXTERNAL XERBLA +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG,MAX +* .. +* +* Test the input parameters. +* + INFO = 0 + IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN + INFO = 1 + ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND. + + .NOT.LSAME(TRANS,'C')) THEN + INFO = 2 + ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN + INFO = 3 + ELSE IF (N.LT.0) THEN + INFO = 4 + ELSE IF (LDA.LT.MAX(1,N)) THEN + INFO = 6 + ELSE IF (INCX.EQ.0) THEN + INFO = 8 + END IF + IF (INFO.NE.0) THEN + CALL XERBLA('ZTRMV ',INFO) + RETURN + END IF +* +* Quick return if possible. +* + IF (N.EQ.0) RETURN +* + NOCONJ = LSAME(TRANS,'T') + NOUNIT = LSAME(DIAG,'N') +* +* Set up the start point in X if the increment is not unity. This +* will be ( N - 1 )*INCX too small for descending loops. +* + IF (INCX.LE.0) THEN + KX = 1 - (N-1)*INCX + ELSE IF (INCX.NE.1) THEN + KX = 1 + END IF +* +* Start the operations. In this version the elements of A are +* accessed sequentially with one pass through A. +* + IF (LSAME(TRANS,'N')) THEN +* +* Form x := A*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 20 J = 1,N + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 10 I = 1,J - 1 + X(I) = X(I) + TEMP*A(I,J) + 10 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 20 CONTINUE + ELSE + JX = KX + DO 40 J = 1,N + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 30 I = 1,J - 1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX + INCX + 30 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX + INCX + 40 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 60 J = N,1,-1 + IF (X(J).NE.ZERO) THEN + TEMP = X(J) + DO 50 I = N,J + 1,-1 + X(I) = X(I) + TEMP*A(I,J) + 50 CONTINUE + IF (NOUNIT) X(J) = X(J)*A(J,J) + END IF + 60 CONTINUE + ELSE + KX = KX + (N-1)*INCX + JX = KX + DO 80 J = N,1,-1 + IF (X(JX).NE.ZERO) THEN + TEMP = X(JX) + IX = KX + DO 70 I = N,J + 1,-1 + X(IX) = X(IX) + TEMP*A(I,J) + IX = IX - INCX + 70 CONTINUE + IF (NOUNIT) X(JX) = X(JX)*A(J,J) + END IF + JX = JX - INCX + 80 CONTINUE + END IF + END IF + ELSE +* +* Form x := A**T*x or x := A**H*x. +* + IF (LSAME(UPLO,'U')) THEN + IF (INCX.EQ.1) THEN + DO 110 J = N,1,-1 + TEMP = X(J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 90 I = J - 1,1,-1 + TEMP = TEMP + A(I,J)*X(I) + 90 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J)) + DO 100 I = J - 1,1,-1 + TEMP = TEMP + DCONJG(A(I,J))*X(I) + 100 CONTINUE + END IF + X(J) = TEMP + 110 CONTINUE + ELSE + JX = KX + (N-1)*INCX + DO 140 J = N,1,-1 + TEMP = X(JX) + IX = JX + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 120 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + A(I,J)*X(IX) + 120 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J)) + DO 130 I = J - 1,1,-1 + IX = IX - INCX + TEMP = TEMP + DCONJG(A(I,J))*X(IX) + 130 CONTINUE + END IF + X(JX) = TEMP + JX = JX - INCX + 140 CONTINUE + END IF + ELSE + IF (INCX.EQ.1) THEN + DO 170 J = 1,N + TEMP = X(J) + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 150 I = J + 1,N + TEMP = TEMP + A(I,J)*X(I) + 150 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J)) + DO 160 I = J + 1,N + TEMP = TEMP + DCONJG(A(I,J))*X(I) + 160 CONTINUE + END IF + X(J) = TEMP + 170 CONTINUE + ELSE + JX = KX + DO 200 J = 1,N + TEMP = X(JX) + IX = JX + IF (NOCONJ) THEN + IF (NOUNIT) TEMP = TEMP*A(J,J) + DO 180 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + A(I,J)*X(IX) + 180 CONTINUE + ELSE + IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J)) + DO 190 I = J + 1,N + IX = IX + INCX + TEMP = TEMP + DCONJG(A(I,J))*X(IX) + 190 CONTINUE + END IF + X(JX) = TEMP + JX = JX + INCX + 200 CONTINUE + END IF + END IF + END IF +* + RETURN +* +* End of ZTRMV . +* + END diff --git a/lib/linalg/zung2l.f b/lib/linalg/zung2l.f new file mode 100644 index 0000000000000000000000000000000000000000..f8fd3667d26cf7230a3402eafe3b4d6c2868ac7a --- /dev/null +++ b/lib/linalg/zung2l.f @@ -0,0 +1,199 @@ +*> \brief \b ZUNG2L generates all or part of the unitary matrix Q from a QL factorization determined by cgeqlf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNG2L + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2l.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2l.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2l.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNG2L generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the last n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQLF in the last k columns of its array +*> argument A. +*> On exit, the m-by-n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, II, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2L', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns 1:n-k to columns of the unit matrix +* + DO 20 J = 1, N - K + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( M-N+J, J ) = ONE + 20 CONTINUE +* + DO 40 I = 1, K + II = N - K + I +* +* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left +* + A( M-N+II, II ) = ONE + CALL ZLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A, + $ LDA, WORK ) + CALL ZSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 ) + A( M-N+II, II ) = ONE - TAU( I ) +* +* Set A(m-k+i+1:m,n-k+i) to zero +* + DO 30 L = M - N + II + 1, M + A( L, II ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2L +* + END diff --git a/lib/linalg/zung2r.f b/lib/linalg/zung2r.f new file mode 100644 index 0000000000000000000000000000000000000000..63783ac01b65fabbe27433788720a21e1ec7bf9a --- /dev/null +++ b/lib/linalg/zung2r.f @@ -0,0 +1,201 @@ +*> \brief \b ZUNG2R +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNG2R + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zung2r.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zung2r.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zung2r.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNG2R generates an m by n complex matrix Q with orthonormal columns, +*> which is defined as the first n columns of a product of k elementary +*> reflectors of order m +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQRF in the first k columns of its array +*> argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (N) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNG2R', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Initialise columns k+1:n to columns of the unit matrix +* + DO 20 J = K + 1, N + DO 10 L = 1, M + A( L, J ) = ZERO + 10 CONTINUE + A( J, J ) = ONE + 20 CONTINUE +* + DO 40 I = K, 1, -1 +* +* Apply H(i) to A(i:m,i:n) from the left +* + IF( I.LT.N ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ), + $ A( I, I+1 ), LDA, WORK ) + END IF + IF( I.LT.M ) + $ CALL ZSCAL( M-I, -TAU( I ), A( I+1, I ), 1 ) + A( I, I ) = ONE - TAU( I ) +* +* Set A(1:i-1,i) to zero +* + DO 30 L = 1, I - 1 + A( L, I ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNG2R +* + END diff --git a/lib/linalg/zungl2.f b/lib/linalg/zungl2.f new file mode 100644 index 0000000000000000000000000000000000000000..44acba12a6e2885de1ae366063ec4174d1491301 --- /dev/null +++ b/lib/linalg/zungl2.f @@ -0,0 +1,207 @@ +*> \brief \b ZUNGL2 generates all or part of the unitary matrix Q from an LQ factorization determined by cgelqf (unblocked algorithm). +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGL2 + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungl2.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungl2.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungl2.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows, +*> which is defined as the first m rows of a product of k elementary +*> reflectors of order n +*> +*> Q = H(k)**H . . . H(2)**H H(1)**H +*> +*> as returned by ZGELQF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. N >= M. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. M >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th row must contain the vector which defines +*> the elementary reflector H(i), for i = 1,2,...,k, as returned +*> by ZGELQF in the first k rows of its array argument A. +*> On exit, the m by n matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGELQF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (M) +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date September 2012 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.2) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* September 2012 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ONE, ZERO + PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ), + $ ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + INTEGER I, J, L +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLACGV, ZLARF, ZSCAL +* .. +* .. Intrinsic Functions .. + INTRINSIC DCONJG, MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.M ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGL2', -INFO ) + RETURN + END IF +* +* Quick return if possible +* + IF( M.LE.0 ) + $ RETURN +* + IF( K.LT.M ) THEN +* +* Initialise rows k+1:m to rows of the unit matrix +* + DO 20 J = 1, N + DO 10 L = K + 1, M + A( L, J ) = ZERO + 10 CONTINUE + IF( J.GT.K .AND. J.LE.M ) + $ A( J, J ) = ONE + 20 CONTINUE + END IF +* + DO 40 I = K, 1, -1 +* +* Apply H(i)**H to A(i:m,i:n) from the right +* + IF( I.LT.N ) THEN + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + IF( I.LT.M ) THEN + A( I, I ) = ONE + CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, + $ DCONJG( TAU( I ) ), A( I+1, I ), LDA, WORK ) + END IF + CALL ZSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA ) + CALL ZLACGV( N-I, A( I, I+1 ), LDA ) + END IF + A( I, I ) = ONE - DCONJG( TAU( I ) ) +* +* Set A(i,1:i-1) to zero +* + DO 30 L = 1, I - 1 + A( I, L ) = ZERO + 30 CONTINUE + 40 CONTINUE + RETURN +* +* End of ZUNGL2 +* + END diff --git a/lib/linalg/zungql.f b/lib/linalg/zungql.f new file mode 100644 index 0000000000000000000000000000000000000000..5c77abbd4621d85ac27c9bb672f2f298f720c140 --- /dev/null +++ b/lib/linalg/zungql.f @@ -0,0 +1,296 @@ +*> \brief \b ZUNGQL +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGQL + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungql.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungql.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungql.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGQL generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the last N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(k) . . . H(2) H(1) +*> +*> as returned by ZGEQLF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the (n-k+i)-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQLF in the last k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQLF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT, + $ NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2L +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( N.EQ.0 ) THEN + LWKOPT = 1 + ELSE + NB = ILAENV( 1, 'ZUNGQL', ' ', M, N, K, -1 ) + LWKOPT = N*NB + END IF + WORK( 1 ) = LWKOPT +* + IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQL', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQL', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQL', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the first block. +* The last kk columns are handled by the block method. +* + KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB ) +* +* Set A(m-kk+1:m,1:n-kk) to zero. +* + DO 20 J = 1, N - KK + DO 10 I = M - KK + 1, M + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the first or only block. +* + CALL ZUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = K - KK + 1, K, NB + IB = MIN( NB, K-I+1 ) + IF( N-K+I.GT.1 ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i+ib-1) . . . H(i+1) H(i) +* + CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB, + $ A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Backward', + $ 'Columnwise', M-K+I+IB-1, N-K+I-1, IB, + $ A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA, + $ WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows 1:m-k+i+ib-1 of current block +* + CALL ZUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA, + $ TAU( I ), WORK, IINFO ) +* +* Set rows m-k+i+ib:m of current block to zero +* + DO 40 J = N - K + I, N - K + I + IB - 1 + DO 30 L = M - K + I + IB, M + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQL +* + END diff --git a/lib/linalg/zungqr.f b/lib/linalg/zungqr.f new file mode 100644 index 0000000000000000000000000000000000000000..6b3e9220cd41560a85637d0cbceedf4a77a4f8f2 --- /dev/null +++ b/lib/linalg/zungqr.f @@ -0,0 +1,290 @@ +*> \brief \b ZUNGQR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGQR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungqr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungqr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungqr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns, +*> which is defined as the first N columns of a product of K elementary +*> reflectors of order M +*> +*> Q = H(1) H(2) . . . H(k) +*> +*> as returned by ZGEQRF. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] M +*> \verbatim +*> M is INTEGER +*> The number of rows of the matrix Q. M >= 0. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of columns of the matrix Q. M >= N >= 0. +*> \endverbatim +*> +*> \param[in] K +*> \verbatim +*> K is INTEGER +*> The number of elementary reflectors whose product defines the +*> matrix Q. N >= K >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the i-th column must contain the vector which +*> defines the elementary reflector H(i), for i = 1,2,...,k, as +*> returned by ZGEQRF in the first k columns of its array +*> argument A. +*> On exit, the M-by-N matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The first dimension of the array A. LDA >= max(1,M). +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (K) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZGEQRF. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= max(1,N). +*> For optimum performance LWORK >= N*NB, where NB is the +*> optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument has an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + INTEGER INFO, K, LDA, LWORK, M, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY + INTEGER I, IB, IINFO, IWS, J, KI, KK, L, LDWORK, + $ LWKOPT, NB, NBMIN, NX +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZLARFB, ZLARFT, ZUNG2R +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX, MIN +* .. +* .. External Functions .. + INTEGER ILAENV + EXTERNAL ILAENV +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 ) + LWKOPT = MAX( 1, N )*NB + WORK( 1 ) = LWKOPT + LQUERY = ( LWORK.EQ.-1 ) + IF( M.LT.0 ) THEN + INFO = -1 + ELSE IF( N.LT.0 .OR. N.GT.M ) THEN + INFO = -2 + ELSE IF( K.LT.0 .OR. K.GT.N ) THEN + INFO = -3 + ELSE IF( LDA.LT.MAX( 1, M ) ) THEN + INFO = -5 + ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN + INFO = -8 + END IF + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGQR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.LE.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + NBMIN = 2 + NX = 0 + IWS = N + IF( NB.GT.1 .AND. NB.LT.K ) THEN +* +* Determine when to cross over from blocked to unblocked code. +* + NX = MAX( 0, ILAENV( 3, 'ZUNGQR', ' ', M, N, K, -1 ) ) + IF( NX.LT.K ) THEN +* +* Determine if workspace is large enough for blocked code. +* + LDWORK = N + IWS = LDWORK*NB + IF( LWORK.LT.IWS ) THEN +* +* Not enough workspace to use optimal NB: reduce NB and +* determine the minimum value of NB. +* + NB = LWORK / LDWORK + NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQR', ' ', M, N, K, -1 ) ) + END IF + END IF + END IF +* + IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN +* +* Use blocked code after the last block. +* The first kk columns are handled by the block method. +* + KI = ( ( K-NX-1 ) / NB )*NB + KK = MIN( K, KI+NB ) +* +* Set A(1:kk,kk+1:n) to zero. +* + DO 20 J = KK + 1, N + DO 10 I = 1, KK + A( I, J ) = ZERO + 10 CONTINUE + 20 CONTINUE + ELSE + KK = 0 + END IF +* +* Use unblocked code for the last or only block. +* + IF( KK.LT.N ) + $ CALL ZUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA, + $ TAU( KK+1 ), WORK, IINFO ) +* + IF( KK.GT.0 ) THEN +* +* Use blocked code +* + DO 50 I = KI + 1, 1, -NB + IB = MIN( NB, K-I+1 ) + IF( I+IB.LE.N ) THEN +* +* Form the triangular factor of the block reflector +* H = H(i) H(i+1) . . . H(i+ib-1) +* + CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB, + $ A( I, I ), LDA, TAU( I ), WORK, LDWORK ) +* +* Apply H to A(i:m,i+ib:n) from the left +* + CALL ZLARFB( 'Left', 'No transpose', 'Forward', + $ 'Columnwise', M-I+1, N-I-IB+1, IB, + $ A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ), + $ LDA, WORK( IB+1 ), LDWORK ) + END IF +* +* Apply H to rows i:m of current block +* + CALL ZUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK, + $ IINFO ) +* +* Set rows 1:i-1 of current block to zero +* + DO 40 J = I, I + IB - 1 + DO 30 L = 1, I - 1 + A( L, J ) = ZERO + 30 CONTINUE + 40 CONTINUE + 50 CONTINUE + END IF +* + WORK( 1 ) = IWS + RETURN +* +* End of ZUNGQR +* + END diff --git a/lib/linalg/zungtr.f b/lib/linalg/zungtr.f new file mode 100644 index 0000000000000000000000000000000000000000..422a55a921ffd166a06c76cd8f37d3da5420feb5 --- /dev/null +++ b/lib/linalg/zungtr.f @@ -0,0 +1,256 @@ +*> \brief \b ZUNGTR +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZUNGTR + dependencies +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zungtr.f"> +*> [TGZ]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zungtr.f"> +*> [ZIP]</a> +*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zungtr.f"> +*> [TXT]</a> +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* .. Scalar Arguments .. +* CHARACTER UPLO +* INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. +* COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZUNGTR generates a complex unitary matrix Q which is defined as the +*> product of n-1 elementary reflectors of order N, as returned by +*> ZHETRD: +*> +*> if UPLO = 'U', Q = H(n-1) . . . H(2) H(1), +*> +*> if UPLO = 'L', Q = H(1) H(2) . . . H(n-1). +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] UPLO +*> \verbatim +*> UPLO is CHARACTER*1 +*> = 'U': Upper triangle of A contains elementary reflectors +*> from ZHETRD; +*> = 'L': Lower triangle of A contains elementary reflectors +*> from ZHETRD. +*> \endverbatim +*> +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The order of the matrix Q. N >= 0. +*> \endverbatim +*> +*> \param[in,out] A +*> \verbatim +*> A is COMPLEX*16 array, dimension (LDA,N) +*> On entry, the vectors which define the elementary reflectors, +*> as returned by ZHETRD. +*> On exit, the N-by-N unitary matrix Q. +*> \endverbatim +*> +*> \param[in] LDA +*> \verbatim +*> LDA is INTEGER +*> The leading dimension of the array A. LDA >= N. +*> \endverbatim +*> +*> \param[in] TAU +*> \verbatim +*> TAU is COMPLEX*16 array, dimension (N-1) +*> TAU(i) must contain the scalar factor of the elementary +*> reflector H(i), as returned by ZHETRD. +*> \endverbatim +*> +*> \param[out] WORK +*> \verbatim +*> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK)) +*> On exit, if INFO = 0, WORK(1) returns the optimal LWORK. +*> \endverbatim +*> +*> \param[in] LWORK +*> \verbatim +*> LWORK is INTEGER +*> The dimension of the array WORK. LWORK >= N-1. +*> For optimum performance LWORK >= (N-1)*NB, where NB is +*> the optimal blocksize. +*> +*> If LWORK = -1, then a workspace query is assumed; the routine +*> only calculates the optimal size of the WORK array, returns +*> this value as the first entry of the WORK array, and no error +*> message related to LWORK is issued by XERBLA. +*> \endverbatim +*> +*> \param[out] INFO +*> \verbatim +*> INFO is INTEGER +*> = 0: successful exit +*> < 0: if INFO = -i, the i-th argument had an illegal value +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \date November 2011 +* +*> \ingroup complex16OTHERcomputational +* +* ===================================================================== + SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO ) +* +* -- LAPACK computational routine (version 3.4.0) -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* November 2011 +* +* .. Scalar Arguments .. + CHARACTER UPLO + INTEGER INFO, LDA, LWORK, N +* .. +* .. Array Arguments .. + COMPLEX*16 A( LDA, * ), TAU( * ), WORK( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + COMPLEX*16 ZERO, ONE + PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), + $ ONE = ( 1.0D+0, 0.0D+0 ) ) +* .. +* .. Local Scalars .. + LOGICAL LQUERY, UPPER + INTEGER I, IINFO, J, LWKOPT, NB +* .. +* .. External Functions .. + LOGICAL LSAME + INTEGER ILAENV + EXTERNAL LSAME, ILAENV +* .. +* .. External Subroutines .. + EXTERNAL XERBLA, ZUNGQL, ZUNGQR +* .. +* .. Intrinsic Functions .. + INTRINSIC MAX +* .. +* .. Executable Statements .. +* +* Test the input arguments +* + INFO = 0 + LQUERY = ( LWORK.EQ.-1 ) + UPPER = LSAME( UPLO, 'U' ) + IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN + INFO = -1 + ELSE IF( N.LT.0 ) THEN + INFO = -2 + ELSE IF( LDA.LT.MAX( 1, N ) ) THEN + INFO = -4 + ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN + INFO = -7 + END IF +* + IF( INFO.EQ.0 ) THEN + IF( UPPER ) THEN + NB = ILAENV( 1, 'ZUNGQL', ' ', N-1, N-1, N-1, -1 ) + ELSE + NB = ILAENV( 1, 'ZUNGQR', ' ', N-1, N-1, N-1, -1 ) + END IF + LWKOPT = MAX( 1, N-1 )*NB + WORK( 1 ) = LWKOPT + END IF +* + IF( INFO.NE.0 ) THEN + CALL XERBLA( 'ZUNGTR', -INFO ) + RETURN + ELSE IF( LQUERY ) THEN + RETURN + END IF +* +* Quick return if possible +* + IF( N.EQ.0 ) THEN + WORK( 1 ) = 1 + RETURN + END IF +* + IF( UPPER ) THEN +* +* Q was determined by a call to ZHETRD with UPLO = 'U' +* +* Shift the vectors which define the elementary reflectors one +* column to the left, and set the last row and column of Q to +* those of the unit matrix +* + DO 20 J = 1, N - 1 + DO 10 I = 1, J - 1 + A( I, J ) = A( I, J+1 ) + 10 CONTINUE + A( N, J ) = ZERO + 20 CONTINUE + DO 30 I = 1, N - 1 + A( I, N ) = ZERO + 30 CONTINUE + A( N, N ) = ONE +* +* Generate Q(1:n-1,1:n-1) +* + CALL ZUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO ) +* + ELSE +* +* Q was determined by a call to ZHETRD with UPLO = 'L'. +* +* Shift the vectors which define the elementary reflectors one +* column to the right, and set the first row and column of Q to +* those of the unit matrix +* + DO 50 J = N, 2, -1 + A( 1, J ) = ZERO + DO 40 I = J + 1, N + A( I, J ) = A( I, J-1 ) + 40 CONTINUE + 50 CONTINUE + A( 1, 1 ) = ONE + DO 60 I = 2, N + A( I, 1 ) = ZERO + 60 CONTINUE + IF( N.GT.1 ) THEN +* +* Generate Q(2:n,2:n) +* + CALL ZUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK, + $ LWORK, IINFO ) + END IF + END IF + WORK( 1 ) = LWKOPT + RETURN +* +* End of ZUNGTR +* + END diff --git a/potentials/BN.extep b/potentials/BN.extep new file mode 100644 index 0000000000000000000000000000000000000000..8732ada84bf95dc6814d0ef839b18b686a4e914d --- /dev/null +++ b/potentials/BN.extep @@ -0,0 +1,109 @@ +# DATE: 2017-11-28 CONTRIBUTOR: J.H. Los, J.M.H. Kroes CITATION: Los et al. Phys. Rev. B 96, 184108 (2017) + +# B and N mixture, parameterized for ExTeP potential + +# ExTeP parameters for various elements and mixtures +# multiple entries can be added to this file, LAMMPS reads the ones it needs +# these entries are in LAMMPS "metal" units: +# A,B = eV; lambda1,lambda2,lambda3 = 1/Angstroms; R,D = Angstroms +# other quantities are unitless + +# format of a single entry (one or more lines): +#I J K m, gamma*, lambda3, c, d, h, n, gamma, lambda2, B, R, D, lambda1, A +B B B 3 1.0 0.0 26617.3000 141.2000 -0.1300 1.1422470 0.01498959 2.5211820 2768.7363631 2.0 0.2 2.6857244 3376.3350735 +N N N 3 1.0 0.0 23.5000 3.7500 -0.4000 0.6650000 0.01925100 2.6272721 2563.5603417 2.0 0.2 2.8293093 2978.9527928 +B B N 3 1.0 0.0 26617.3000 141.2000 -0.1300 1.1422470 0.01498959 2.5211820 2768.7363631 2.0 0.2 2.6857244 3376.3350735 +N N B 3 1.0 0.0 23.5000 3.7500 -0.4000 0.6650000 0.01925100 2.6272721 2563.5603417 2.0 0.2 2.8293093 2978.9527928 +B N B 3 1.0 0.0d0 306.586555205d0 10.d0 -0.7218d0 0.6576543657d0 0.0027024851d0 2.69335d0 2595.6860833266d0 2.d0 0.2d0 2.95d0 3330.0655849887d0 +B N N 3 1.0 0.0d0 306.586555205d0 10.d0 -0.7218d0 0.6576543657d0 0.0027024851d0 2.69335d0 2595.6860833266d0 2.d0 0.2d0 2.95d0 3330.0655849887d0 +N B B 3 1.0 0.0d0 306.586555205d0 10.d0 -0.7218d0 0.6576543657d0 0.0027024851d0 2.69335d0 2595.6860833266d0 2.d0 0.2d0 2.95d0 3330.0655849887d0 +N B N 3 1.0 0.0d0 306.586555205d0 10.d0 -0.7218d0 0.6576543657d0 0.0027024851d0 2.69335d0 2595.6860833266d0 2.d0 0.2d0 2.95d0 3330.0655849887d0 +# +# 1.9925 Bicubic Splines Parameters +# +# F_corr [ B, B] +# +#t1 t2 i j val dx dy dxy + B B 0 0 0.0000 0.0000 0.0000 0.0000 + B B 0 1 0.0054 0.0000 0.0000 0.0000 + B B 0 2 0.0182 0.0000 0.0000 0.0000 + B B 0 3 -0.0034 0.0000 0.0000 0.0000 + B B 0 4 -0.0034 0.0000 0.0000 0.0000 + B B 1 0 0.0054 0.0000 0.0000 0.0000 + B B 1 1 0.0100 0.0000 0.0000 0.0000 + B B 1 2 0.0062 0.0000 0.0000 0.0000 + B B 1 3 0.0154 0.0000 0.0000 0.0000 + B B 1 4 0.0154 0.0000 0.0000 0.0000 + B B 2 0 0.0182 0.0000 0.0000 0.0000 + B B 2 1 0.0062 0.0000 0.0000 0.0000 + B B 2 2 0.0154 0.0000 0.0000 0.0000 + B B 2 3 -0.0390 0.0000 -0.0727 0.0000 + B B 2 4 -0.0390 0.0000 -0.0727 0.0000 + B B 3 0 -0.0034 0.0000 0.0000 0.0000 + B B 3 1 0.0154 0.0000 0.0000 0.0000 + B B 3 2 -0.0390 -0.0727 0.0000 0.0000 + B B 3 3 -0.1300 0.0000 0.0000 0.0000 + B B 3 4 -0.1300 0.0000 0.0000 0.0000 + B B 4 0 -0.0034 0.0000 0.0000 0.0000 + B B 4 1 0.0154 0.0000 0.0000 0.0000 + B B 4 2 -0.0390 -0.0727 0.0000 0.0000 + B B 4 3 -0.1300 0.0000 0.0000 0.0000 + B B 4 4 -0.1300 0.0000 0.0000 0.0000 +# +# F_corr [ B, N] +# +#t1 t2 i j val dx dy dxy + B N 0 0 0.0170 0.0000 0.0000 0.0000 + B N 0 1 0.0078 0.0000 0.0000 0.0000 + B N 0 2 0.0000 0.0000 0.0000 0.0000 + B N 0 3 -0.0860 0.0000 0.0000 0.0000 + B N 0 4 -0.0860 0.0000 0.0000 0.0000 + B N 1 0 -0.0090 0.0000 0.0000 0.0000 + B N 1 1 0.0090 0.0000 0.0000 0.0000 + B N 1 2 -0.0068 0.0000 -0.0214 0.0000 + B N 1 3 -0.0338 0.0000 0.0388 0.0000 + B N 1 4 -0.0338 0.0000 0.0388 0.0000 + B N 2 0 0.0000 0.0000 0.0000 0.0000 + B N 2 1 -0.0198 0.0000 0.0000 0.0000 + B N 2 2 0.0000 0.0000 0.0000 0.0000 + B N 2 3 -0.0084 0.0000 0.0169 0.0000 + B N 2 4 -0.0084 0.0000 0.0169 0.0000 + B N 3 0 -0.0750 0.0000 0.0000 0.0000 + B N 3 1 -0.0168 0.0306 0.0000 0.0000 + B N 3 2 -0.0138 0.0084 0.0000 0.0000 + B N 3 3 0.0000 0.0000 0.0000 0.0000 + B N 3 4 0.0000 0.0000 0.0000 0.0000 + B N 4 0 -0.0750 0.0000 0.0000 0.0000 + B N 4 1 -0.0168 0.0306 0.0000 0.0000 + B N 4 2 -0.0138 0.0084 0.0000 0.0000 + B N 4 3 0.0000 0.0000 0.0000 0.0000 + B N 4 4 0.0000 0.0000 0.0000 0.0000 +# +# F_corr [ N, N] +# +#t1 t2 i j val dx dy dxy + N N 0 0 0.0000 0.0000 0.0000 0.0000 + N N 0 1 -0.0282 0.0000 0.0000 0.0000 + N N 0 2 -0.0018 0.0000 0.0000 0.0000 + N N 0 3 -0.0004 0.0000 0.0000 0.0000 + N N 0 4 -0.0004 0.0000 0.0000 0.0000 + N N 1 0 -0.0282 0.0000 0.0000 0.0000 + N N 1 1 0.0200 0.0000 0.0000 0.0000 + N N 1 2 0.0180 0.0162 -0.0027 0.0000 + N N 1 3 0.0146 0.0000 0.0000 0.0000 + N N 1 4 0.0146 0.0000 0.0000 0.0000 + N N 2 0 -0.0018 0.0000 0.0000 0.0000 + N N 2 1 0.0180 -0.0027 0.0162 0.0000 + N N 2 2 0.0306 0.0000 0.0000 0.0000 + N N 2 3 0.0060 0.0000 -0.0073 0.0000 + N N 2 4 0.0060 0.0000 -0.0073 0.0000 + N N 3 0 -0.0004 0.0000 0.0000 0.0000 + N N 3 1 0.0146 0.0000 0.0000 0.0000 + N N 3 2 0.0060 -0.0073 0.0000 0.0000 + N N 3 3 0.0000 0.0000 0.0000 0.0000 + N N 3 4 0.0000 0.0000 0.0000 0.0000 + N N 4 0 -0.0004 0.0000 0.0000 0.0000 + N N 4 1 0.0146 0.0000 0.0000 0.0000 + N N 4 2 0.0060 -0.0073 0.0000 0.0000 + N N 4 3 0.0000 0.0000 0.0000 0.0000 + N N 4 4 0.0000 0.0000 0.0000 0.0000 diff --git a/python/lammps.py b/python/lammps.py index 944eaeabf5a982afddfb79cc8a9f4f286c8aa94e..ee6e461d33976ca380c27fb3d3e50304748d0df1 100644 --- a/python/lammps.py +++ b/python/lammps.py @@ -603,6 +603,30 @@ class Atom2D(Atom): self.lmp.eval("fy[%d]" % self.index)) +class variable_set: + def __init__(self, name, variable_dict): + self._name = name + array_pattern = re.compile(r"(?P<arr>.+)\[(?P<index>[0-9]+)\]") + + for key, value in variable_dict.items(): + m = array_pattern.match(key) + if m: + g = m.groupdict() + varname = g['arr'] + idx = int(g['index']) + if varname not in self.__dict__: + self.__dict__[varname] = {} + self.__dict__[varname][idx] = value + else: + self.__dict__[key] = value + + def __str__(self): + return "{}({})".format(self._name, ','.join(["{}={}".format(k, self.__dict__[k]) for k in self.__dict__.keys() if not k.startswith('_')])) + + def __repr__(self): + return self.__str__() + + def get_thermo_data(output): """ traverse output of runs and extract thermo data columns """ if isinstance(output, str): @@ -630,7 +654,7 @@ def get_thermo_data(output): elif line.startswith("Loop time of "): in_run = False columns = None - thermo_data = namedtuple('ThermoData', list(current_run.keys()))(*list(current_run.values())) + thermo_data = variable_set('ThermoData', current_run) r = {'thermo' : thermo_data } runs.append(namedtuple('Run', list(r.keys()))(*list(r.values()))) elif in_run and len(columns) > 0: diff --git a/src/.gitignore b/src/.gitignore index 8d997760966204a8eb1da712f98df065299c2cc1..fe23bc1f55446fc6873390ef1fca32af96d94d8d 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -25,6 +25,7 @@ /kokkos.h /kokkos_type.h /kokkos_few.h +/kokkos_base.h /manifold*.cpp /manifold*.h @@ -1083,10 +1084,16 @@ /pair_born_coul_long_cs.h /pair_born_coul_dsf_cs.cpp /pair_born_coul_dsf_cs.h +/pair_born_coul_wolf_cs.cpp +/pair_born_coul_wolf_cs.h /pair_buck_coul_long_cs.cpp /pair_buck_coul_long_cs.h /pair_coul_long_cs.cpp /pair_coul_long_cs.h +/pair_coul_wolf_cs.cpp +/pair_coul_wolf_cs.h +/pair_extep.cpp +/pair_extep.h /pair_lj_cut_thole_long.cpp /pair_lj_cut_thole_long.h /pair_plum_hb.cpp diff --git a/src/GPU/pair_eam_alloy_gpu.cpp b/src/GPU/pair_eam_alloy_gpu.cpp index ab0f499a98e6813a875753d63302cfe34bd8fa67..9b42b0a14fab56b44652942973fd122a7fe113da 100644 --- a/src/GPU/pair_eam_alloy_gpu.cpp +++ b/src/GPU/pair_eam_alloy_gpu.cpp @@ -28,6 +28,7 @@ #include "error.h" #include "neigh_request.h" #include "gpu_extra.h" +#include "domain.h" using namespace LAMMPS_NS; diff --git a/src/GPU/pair_eam_fs_gpu.cpp b/src/GPU/pair_eam_fs_gpu.cpp index a2b339db9ac6ce197b19ad7705edfb24ea132599..c29b49631ce210121df7479b3fc60867bca31438 100644 --- a/src/GPU/pair_eam_fs_gpu.cpp +++ b/src/GPU/pair_eam_fs_gpu.cpp @@ -28,6 +28,7 @@ #include "error.h" #include "neigh_request.h" #include "gpu_extra.h" +#include "domain.h" using namespace LAMMPS_NS; diff --git a/src/GPU/pair_gauss_gpu.cpp b/src/GPU/pair_gauss_gpu.cpp index a8cb695b3aba077587028729e73a38b5731b7bba..01cc63a9f9a49c378b7318cb36b9b6143269d853 100644 --- a/src/GPU/pair_gauss_gpu.cpp +++ b/src/GPU/pair_gauss_gpu.cpp @@ -43,7 +43,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a, double **b, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen); -int gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a, +void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a, double **b, double **offset); void gauss_gpu_clear(); int ** gauss_gpu_compute_n(const int ago, const int inum, diff --git a/src/GPU/pair_lj_cut_coul_long_gpu.cpp b/src/GPU/pair_lj_cut_coul_long_gpu.cpp index 8ea7a7f07bbd8d660f149a9e0620b74ca75dae8c..0c09cb1d51a39beb952f85c4a29950d077df56f6 100644 --- a/src/GPU/pair_lj_cut_coul_long_gpu.cpp +++ b/src/GPU/pair_lj_cut_coul_long_gpu.cpp @@ -56,7 +56,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul, const double qqrd2e, const double g_ewald); -int ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, +void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double **host_lj_cutsq); void ljcl_gpu_clear(); diff --git a/src/GPU/pair_lj_cut_gpu.cpp b/src/GPU/pair_lj_cut_gpu.cpp index ef97269772b3dad8c21418ecd4d1881657b4c014..feba9ed88bb20a81e7ff3b016c36b636d8c4b388 100644 --- a/src/GPU/pair_lj_cut_gpu.cpp +++ b/src/GPU/pair_lj_cut_gpu.cpp @@ -45,7 +45,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen); -int ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, +void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset); diff --git a/src/GPU/pair_lj_expand_gpu.cpp b/src/GPU/pair_lj_expand_gpu.cpp index 33bbf60600be9db82911e3cf6400c6262db0e8e5..43ec1ec45a0dc2ba63ba8b2b49d5755aaf711bcc 100644 --- a/src/GPU/pair_lj_expand_gpu.cpp +++ b/src/GPU/pair_lj_expand_gpu.cpp @@ -45,7 +45,7 @@ int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen); -int lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, +void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double **shift); void lje_gpu_clear(); diff --git a/src/GRANULAR/fix_wall_gran.cpp b/src/GRANULAR/fix_wall_gran.cpp index eeec94fdf729a76694188a715119781adfaaefe5..033c35dbacc0e96bb910cd1c3e6b2fa261eb815a 100644 --- a/src/GRANULAR/fix_wall_gran.cpp +++ b/src/GRANULAR/fix_wall_gran.cpp @@ -30,6 +30,7 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "neighbor.h" using namespace LAMMPS_NS; using namespace FixConst; diff --git a/src/GRANULAR/fix_wall_gran_region.cpp b/src/GRANULAR/fix_wall_gran_region.cpp index a09b9dfa421b971df17992722ccc55c1308b1454..d1c5d4c9c771f8a73bd0d95165ee362976f50509 100644 --- a/src/GRANULAR/fix_wall_gran_region.cpp +++ b/src/GRANULAR/fix_wall_gran_region.cpp @@ -30,6 +30,8 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "comm.h" +#include "neighbor.h" using namespace LAMMPS_NS; using namespace FixConst; diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index df5fc3e5f1a9d146a94f37435842d6076f4edff2..295e46bcaec54282a9511adb4ff5af91a9b75fff 100644 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -28,8 +28,20 @@ action () { # force rebuild of files with LMP_KOKKOS switch -touch ../accelerator_kokkos.h -touch ../memory.h +KOKKOS_INSTALLED=0 +if (test -e ../Makefile.package) then + KOKKOS_INSTALLED=`grep DLMP_KOKKOS ../Makefile.package | wc -l` +fi + +if (test $mode = 1) then + if (test $KOKKOS_INSTALLED = 0) then + touch ../accelerator_kokkos.h + fi +elif (test $mode = 0) then + if (test $KOKKOS_INSTALLED = 1) then + touch ../accelerator_kokkos.h + fi +fi # list of files with optional dependcies @@ -125,8 +137,9 @@ action improper_harmonic_kokkos.cpp improper_harmonic.cpp action improper_harmonic_kokkos.h improper_harmonic.h action kokkos.cpp action kokkos.h -action kokkos_type.h +action kokkos_base.h action kokkos_few.h +action kokkos_type.h action memory_kokkos.h action modify_kokkos.cpp action modify_kokkos.h @@ -229,6 +242,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h +action pair_yukawa_kokkos.cpp +action pair_yukawa_kokkos.h action pppm_kokkos.cpp pppm.cpp action pppm_kokkos.h pppm.h action rand_pool_wrap_kokkos.cpp diff --git a/src/KOKKOS/angle_charmm_kokkos.cpp b/src/KOKKOS/angle_charmm_kokkos.cpp index 401a00c856ffcc86543f22024bbb0cec1d9dc2e3..59a20c25df36184d4a23a9bf9f6071c625f5eb38 100644 --- a/src/KOKKOS/angle_charmm_kokkos.cpp +++ b/src/KOKKOS/angle_charmm_kokkos.cpp @@ -24,7 +24,7 @@ #include "comm.h" #include "force.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -51,8 +51,8 @@ template<class DeviceType> AngleCharmmKokkos<DeviceType>::~AngleCharmmKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -71,15 +71,15 @@ void AngleCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view<DeviceType>(); //} } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); d_vatom = k_vatom.template view<DeviceType>(); //} } diff --git a/src/KOKKOS/angle_class2_kokkos.cpp b/src/KOKKOS/angle_class2_kokkos.cpp index e851e9d500b10246571210a6c38daae5d472528a..108b4f48f225827228fe1a5667edbd7a7c8ad6aa 100644 --- a/src/KOKKOS/angle_class2_kokkos.cpp +++ b/src/KOKKOS/angle_class2_kokkos.cpp @@ -24,7 +24,7 @@ #include "comm.h" #include "force.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -51,8 +51,8 @@ template<class DeviceType> AngleClass2Kokkos<DeviceType>::~AngleClass2Kokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -70,13 +70,13 @@ void AngleClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } diff --git a/src/KOKKOS/angle_harmonic_kokkos.cpp b/src/KOKKOS/angle_harmonic_kokkos.cpp index 9fd237ddb3e90404aa9e9a979be2ccaba3953e6d..dd5a1e26c7c55d135a5e37a0619dace64b49dbb1 100644 --- a/src/KOKKOS/angle_harmonic_kokkos.cpp +++ b/src/KOKKOS/angle_harmonic_kokkos.cpp @@ -24,7 +24,7 @@ #include "comm.h" #include "force.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -51,8 +51,8 @@ template<class DeviceType> AngleHarmonicKokkos<DeviceType>::~AngleHarmonicKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -70,13 +70,13 @@ void AngleHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"angle:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"angle:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index 31b33dbdc9a2c406e40e6e36f8898d88d1c50d5b..4ecead5b1d052be975c40069d169a7fad43cbf63 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -19,7 +19,7 @@ #include "update.h" #include "domain.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "kokkos.h" @@ -33,59 +33,59 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp) {} AtomKokkos::~AtomKokkos() { - memory->destroy_kokkos(k_tag, tag); - memory->destroy_kokkos(k_mask, mask); - memory->destroy_kokkos(k_type, type); - memory->destroy_kokkos(k_image, image); - memory->destroy_kokkos(k_molecule, molecule); - - memory->destroy_kokkos(k_x, x); - memory->destroy_kokkos(k_v, v); - memory->destroy_kokkos(k_f, f); - - memory->destroy_kokkos(k_mass, mass); - memory->destroy_kokkos(k_q, q); - - memory->destroy_kokkos(k_radius, radius); - memory->destroy_kokkos(k_rmass, rmass); - memory->destroy_kokkos(k_omega, omega); - memory->destroy_kokkos(k_angmom, angmom); - memory->destroy_kokkos(k_torque, torque); - - memory->destroy_kokkos(k_nspecial, nspecial); - memory->destroy_kokkos(k_special, special); - memory->destroy_kokkos(k_num_bond, num_bond); - memory->destroy_kokkos(k_bond_type, bond_type); - memory->destroy_kokkos(k_bond_atom, bond_atom); - memory->destroy_kokkos(k_num_angle, num_angle); - memory->destroy_kokkos(k_angle_type, angle_type); - memory->destroy_kokkos(k_angle_atom1, angle_atom1); - memory->destroy_kokkos(k_angle_atom2, angle_atom2); - memory->destroy_kokkos(k_angle_atom3, angle_atom3); - memory->destroy_kokkos(k_num_dihedral, num_dihedral); - memory->destroy_kokkos(k_dihedral_type, dihedral_type); - memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); - memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); - memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); - memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); - memory->destroy_kokkos(k_num_improper, num_improper); - memory->destroy_kokkos(k_improper_type, improper_type); - memory->destroy_kokkos(k_improper_atom1, improper_atom1); - memory->destroy_kokkos(k_improper_atom2, improper_atom2); - memory->destroy_kokkos(k_improper_atom3, improper_atom3); - memory->destroy_kokkos(k_improper_atom4, improper_atom4); + memoryKK->destroy_kokkos(k_tag, tag); + memoryKK->destroy_kokkos(k_mask, mask); + memoryKK->destroy_kokkos(k_type, type); + memoryKK->destroy_kokkos(k_image, image); + memoryKK->destroy_kokkos(k_molecule, molecule); + + memoryKK->destroy_kokkos(k_x, x); + memoryKK->destroy_kokkos(k_v, v); + memoryKK->destroy_kokkos(k_f, f); + + memoryKK->destroy_kokkos(k_mass, mass); + memoryKK->destroy_kokkos(k_q, q); + + memoryKK->destroy_kokkos(k_radius, radius); + memoryKK->destroy_kokkos(k_rmass, rmass); + memoryKK->destroy_kokkos(k_omega, omega); + memoryKK->destroy_kokkos(k_angmom, angmom); + memoryKK->destroy_kokkos(k_torque, torque); + + memoryKK->destroy_kokkos(k_nspecial, nspecial); + memoryKK->destroy_kokkos(k_special, special); + memoryKK->destroy_kokkos(k_num_bond, num_bond); + memoryKK->destroy_kokkos(k_bond_type, bond_type); + memoryKK->destroy_kokkos(k_bond_atom, bond_atom); + memoryKK->destroy_kokkos(k_num_angle, num_angle); + memoryKK->destroy_kokkos(k_angle_type, angle_type); + memoryKK->destroy_kokkos(k_angle_atom1, angle_atom1); + memoryKK->destroy_kokkos(k_angle_atom2, angle_atom2); + memoryKK->destroy_kokkos(k_angle_atom3, angle_atom3); + memoryKK->destroy_kokkos(k_num_dihedral, num_dihedral); + memoryKK->destroy_kokkos(k_dihedral_type, dihedral_type); + memoryKK->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); + memoryKK->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); + memoryKK->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); + memoryKK->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); + memoryKK->destroy_kokkos(k_num_improper, num_improper); + memoryKK->destroy_kokkos(k_improper_type, improper_type); + memoryKK->destroy_kokkos(k_improper_atom1, improper_atom1); + memoryKK->destroy_kokkos(k_improper_atom2, improper_atom2); + memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3); + memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4); // USER-DPD package - memory->destroy_kokkos(k_uCond,uCond); - memory->destroy_kokkos(k_uMech,uMech); - memory->destroy_kokkos(k_uChem,uChem); - memory->destroy_kokkos(k_uCG,uCG); - memory->destroy_kokkos(k_uCGnew,uCGnew); - memory->destroy_kokkos(k_rho,rho); - memory->destroy_kokkos(k_dpdTheta,dpdTheta); - memory->destroy_kokkos(k_duChem,duChem); - - memory->destroy_kokkos(k_dvector,dvector); + memoryKK->destroy_kokkos(k_uCond,uCond); + memoryKK->destroy_kokkos(k_uMech,uMech); + memoryKK->destroy_kokkos(k_uChem,uChem); + memoryKK->destroy_kokkos(k_uCG,uCG); + memoryKK->destroy_kokkos(k_uCGnew,uCGnew); + memoryKK->destroy_kokkos(k_rho,rho); + memoryKK->destroy_kokkos(k_dpdTheta,dpdTheta); + memoryKK->destroy_kokkos(k_duChem,duChem); + + memoryKK->destroy_kokkos(k_dvector,dvector); dvector = NULL; } @@ -232,10 +232,10 @@ void AtomKokkos::sort() void AtomKokkos::grow(unsigned int mask){ if (mask & SPECIAL_MASK){ - memory->destroy_kokkos(k_special, special); + memoryKK->destroy_kokkos(k_special, special); sync(Device, mask); modified(Device, mask); - memory->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special"); + memoryKK->grow_kokkos(k_special,special,nmax,maxspecial,"atom:special"); avec->grow_reset(); sync(Host, mask); } @@ -270,7 +270,7 @@ int AtomKokkos::add_custom(const char *name, int flag) int n = strlen(name) + 1; dname[index] = new char[n]; strcpy(dname[index],name); - memory->grow_kokkos(k_dvector,dvector,ndvector,nmax, + memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax, "atom:dvector"); } @@ -291,7 +291,7 @@ void AtomKokkos::remove_custom(int flag, int index) delete [] iname[index]; iname[index] = NULL; } else { - //memory->destroy_kokkos(dvector); + //memoryKK->destroy_kokkos(dvector); dvector[index] = NULL; delete [] dname[index]; dname[index] = NULL; @@ -302,25 +302,25 @@ void AtomKokkos::remove_custom(int flag, int index) void AtomKokkos::deallocate_topology() { - memory->destroy_kokkos(k_bond_type, bond_type); - memory->destroy_kokkos(k_bond_atom, bond_atom); - - memory->destroy_kokkos(k_angle_type, angle_type); - memory->destroy_kokkos(k_angle_atom1, angle_atom1); - memory->destroy_kokkos(k_angle_atom2, angle_atom2); - memory->destroy_kokkos(k_angle_atom3, angle_atom3); - - memory->destroy_kokkos(k_dihedral_type, dihedral_type); - memory->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); - memory->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); - memory->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); - memory->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); - - memory->destroy_kokkos(k_improper_type, improper_type); - memory->destroy_kokkos(k_improper_atom1, improper_atom1); - memory->destroy_kokkos(k_improper_atom2, improper_atom2); - memory->destroy_kokkos(k_improper_atom3, improper_atom3); - memory->destroy_kokkos(k_improper_atom4, improper_atom4); + memoryKK->destroy_kokkos(k_bond_type, bond_type); + memoryKK->destroy_kokkos(k_bond_atom, bond_atom); + + memoryKK->destroy_kokkos(k_angle_type, angle_type); + memoryKK->destroy_kokkos(k_angle_atom1, angle_atom1); + memoryKK->destroy_kokkos(k_angle_atom2, angle_atom2); + memoryKK->destroy_kokkos(k_angle_atom3, angle_atom3); + + memoryKK->destroy_kokkos(k_dihedral_type, dihedral_type); + memoryKK->destroy_kokkos(k_dihedral_atom1, dihedral_atom1); + memoryKK->destroy_kokkos(k_dihedral_atom2, dihedral_atom2); + memoryKK->destroy_kokkos(k_dihedral_atom3, dihedral_atom3); + memoryKK->destroy_kokkos(k_dihedral_atom4, dihedral_atom4); + + memoryKK->destroy_kokkos(k_improper_type, improper_type); + memoryKK->destroy_kokkos(k_improper_atom1, improper_atom1); + memoryKK->destroy_kokkos(k_improper_atom2, improper_atom2); + memoryKK->destroy_kokkos(k_improper_atom3, improper_atom3); + memoryKK->destroy_kokkos(k_improper_atom4, improper_atom4); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 05414cf2e45cdc92f9750c552cb390d915e151cb..a9e55f530a2c1ab3e93eb606f7375bf674c678aa 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -68,33 +68,33 @@ void AtomVecAngleKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); - memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); - memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memoryKK->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, "atom:special"); - memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); - memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memoryKK->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, "atom:bond_type"); - memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, "atom:bond_atom"); - memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); - memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memoryKK->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, "atom:angle_type"); - memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, "atom:angle_atom1"); - memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, "atom:angle_atom2"); - memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, "atom:angle_atom3"); grow_reset(); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 6c610c8c111409d8c18201d9be23e44a4fe9195c..f021c45db6feb9c1407115dd1cbeadebf9ddf1d6 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -64,14 +64,14 @@ void AtomVecAtomicKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); grow_reset(); sync(Host,ALL_MASK); diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 076144420c1442c5db69008905263313b4757be2..bf682c507fd5c3210db20a1e50b35dc09d16a1c7 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -65,21 +65,21 @@ void AtomVecBondKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - - memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); - memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); - memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,"atom:special"); - memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); - memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,"atom:bond_type"); - memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + + memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memoryKK->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial,"atom:special"); + memoryKK->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memoryKK->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom,"atom:bond_type"); + memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom"); grow_reset(); sync(Host,ALL_MASK); diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 7b8b74b4051b63411f673b537ca4c1b9ce731198..a9ae5cc2d1e999d8dc94964e43a3f0d8be4a2a84 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -67,16 +67,16 @@ void AtomVecChargeKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); + memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); grow_reset(); sync(Host,ALL_MASK); diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index c4e493bd8536dd036931e61c0e0f080fc9d925ad..9c54ffccc50213111d7e47bd39751f474c475438 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -67,24 +67,24 @@ void AtomVecDPDKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - memory->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); - memory->grow_kokkos(atomKK->k_dpdTheta,atomKK->dpdTheta,nmax,"atom:dpdTheta"); - memory->grow_kokkos(atomKK->k_uCond,atomKK->uCond,nmax,"atom:uCond"); - memory->grow_kokkos(atomKK->k_uMech,atomKK->uMech,nmax,"atom:uMech"); - memory->grow_kokkos(atomKK->k_uChem,atomKK->uChem,nmax,"atom:uChem"); - memory->grow_kokkos(atomKK->k_uCG,atomKK->uCG,nmax,"atom:uCG"); - memory->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); - memory->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); + memoryKK->grow_kokkos(atomKK->k_rho,atomKK->rho,nmax,"atom:rho"); + memoryKK->grow_kokkos(atomKK->k_dpdTheta,atomKK->dpdTheta,nmax,"atom:dpdTheta"); + memoryKK->grow_kokkos(atomKK->k_uCond,atomKK->uCond,nmax,"atom:uCond"); + memoryKK->grow_kokkos(atomKK->k_uMech,atomKK->uMech,nmax,"atom:uMech"); + memoryKK->grow_kokkos(atomKK->k_uChem,atomKK->uChem,nmax,"atom:uChem"); + memoryKK->grow_kokkos(atomKK->k_uCG,atomKK->uCG,nmax,"atom:uCG"); + memoryKK->grow_kokkos(atomKK->k_uCGnew,atomKK->uCGnew,nmax,"atom:uCGnew"); + memoryKK->grow_kokkos(atomKK->k_duChem,atomKK->duChem,nmax,"atom:duChem"); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index 8e9abe40675f7816ca969a23c0ab55855f71c5d1..9369d7e84437f184051aed76f129cbb3fe028efb 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -67,59 +67,59 @@ void AtomVecFullKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - memory->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); - memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); + memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); - memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); - memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memoryKK->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, "atom:special"); - memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); - memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memoryKK->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, "atom:bond_type"); - memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, "atom:bond_atom"); - memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); - memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memoryKK->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, "atom:angle_type"); - memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, "atom:angle_atom1"); - memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, "atom:angle_atom2"); - memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, "atom:angle_atom3"); - memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); - memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, + memoryKK->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); + memoryKK->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, atomKK->dihedral_per_atom,"atom:dihedral_type"); - memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom1"); - memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom2"); - memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom3"); - memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom4"); - memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); - memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, + memoryKK->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); + memoryKK->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, atomKK->improper_per_atom,"atom:improper_type"); - memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, atomKK->improper_per_atom,"atom:improper_atom1"); - memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, atomKK->improper_per_atom,"atom:improper_atom2"); - memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, atomKK->improper_per_atom,"atom:improper_atom3"); - memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index e5e361e70ab21645b67608f6d4f68ab07fb19900..b5aadb18d612cfbd30a6c3666fd85a3dfe489d82 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -18,7 +18,7 @@ #include "domain.h" #include "modify.h" #include "fix.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index dbf6a857b2521874ba7dca32a4cf374030c546e1..6f232a319b06238c95ff903706ad759228045c82 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -19,7 +19,7 @@ #include "modify.h" #include "fix.h" #include "atom_masks.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" using namespace LAMMPS_NS; @@ -67,57 +67,57 @@ void AtomVecMolecularKokkos::grow(int n) sync(Device,ALL_MASK); modified(Device,ALL_MASK); - memory->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); - memory->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); - memory->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); - memory->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); + memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); + memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); + memoryKK->grow_kokkos(atomKK->k_mask,atomKK->mask,nmax,"atom:mask"); + memoryKK->grow_kokkos(atomKK->k_image,atomKK->image,nmax,"atom:image"); - memory->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); - memory->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); - memory->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); + memoryKK->grow_kokkos(atomKK->k_x,atomKK->x,nmax,3,"atom:x"); + memoryKK->grow_kokkos(atomKK->k_v,atomKK->v,nmax,3,"atom:v"); + memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); - memory->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); - memory->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); - memory->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, + memoryKK->grow_kokkos(atomKK->k_molecule,atomKK->molecule,nmax,"atom:molecule"); + memoryKK->grow_kokkos(atomKK->k_nspecial,atomKK->nspecial,nmax,3,"atom:nspecial"); + memoryKK->grow_kokkos(atomKK->k_special,atomKK->special,nmax,atomKK->maxspecial, "atom:special"); - memory->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); - memory->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_bond,atomKK->num_bond,nmax,"atom:num_bond"); + memoryKK->grow_kokkos(atomKK->k_bond_type,atomKK->bond_type,nmax,atomKK->bond_per_atom, "atom:bond_type"); - memory->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, + memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom, "atom:bond_atom"); - memory->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); - memory->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_num_angle,atomKK->num_angle,nmax,"atom:num_angle"); + memoryKK->grow_kokkos(atomKK->k_angle_type,atomKK->angle_type,nmax,atomKK->angle_per_atom, "atom:angle_type"); - memory->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom1,atomKK->angle_atom1,nmax,atomKK->angle_per_atom, "atom:angle_atom1"); - memory->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom2,atomKK->angle_atom2,nmax,atomKK->angle_per_atom, "atom:angle_atom2"); - memory->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, + memoryKK->grow_kokkos(atomKK->k_angle_atom3,atomKK->angle_atom3,nmax,atomKK->angle_per_atom, "atom:angle_atom3"); - memory->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); - memory->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, + memoryKK->grow_kokkos(atomKK->k_num_dihedral,atomKK->num_dihedral,nmax,"atom:num_dihedral"); + memoryKK->grow_kokkos(atomKK->k_dihedral_type,atomKK->dihedral_type,nmax, atomKK->dihedral_per_atom,"atom:dihedral_type"); - memory->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom1,atomKK->dihedral_atom1,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom1"); - memory->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom2,atomKK->dihedral_atom2,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom2"); - memory->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom3,atomKK->dihedral_atom3,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom3"); - memory->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, + memoryKK->grow_kokkos(atomKK->k_dihedral_atom4,atomKK->dihedral_atom4,nmax, atomKK->dihedral_per_atom,"atom:dihedral_atom4"); - memory->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); - memory->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, + memoryKK->grow_kokkos(atomKK->k_num_improper,atomKK->num_improper,nmax,"atom:num_improper"); + memoryKK->grow_kokkos(atomKK->k_improper_type,atomKK->improper_type,nmax, atomKK->improper_per_atom,"atom:improper_type"); - memory->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom1,atomKK->improper_atom1,nmax, atomKK->improper_per_atom,"atom:improper_atom1"); - memory->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom2,atomKK->improper_atom2,nmax, atomKK->improper_per_atom,"atom:improper_atom2"); - memory->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom3,atomKK->improper_atom3,nmax, atomKK->improper_per_atom,"atom:improper_atom3"); - memory->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, + memoryKK->grow_kokkos(atomKK->k_improper_atom4,atomKK->improper_atom4,nmax, atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); diff --git a/src/KOKKOS/bond_class2_kokkos.cpp b/src/KOKKOS/bond_class2_kokkos.cpp index b3c11c9a06c468be08f9ce2b2a29919c949ff475..df2f2c1e9be24a6d807ecf12e67c239735dde7f6 100644 --- a/src/KOKKOS/bond_class2_kokkos.cpp +++ b/src/KOKKOS/bond_class2_kokkos.cpp @@ -23,7 +23,7 @@ #include "domain.h" #include "comm.h" #include "force.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -47,8 +47,8 @@ template<class DeviceType> BondClass2Kokkos<DeviceType>::~BondClass2Kokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -67,15 +67,15 @@ void BondClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view<DeviceType>(); //} } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); d_vatom = k_vatom.template view<DeviceType>(); //} } diff --git a/src/KOKKOS/bond_fene_kokkos.cpp b/src/KOKKOS/bond_fene_kokkos.cpp index 8a716a98ef534bfdcc3daf31696d01e1af22b071..20c20542083b35c289d8efdab2f3dce5adc95077 100644 --- a/src/KOKKOS/bond_fene_kokkos.cpp +++ b/src/KOKKOS/bond_fene_kokkos.cpp @@ -23,7 +23,7 @@ #include "domain.h" #include "comm.h" #include "force.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -56,8 +56,8 @@ template<class DeviceType> BondFENEKokkos<DeviceType>::~BondFENEKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -75,13 +75,13 @@ void BondFENEKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"bond:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"bond:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/bond_harmonic_kokkos.cpp b/src/KOKKOS/bond_harmonic_kokkos.cpp index da45c70d6c7739f08ad0fb559e6d9d7f768437c4..c4e0c3a81763e765ee593ba13b84c4b0fcec3616 100644 --- a/src/KOKKOS/bond_harmonic_kokkos.cpp +++ b/src/KOKKOS/bond_harmonic_kokkos.cpp @@ -23,7 +23,7 @@ #include "domain.h" #include "comm.h" #include "force.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -48,8 +48,8 @@ template<class DeviceType> BondHarmonicKokkos<DeviceType>::~BondHarmonicKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -68,15 +68,15 @@ void BondHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view<DeviceType>(); //} } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); d_vatom = k_vatom.template view<DeviceType>(); //} } diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 5534341342d5017378169609dce1a0c5964d8338..e506fa1ad42c849d0b31b0e564a45842d6aa1cfe 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -20,7 +20,7 @@ #include "domain.h" #include "atom_masks.h" #include "error.h" -#include "memory.h" +#include "memory_kokkos.h" #include "force.h" #include "pair.h" #include "fix.h" @@ -28,6 +28,7 @@ #include "dump.h" #include "output.h" #include "modify.h" +#include "kokkos_base.h" using namespace LAMMPS_NS; @@ -71,7 +72,7 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) for (int i = 0; i < maxswap; i++) { maxsendlist[i] = BUFMIN; } - memory->create_kokkos(k_sendlist,sendlist,maxswap,BUFMIN,"comm:sendlist"); + memoryKK->create_kokkos(k_sendlist,sendlist,maxswap,BUFMIN,"comm:sendlist"); max_buf_pair = 0; k_buf_send_pair = DAT::tdual_xfloat_1d("comm:k_buf_send_pair",1); @@ -82,11 +83,11 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) CommKokkos::~CommKokkos() { - memory->destroy_kokkos(k_sendlist,sendlist); + memoryKK->destroy_kokkos(k_sendlist,sendlist); sendlist = NULL; - memory->destroy_kokkos(k_buf_send,buf_send); + memoryKK->destroy_kokkos(k_buf_send,buf_send); buf_send = NULL; - memory->destroy_kokkos(k_buf_recv,buf_recv); + memoryKK->destroy_kokkos(k_buf_recv,buf_recv); buf_recv = NULL; } @@ -379,6 +380,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) MPI_Request request; int nsize = pair->comm_forward; + KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair); for (iswap = 0; iswap < nswap; iswap++) { int n = MAX(max_buf_pair,nsize*sendnum[iswap]); @@ -391,7 +393,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // pack buffer - n = pair->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, + n = pairKKBase->pack_forward_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send_pair,pbc_flag[iswap],pbc[iswap]); // exchange with another proc @@ -408,7 +410,7 @@ void CommKokkos::forward_comm_pair_device(Pair *pair) // unpack buffer - pair->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair); + pairKKBase->unpack_forward_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv_pair); } } @@ -1067,7 +1069,7 @@ void CommKokkos::grow_list(int iswap, int n) k_sendlist.modify<LMPHostType>(); } - memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); + memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); for(int i=0;i<maxswap;i++) { maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0); @@ -1095,7 +1097,7 @@ void CommKokkos::grow_swap(int n) k_sendlist.modify<LMPHostType>(); } - memory->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); + memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist"); memory->grow(maxsendlist,n,"comm:maxsendlist"); for (int i=0;i<maxswap;i++) maxsendlist[i]=size; diff --git a/src/KOKKOS/comm_tiled_kokkos.cpp b/src/KOKKOS/comm_tiled_kokkos.cpp index adcc634aa1d766858c62e85a9ec6f7557d37da6c..33cd8eaa6e9eabc518a163d1ffbe68bbe0cffc30 100644 --- a/src/KOKKOS/comm_tiled_kokkos.cpp +++ b/src/KOKKOS/comm_tiled_kokkos.cpp @@ -25,7 +25,7 @@ #include "compute.h" #include "output.h" #include "dump.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" diff --git a/src/KOKKOS/dihedral_charmm_kokkos.cpp b/src/KOKKOS/dihedral_charmm_kokkos.cpp index 7f2117c97f545021ca35172c62f9299adb9e8275..71635ec76c48d5642e7eccccef8d360c2077fc60 100644 --- a/src/KOKKOS/dihedral_charmm_kokkos.cpp +++ b/src/KOKKOS/dihedral_charmm_kokkos.cpp @@ -25,7 +25,7 @@ #include "force.h" #include "pair.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -56,8 +56,8 @@ template<class DeviceType> DihedralCharmmKokkos<DeviceType>::~DihedralCharmmKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -81,8 +81,8 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); d_eatom = k_eatom.template view<DeviceType>(); k_eatom_pair = Kokkos::DualView<E_FLOAT*,Kokkos::LayoutRight,DeviceType>("dihedral:eatom_pair",maxeatom); d_eatom_pair = k_eatom.template view<DeviceType>(); @@ -90,8 +90,8 @@ void DihedralCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_in) } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); d_vatom = k_vatom.template view<DeviceType>(); k_vatom_pair = Kokkos::DualView<F_FLOAT*[6],Kokkos::LayoutRight,DeviceType>("dihedral:vatom_pair",maxvatom); d_vatom_pair = k_vatom.template view<DeviceType>(); diff --git a/src/KOKKOS/dihedral_class2_kokkos.cpp b/src/KOKKOS/dihedral_class2_kokkos.cpp index 89e42c6f836a1f3fa9e502875332bffb8bb4cd08..d32ea4a4611089808f31faf7756967f6572e0f94 100644 --- a/src/KOKKOS/dihedral_class2_kokkos.cpp +++ b/src/KOKKOS/dihedral_class2_kokkos.cpp @@ -24,7 +24,7 @@ #include "domain.h" #include "force.h" #include "update.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -56,8 +56,8 @@ template<class DeviceType> DihedralClass2Kokkos<DeviceType>::~DihedralClass2Kokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -75,13 +75,13 @@ void DihedralClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } diff --git a/src/KOKKOS/dihedral_opls_kokkos.cpp b/src/KOKKOS/dihedral_opls_kokkos.cpp index 0ee00ca8db438c2c47eba7443e93039295ab2f93..4349aff48df93848f097bd1130c758ec6be91951 100644 --- a/src/KOKKOS/dihedral_opls_kokkos.cpp +++ b/src/KOKKOS/dihedral_opls_kokkos.cpp @@ -24,7 +24,7 @@ #include "domain.h" #include "force.h" #include "update.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -56,8 +56,8 @@ template<class DeviceType> DihedralOPLSKokkos<DeviceType>::~DihedralOPLSKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -75,13 +75,13 @@ void DihedralOPLSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp index 552141ced24f50d68108d372405c5cb82da03360..5c106c19f3432edcc5a432d96031b12270bb39bb 100644 --- a/src/KOKKOS/fix_eos_table_rx_kokkos.cpp +++ b/src/KOKKOS/fix_eos_table_rx_kokkos.cpp @@ -21,7 +21,7 @@ #include "atom_kokkos.h" #include "error.h" #include "force.h" -#include "memory.h" +#include "memory_kokkos.h" #include "comm.h" #include <math.h> #include "modify.h" @@ -517,14 +517,14 @@ void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables() { const int tlm1 = tablength-1; - memory->create_kokkos(d_table->lo,h_table->lo,ntables,"Table::lo"); - memory->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi"); - memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memoryKK->create_kokkos(d_table->lo,h_table->lo,ntables,"Table::lo"); + memoryKK->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi"); + memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); if(tabstyle == LINEAR) { - memory->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memoryKK->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); } for(int i=0; i < ntables; i++) { diff --git a/src/KOKKOS/fix_langevin_kokkos.cpp b/src/KOKKOS/fix_langevin_kokkos.cpp index fb0f329a91bb398bad243493e9d834ac3ab1aa8d..108c3b692ad7f4fcb176a407a558c5dca3685ca8 100644 --- a/src/KOKKOS/fix_langevin_kokkos.cpp +++ b/src/KOKKOS/fix_langevin_kokkos.cpp @@ -21,7 +21,7 @@ #include "update.h" #include "respa.h" #include "error.h" -#include "memory.h" +#include "memory_kokkos.h" #include "group.h" #include "random_mars.h" #include "compute.h" @@ -49,9 +49,9 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a int ntypes = atomKK->ntypes; // allocate per-type arrays for force prefactors - memory->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1"); - memory->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2"); - memory->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio"); + memoryKK->create_kokkos(k_gfactor1,gfactor1,ntypes+1,"langevin:gfactor1"); + memoryKK->create_kokkos(k_gfactor2,gfactor2,ntypes+1,"langevin:gfactor2"); + memoryKK->create_kokkos(k_ratio,ratio,ntypes+1,"langevin:ratio"); d_gfactor1 = k_gfactor1.template view<DeviceType>(); h_gfactor1 = k_gfactor1.template view<LMPHostType>(); d_gfactor2 = k_gfactor2.template view<DeviceType>(); @@ -92,12 +92,12 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a template<class DeviceType> FixLangevinKokkos<DeviceType>::~FixLangevinKokkos() { - memory->destroy_kokkos(k_gfactor1,gfactor1); - memory->destroy_kokkos(k_gfactor2,gfactor2); - memory->destroy_kokkos(k_ratio,ratio); - memory->destroy_kokkos(k_flangevin,flangevin); - if(gjfflag) memory->destroy_kokkos(k_franprev,franprev); - memory->destroy_kokkos(k_tforce,tforce); + memoryKK->destroy_kokkos(k_gfactor1,gfactor1); + memoryKK->destroy_kokkos(k_gfactor2,gfactor2); + memoryKK->destroy_kokkos(k_ratio,ratio); + memoryKK->destroy_kokkos(k_flangevin,flangevin); + if(gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev); + memoryKK->destroy_kokkos(k_tforce,tforce); } /* ---------------------------------------------------------------------- */ @@ -121,7 +121,7 @@ void FixLangevinKokkos<DeviceType>::init() template<class DeviceType> void FixLangevinKokkos<DeviceType>::grow_arrays(int nmax) { - memory->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev"); + memoryKK->grow_kokkos(k_franprev,franprev,nmax,3,"langevin:franprev"); d_franprev = k_franprev.template view<DeviceType>(); h_franprev = k_franprev.template view<LMPHostType>(); } @@ -167,9 +167,9 @@ void FixLangevinKokkos<DeviceType>::post_force(int vflag) // reallocate flangevin if necessary if (tallyflag) { if (nlocal > maxatom1) { - memory->destroy_kokkos(k_flangevin,flangevin); + memoryKK->destroy_kokkos(k_flangevin,flangevin); maxatom1 = atomKK->nmax; - memory->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin"); + memoryKK->create_kokkos(k_flangevin,flangevin,maxatom1,3,"langevin:flangevin"); d_flangevin = k_flangevin.template view<DeviceType>(); h_flangevin = k_flangevin.template view<LMPHostType>(); } @@ -671,8 +671,8 @@ void FixLangevinKokkos<DeviceType>::compute_target() } else { if (atom->nmax > maxatom2) { maxatom2 = atom->nmax; - memory->destroy_kokkos(k_tforce,tforce); - memory->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce"); + memoryKK->destroy_kokkos(k_tforce,tforce); + memoryKK->create_kokkos(k_tforce,tforce,maxatom2,"langevin:tforce"); d_tforce = k_tforce.template view<DeviceType>(); h_tforce = k_tforce.template view<LMPHostType>(); } diff --git a/src/KOKKOS/fix_nh_kokkos.cpp b/src/KOKKOS/fix_nh_kokkos.cpp index 7136c776a1de7e39d670fa53c69945f8f1d15b3f..345259e35519b24a6b6b669c175fc9e5bdeaf746 100644 --- a/src/KOKKOS/fix_nh_kokkos.cpp +++ b/src/KOKKOS/fix_nh_kokkos.cpp @@ -33,7 +33,7 @@ #include "update.h" #include "respa.h" #include "domain_kokkos.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" #include "atom_kokkos.h" diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index cb52988c318fcbd147a534ac16d2ea5494f5f63d..fe2f101e561f1dca37852084d6be56a6896f8b2d 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -16,7 +16,7 @@ #include "fix_property_atom_kokkos.h" #include "atom_kokkos.h" #include "comm.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "update.h" @@ -60,7 +60,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[m]][nmax_old],0,nbytes); } else if (style[m] == DOUBLE) { - memory->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.dimension_0(),nmax, + memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.dimension_0(),nmax, "atom:dvector"); //memory->grow(atom->dvector[index[m]],nmax,"atom:dvector"); //size_t nbytes = (nmax-nmax_old) * sizeof(double); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 5d2f6a0438a400302785c70975beee07d4d4f32b..91a22361fc91b4371eb7c7638e4fdb1b9c229d46 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -35,7 +35,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "pair_reaxc_kokkos.h" diff --git a/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp b/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp index 0d74a49ed304498a5f11d55133898a4565e56002..586daadd55e64c3922f52ee16faa454f7b723da5 100644 --- a/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp +++ b/src/KOKKOS/fix_reaxc_bonds_kokkos.cpp @@ -31,7 +31,7 @@ #include "compute.h" #include "input.h" #include "variable.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "reaxc_list.h" #include "reaxc_types.h" @@ -95,7 +95,7 @@ void FixReaxCBondsKokkos::Output_ReaxC_Bonds(bigint ntimestep, FILE *fp) MPI_Allreduce(&nlocal,&nlocal_max,1,MPI_INT,MPI_MAX,world); nbuf = 1+(numbonds_max*2+10)*nlocal_max; - memory->create_kokkos(k_buf,buf,nbuf,"reax/c/bonds:buf"); + memoryKK->create_kokkos(k_buf,buf,nbuf,"reax/c/bonds:buf"); // Pass information to buffer if (reaxc->execution_space == Device) @@ -107,7 +107,7 @@ void FixReaxCBondsKokkos::Output_ReaxC_Bonds(bigint ntimestep, FILE *fp) // Receive information from buffer for output RecvBuffer(buf, nbuf, nbuf_local, nlocal_tot, numbonds_max); - memory->destroy_kokkos(k_buf,buf); + memoryKK->destroy_kokkos(k_buf,buf); } /* ---------------------------------------------------------------------- */ diff --git a/src/KOKKOS/fix_reaxc_species_kokkos.cpp b/src/KOKKOS/fix_reaxc_species_kokkos.cpp index f2719f9f0e7048a6db47bf49dfd09979a6f5074b..a676c7ef27ae81cf7ec39e8467e7ecdceb816e06 100644 --- a/src/KOKKOS/fix_reaxc_species_kokkos.cpp +++ b/src/KOKKOS/fix_reaxc_species_kokkos.cpp @@ -33,7 +33,7 @@ #include "compute.h" #include "input.h" #include "variable.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "reaxc_list.h" #include "atom_masks.h" diff --git a/src/KOKKOS/fix_rx_kokkos.cpp b/src/KOKKOS/fix_rx_kokkos.cpp index b1cfd20be23f57a25981e151a5098c0b43b13ae6..61290ece33dfb72c50dc787542e4cea50fe5a077 100644 --- a/src/KOKKOS/fix_rx_kokkos.cpp +++ b/src/KOKKOS/fix_rx_kokkos.cpp @@ -17,7 +17,7 @@ #include "atom_masks.h" #include "atom_kokkos.h" #include "force.h" -#include "memory.h" +#include "memory_kokkos.h" #include "update.h" #include "respa.h" #include "modify.h" @@ -26,6 +26,9 @@ #include "neigh_request.h" #include "error.h" #include "math_special_kokkos.h" +#include "comm.h" +#include "domain.h" +#include "kokkos.h" #include <float.h> // DBL_EPSILON @@ -81,15 +84,15 @@ FixRxKokkos<DeviceType>::~FixRxKokkos() if (copymode) return; if (localTempFlag) - memory->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); + memoryKK->destroy_kokkos(k_dpdThetaLocal, dpdThetaLocal); - memory->destroy_kokkos(k_sumWeights, sumWeights); - //memory->destroy_kokkos(k_sumWeights); + memoryKK->destroy_kokkos(k_sumWeights, sumWeights); + //memoryKK->destroy_kokkos(k_sumWeights); //delete [] scratchSpace; - memory->destroy_kokkos(d_scratchSpace); + memoryKK->destroy_kokkos(d_scratchSpace); - memory->destroy_kokkos(k_cutsq); + memoryKK->destroy_kokkos(k_cutsq); } /* ---------------------------------------------------------------------- */ @@ -1233,9 +1236,9 @@ void FixRxKokkos<DeviceType>::create_kinetics_data(void) { //printf("Inside FixRxKokkos::create_kinetics_data\n"); - memory->create_kokkos( d_kineticsData.Arr, h_kineticsData.Arr, nreactions, "KineticsType::Arr"); - memory->create_kokkos( d_kineticsData.nArr, h_kineticsData.nArr, nreactions, "KineticsType::nArr"); - memory->create_kokkos( d_kineticsData.Ea, h_kineticsData.Ea, nreactions, "KineticsType::Ea"); + memoryKK->create_kokkos( d_kineticsData.Arr, h_kineticsData.Arr, nreactions, "KineticsType::Arr"); + memoryKK->create_kokkos( d_kineticsData.nArr, h_kineticsData.nArr, nreactions, "KineticsType::nArr"); + memoryKK->create_kokkos( d_kineticsData.Ea, h_kineticsData.Ea, nreactions, "KineticsType::Ea"); for (int i = 0; i < nreactions; ++i) { @@ -1251,8 +1254,8 @@ void FixRxKokkos<DeviceType>::create_kinetics_data(void) if (useSparseKinetics) { - memory->create_kokkos( d_kineticsData.nu , h_kineticsData.nu , nreactions, sparseKinetics_maxSpecies, "KineticsType::nu"); - memory->create_kokkos( d_kineticsData.nuk, h_kineticsData.nuk, nreactions, sparseKinetics_maxSpecies, "KineticsType::nuk"); + memoryKK->create_kokkos( d_kineticsData.nu , h_kineticsData.nu , nreactions, sparseKinetics_maxSpecies, "KineticsType::nu"); + memoryKK->create_kokkos( d_kineticsData.nuk, h_kineticsData.nuk, nreactions, sparseKinetics_maxSpecies, "KineticsType::nuk"); for (int i = 0; i < nreactions; ++i) for (int k = 0; k < sparseKinetics_maxSpecies; ++k) @@ -1266,8 +1269,8 @@ void FixRxKokkos<DeviceType>::create_kinetics_data(void) if (SparseKinetics_enableIntegralReactions) { - memory->create_kokkos( d_kineticsData.inu, h_kineticsData.inu, nreactions, sparseKinetics_maxSpecies, "KineticsType::inu"); - memory->create_kokkos( d_kineticsData.isIntegral, h_kineticsData.isIntegral, nreactions, "KineticsType::isIntegral"); + memoryKK->create_kokkos( d_kineticsData.inu, h_kineticsData.inu, nreactions, sparseKinetics_maxSpecies, "KineticsType::inu"); + memoryKK->create_kokkos( d_kineticsData.isIntegral, h_kineticsData.isIntegral, nreactions, "KineticsType::isIntegral"); for (int i = 0; i < nreactions; ++i) { @@ -1286,9 +1289,9 @@ void FixRxKokkos<DeviceType>::create_kinetics_data(void) //{ // Dense option - memory->create_kokkos( d_kineticsData.stoich, h_kineticsData.stoich, nreactions, nspecies, "KineticsType::stoich"); - memory->create_kokkos( d_kineticsData.stoichReactants, h_kineticsData.stoichReactants, nreactions, nspecies, "KineticsType::stoichReactants"); - memory->create_kokkos( d_kineticsData.stoichProducts, h_kineticsData.stoichProducts, nreactions, nspecies, "KineticsType::stoichProducts"); + memoryKK->create_kokkos( d_kineticsData.stoich, h_kineticsData.stoich, nreactions, nspecies, "KineticsType::stoich"); + memoryKK->create_kokkos( d_kineticsData.stoichReactants, h_kineticsData.stoichReactants, nreactions, nspecies, "KineticsType::stoichReactants"); + memoryKK->create_kokkos( d_kineticsData.stoichProducts, h_kineticsData.stoichProducts, nreactions, nspecies, "KineticsType::stoichProducts"); for (int i = 0; i < nreactions; ++i) for (int k = 0; k < nspecies; ++k) @@ -1445,8 +1448,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF const int count = nlocal + (newton_pair ? nghost : 0); if (count > k_dpdThetaLocal.template view<DeviceType>().dimension_0()) { - memory->destroy_kokkos (k_dpdThetaLocal, dpdThetaLocal); - memory->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); + memoryKK->destroy_kokkos (k_dpdThetaLocal, dpdThetaLocal); + memoryKK->create_kokkos (k_dpdThetaLocal, dpdThetaLocal, count, "FixRxKokkos::dpdThetaLocal"); this->d_dpdThetaLocal = k_dpdThetaLocal.template view<DeviceType>(); this->h_dpdThetaLocal = k_dpdThetaLocal.h_view; } @@ -1511,8 +1514,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF if (odeIntegrationFlag == ODE_LAMMPS_RKF45 && diagnosticFrequency == 1) { - memory->create_kokkos (k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps, nlocal, "FixRxKokkos::diagnosticCounterPerODEnSteps"); - memory->create_kokkos (k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs, nlocal, "FixRxKokkos::diagnosticCounterPerODEnFuncs"); + memoryKK->create_kokkos (k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps, nlocal, "FixRxKokkos::diagnosticCounterPerODEnSteps"); + memoryKK->create_kokkos (k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs, nlocal, "FixRxKokkos::diagnosticCounterPerODEnFuncs"); d_diagnosticCounterPerODEnSteps = k_diagnosticCounterPerODEnSteps.template view<DeviceType>(); d_diagnosticCounterPerODEnFuncs = k_diagnosticCounterPerODEnFuncs.template view<DeviceType>(); @@ -1542,8 +1545,8 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int vflag, const bool isPreF //typename ArrayTypes<DeviceType>::t_double_1d d_scratchSpace("d_scratchSpace", scratchSpaceSize * nlocal); if (nlocal*scratchSpaceSize > d_scratchSpace.dimension_0()) { - memory->destroy_kokkos (d_scratchSpace); - memory->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace"); + memoryKK->destroy_kokkos (d_scratchSpace); + memoryKK->create_kokkos (d_scratchSpace, nlocal*scratchSpaceSize, "FixRxKokkos::d_scratchSpace"); } #if 0 @@ -1811,8 +1814,8 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void) my_min[FuncSum] = std::min( my_min[FuncSum], (double)nFuncs ); } - memory->destroy_kokkos( k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps ); - memory->destroy_kokkos( k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs ); + memoryKK->destroy_kokkos( k_diagnosticCounterPerODEnSteps, diagnosticCounterPerODEnSteps ); + memoryKK->destroy_kokkos( k_diagnosticCounterPerODEnFuncs, diagnosticCounterPerODEnFuncs ); MPI_Reduce (my_sum_sq, sum_sq, 2*numCounters, MPI_DOUBLE, MPI_SUM, 0, world); @@ -2022,10 +2025,10 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature() { const int ntypes = atom->ntypes; - //memory->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); + //memoryKK->create_kokkos (k_cutsq, h_cutsq, ntypes+1, ntypes+1, "pair:cutsq"); if (ntypes+1 > k_cutsq.dimension_0()) { - memory->destroy_kokkos (k_cutsq); - memory->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq"); + memoryKK->destroy_kokkos (k_cutsq); + memoryKK->create_kokkos (k_cutsq, ntypes+1, ntypes+1, "FixRxKokkos::k_cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); } @@ -2043,10 +2046,10 @@ void FixRxKokkos<DeviceType>::computeLocalTemperature() // Initialize the local temperature weight array int sumWeightsCt = nlocal + (NEWTON_PAIR ? nghost : 0); - //memory->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + //memoryKK->create_kokkos (k_sumWeights, sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); if (sumWeightsCt > k_sumWeights.template view<DeviceType>().dimension_0()) { - memory->destroy_kokkos(k_sumWeights, sumWeights); - memory->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); + memoryKK->destroy_kokkos(k_sumWeights, sumWeights); + memoryKK->create_kokkos (k_sumWeights, sumWeightsCt, "FixRxKokkos::sumWeights"); d_sumWeights = k_sumWeights.template view<DeviceType>(); h_sumWeights = k_sumWeights.h_view; } diff --git a/src/KOKKOS/fix_setforce_kokkos.cpp b/src/KOKKOS/fix_setforce_kokkos.cpp index 2ef04ad6ee3285534ab1f36a26842641fcf5802c..e99160989497381ef63985fad512ec3e2d3af664 100644 --- a/src/KOKKOS/fix_setforce_kokkos.cpp +++ b/src/KOKKOS/fix_setforce_kokkos.cpp @@ -22,10 +22,11 @@ #include "respa.h" #include "input.h" #include "variable.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "force.h" #include "atom_masks.h" +#include "kokkos_base.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -45,7 +46,7 @@ FixSetForceKokkos<DeviceType>::FixSetForceKokkos(LAMMPS *lmp, int narg, char **a datamask_modify = EMPTY_MASK; memory->destroy(sforce); - memory->create_kokkos(k_sforce,sforce,maxatom,3,"setforce:sforce"); + memoryKK->create_kokkos(k_sforce,sforce,maxatom,3,"setforce:sforce"); } /* ---------------------------------------------------------------------- */ @@ -55,7 +56,7 @@ FixSetForceKokkos<DeviceType>::~FixSetForceKokkos() { if (copymode) return; - memory->destroy_kokkos(k_sforce,sforce); + memoryKK->destroy_kokkos(k_sforce,sforce); sforce = NULL; } @@ -90,7 +91,8 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag) region = domain->regions[iregion]; region->prematch(); DAT::tdual_int_1d k_match = DAT::tdual_int_1d("setforce:k_match",nlocal); - region->match_all_kokkos(groupbit,k_match); + KokkosBase* regionKKBase = dynamic_cast<KokkosBase*>(region); + regionKKBase->match_all_kokkos(groupbit,k_match); k_match.template sync<DeviceType>(); d_match = k_match.template view<DeviceType>(); } @@ -99,8 +101,8 @@ void FixSetForceKokkos<DeviceType>::post_force(int vflag) if (varflag == ATOM && atom->nmax > maxatom) { maxatom = atom->nmax; - memory->destroy_kokkos(k_sforce,sforce); - memory->create_kokkos(k_sforce,sforce,maxatom,3,"setforce:sforce"); + memoryKK->destroy_kokkos(k_sforce,sforce); + memoryKK->create_kokkos(k_sforce,sforce,maxatom,3,"setforce:sforce"); } foriginal[0] = foriginal[1] = foriginal[2] = 0.0; diff --git a/src/KOKKOS/fix_shardlow_kokkos.cpp b/src/KOKKOS/fix_shardlow_kokkos.cpp index 98bbb02714ca454b01e4ac3b65351b3d029ac06e..cc1bd6bede9fabe94f270464cd1ead9558ecad8e 100644 --- a/src/KOKKOS/fix_shardlow_kokkos.cpp +++ b/src/KOKKOS/fix_shardlow_kokkos.cpp @@ -50,7 +50,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "domain.h" #include "modify.h" // #include "pair_dpd_fdt.h" diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index 6871ef67aee6464b442e67e21dc0776e486d5a43..fdfaf296ef335f30654c1aea300db1102dfca772 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -15,8 +15,9 @@ #include "gridcomm_kokkos.h" #include "comm.h" #include "kspace.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" +#include "kokkos_base.h" using namespace LAMMPS_NS; @@ -126,8 +127,8 @@ template<class DeviceType> GridCommKokkos<DeviceType>::~GridCommKokkos() { for (int i = 0; i < nswap; i++) { - //memory->destroy_kokkos(swap[i].k_packlist,swap[i].packlist); - //memory->destroy_kokkos(swap[i].k_unpacklist,swap[i].unpacklist); + //memoryKK->destroy_kokkos(swap[i].k_packlist,swap[i].packlist); + //memoryKK->destroy_kokkos(swap[i].k_unpacklist,swap[i].unpacklist); } memory->sfree(swap); @@ -515,11 +516,13 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) k_packlist.sync<DeviceType>(); k_unpacklist.sync<DeviceType>(); + KokkosBase* kspaceKKBase = dynamic_cast<KokkosBase*>(kspace); + for (int m = 0; m < nswap; m++) { if (swap[m].sendproc == me) - kspace->pack_forward_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); + kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); else - kspace->pack_forward_kokkos(which,k_buf1,swap[m].npack,k_packlist,m); + kspaceKKBase->pack_forward_kspace_kokkos(which,k_buf1,swap[m].npack,k_packlist,m); if (swap[m].sendproc != me) { MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR, @@ -529,7 +532,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which) MPI_Wait(&request,MPI_STATUS_IGNORE); } - kspace->unpack_forward_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); + kspaceKKBase->unpack_forward_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); } } @@ -544,11 +547,13 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) k_packlist.sync<DeviceType>(); k_unpacklist.sync<DeviceType>(); + KokkosBase* kspaceKKBase = dynamic_cast<KokkosBase*>(kspace); + for (int m = nswap-1; m >= 0; m--) { if (swap[m].recvproc == me) - kspace->pack_reverse_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); + kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m); else - kspace->pack_reverse_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m); + kspaceKKBase->pack_reverse_kspace_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m); if (swap[m].recvproc != me) { MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR, @@ -558,7 +563,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which) MPI_Wait(&request,MPI_STATUS_IGNORE); } - kspace->unpack_reverse_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); + kspaceKKBase->unpack_reverse_kspace_kokkos(which,k_buf2,swap[m].npack,k_packlist,m); } } diff --git a/src/KOKKOS/improper_class2_kokkos.cpp b/src/KOKKOS/improper_class2_kokkos.cpp index c2cb7dfe2bf55df1f64988abe60a152d42984654..d2d465a250f1ff97bfd4668fe26eec489f62f850 100644 --- a/src/KOKKOS/improper_class2_kokkos.cpp +++ b/src/KOKKOS/improper_class2_kokkos.cpp @@ -26,7 +26,7 @@ #include "force.h" #include "update.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -58,8 +58,8 @@ template<class DeviceType> ImproperClass2Kokkos<DeviceType>::~ImproperClass2Kokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -78,15 +78,15 @@ void ImproperClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view<DeviceType>(); //} } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); d_vatom = k_vatom.template view<DeviceType>(); //} } diff --git a/src/KOKKOS/improper_harmonic_kokkos.cpp b/src/KOKKOS/improper_harmonic_kokkos.cpp index 73e105864f630b0516ac90ba41058cf101878bea..49dd36ed194da6ea94df99144f75602ed11af5ea 100644 --- a/src/KOKKOS/improper_harmonic_kokkos.cpp +++ b/src/KOKKOS/improper_harmonic_kokkos.cpp @@ -26,7 +26,7 @@ #include "force.h" #include "update.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -58,8 +58,8 @@ template<class DeviceType> ImproperHarmonicKokkos<DeviceType>::~ImproperHarmonicKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -78,15 +78,15 @@ void ImproperHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in) if (eflag_atom) { //if(k_eatom.dimension_0()<maxeatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"improper:eatom"); d_eatom = k_eatom.template view<DeviceType>(); //} } if (vflag_atom) { //if(k_vatom.dimension_0()<maxvatom) { // won't work without adding zero functor - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"improper:vatom"); d_vatom = k_vatom.template view<DeviceType>(); //} } diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index 2b02624dcef30f29947183cb4f2a5acb68916779..5a742233534403a48b99ca12eae8f8873583401f 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -23,6 +23,7 @@ #include "neighbor_kokkos.h" #include "neigh_list_kokkos.h" #include "error.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; @@ -33,6 +34,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) kokkos_exists = 1; lmp->kokkos = this; + delete memory; + memory = new MemoryKokkos(lmp); + memoryKK = (MemoryKokkos*) memory; + auto_sync = 1; int me = 0; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h new file mode 100644 index 0000000000000000000000000000000000000000..3279cb2947272022704036cbd51ddcb10ff788ef --- /dev/null +++ b/src/KOKKOS/kokkos_base.h @@ -0,0 +1,47 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef KOKKOS_BASE_H +#define KOKKOS_BASE_H + +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class KokkosBase { + public: + KokkosBase() {} + + //Kspace + virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + + // Pair + virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, + int, DAT::tdual_xfloat_1d &, + int, int *) {return 0;}; + virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {} + + // Region + virtual void match_all_kokkos(int, DAT::tdual_int_1d) {} +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/memory_kokkos.h b/src/KOKKOS/memory_kokkos.h index 8ade198c4056727f78b620300b72783f15375431..9f930faae2e4f973a0849064a585888e29a8483f 100644 --- a/src/KOKKOS/memory_kokkos.h +++ b/src/KOKKOS/memory_kokkos.h @@ -11,6 +11,18 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#ifndef LMP_MEMORY_KOKKOS_H +#define LMP_MEMORY_KOKKOS_H + +#include "memory.h" +#include "kokkos_type.h" + +namespace LAMMPS_NS { + +class MemoryKokkos : public Memory { + public: + MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {} + /* ---------------------------------------------------------------------- Kokkos versions of create/grow/destroy multi-dimensional arrays ------------------------------------------------------------------------- */ @@ -279,3 +291,10 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array) sfree(array); array = NULL; } + +}; + +} + +#endif + diff --git a/src/KOKKOS/nbin_ssa_kokkos.cpp b/src/KOKKOS/nbin_ssa_kokkos.cpp index ab97cb584805b6328df67cfe954d02ba44fc3907..1a881658567524b181bbeb52af3b97a17246fc12 100644 --- a/src/KOKKOS/nbin_ssa_kokkos.cpp +++ b/src/KOKKOS/nbin_ssa_kokkos.cpp @@ -26,7 +26,7 @@ #include "error.h" #include "atom_masks.h" -// #include "memory.h" +// #include "memory_kokkos.h" using namespace LAMMPS_NS; diff --git a/src/KOKKOS/neigh_bond_kokkos.cpp b/src/KOKKOS/neigh_bond_kokkos.cpp index a674e7cec48a0e16ff9aa91f36ec85c53b23c0a5..3ecc8b5e516c8243a36a668fcdceb7b434e7145c 100644 --- a/src/KOKKOS/neigh_bond_kokkos.cpp +++ b/src/KOKKOS/neigh_bond_kokkos.cpp @@ -24,7 +24,7 @@ #include "domain_kokkos.h" #include "output.h" #include "thermo.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "modify.h" #include "fix.h" @@ -80,27 +80,27 @@ void NeighBondKokkos<DeviceType>::init_topology_kk() { if (atom->molecular && atom->nbonds && maxbond == 0) { if (nprocs == 1) maxbond = atom->nbonds; else maxbond = static_cast<int> (LB_FACTOR * atom->nbonds / nprocs); - memory->create_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neigh:neighbor->bondlist"); + memoryKK->create_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neigh:neighbor->bondlist"); } if (atom->molecular && atom->nangles && maxangle == 0) { if (nprocs == 1) maxangle = atom->nangles; else maxangle = static_cast<int> (LB_FACTOR * atom->nangles / nprocs); - memory->create_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neigh:neighbor->anglelist"); + memoryKK->create_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neigh:neighbor->anglelist"); } if (atom->molecular && atom->ndihedrals && maxdihedral == 0) { if (nprocs == 1) maxdihedral = atom->ndihedrals; else maxdihedral = static_cast<int> (LB_FACTOR * atom->ndihedrals / nprocs); - memory->create_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neigh:neighbor->dihedrallist"); + memoryKK->create_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neigh:neighbor->dihedrallist"); } if (atom->molecular && atom->nimpropers && maximproper == 0) { if (nprocs == 1) maximproper = atom->nimpropers; else maximproper = static_cast<int> (LB_FACTOR * atom->nimpropers / nprocs); - memory->create_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neigh:neighbor->improperlist"); + memoryKK->create_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neigh:neighbor->improperlist"); } // set flags that determine which topology neighboring routines to use @@ -283,7 +283,7 @@ void NeighBondKokkos<DeviceType>::bond_all() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxbond = neighbor->nbondlist + BONDDELTA; - memory->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); + memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); v_bondlist = k_bondlist.view<DeviceType>(); } } while (h_fail_flag()); @@ -378,7 +378,7 @@ void NeighBondKokkos<DeviceType>::bond_partial() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxbond = neighbor->nbondlist + BONDDELTA; - memory->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); + memoryKK->grow_kokkos(k_bondlist,neighbor->bondlist,maxbond,3,"neighbor:neighbor->bondlist"); v_bondlist = k_bondlist.view<DeviceType>(); } } while (h_fail_flag()); @@ -500,7 +500,7 @@ void NeighBondKokkos<DeviceType>::angle_all() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxangle = neighbor->nanglelist + BONDDELTA; - memory->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); + memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); v_anglelist = k_anglelist.view<DeviceType>(); } } while (h_fail_flag()); @@ -602,7 +602,7 @@ void NeighBondKokkos<DeviceType>::angle_partial() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxangle = neighbor->nanglelist + BONDDELTA; - memory->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); + memoryKK->grow_kokkos(k_anglelist,neighbor->anglelist,maxangle,4,"neighbor:neighbor->anglelist"); v_anglelist = k_anglelist.view<DeviceType>(); } } while (h_fail_flag()); @@ -744,7 +744,7 @@ void NeighBondKokkos<DeviceType>::dihedral_all() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxdihedral = neighbor->ndihedrallist + BONDDELTA; - memory->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); + memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); v_dihedrallist = k_dihedrallist.view<DeviceType>(); } } while (h_fail_flag()); @@ -851,7 +851,7 @@ void NeighBondKokkos<DeviceType>::dihedral_partial() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maxdihedral = neighbor->ndihedrallist + BONDDELTA; - memory->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); + memoryKK->grow_kokkos(k_dihedrallist,neighbor->dihedrallist,maxdihedral,5,"neighbor:neighbor->dihedrallist"); v_dihedrallist = k_dihedrallist.view<DeviceType>(); } } while (h_fail_flag()); @@ -1015,7 +1015,7 @@ void NeighBondKokkos<DeviceType>::improper_all() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maximproper = neighbor->nimproperlist + BONDDELTA; - memory->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); + memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); v_improperlist = k_improperlist.view<DeviceType>(); } } while (h_fail_flag()); @@ -1122,7 +1122,7 @@ void NeighBondKokkos<DeviceType>::improper_partial() k_fail_flag.template sync<LMPHostType>(); if (h_fail_flag()) { maximproper = neighbor->nimproperlist + BONDDELTA; - memory->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); + memoryKK->grow_kokkos(k_improperlist,neighbor->improperlist,maximproper,5,"neighbor:neighbor->improperlist"); v_improperlist = k_improperlist.view<DeviceType>(); } } while (h_fail_flag()); diff --git a/src/KOKKOS/neigh_list_kokkos.cpp b/src/KOKKOS/neigh_list_kokkos.cpp index 04454e53cb302e2ad696809c034e1925a8aaa83d..98294a802aaa88e16a2c1ca8269cf235d149ff3a 100644 --- a/src/KOKKOS/neigh_list_kokkos.cpp +++ b/src/KOKKOS/neigh_list_kokkos.cpp @@ -13,7 +13,7 @@ #include "neigh_list_kokkos.h" #include "atom.h" -#include "memory.h" +#include "memory_kokkos.h" using namespace LAMMPS_NS; diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index f34b149864d1c24179fdf2f41be36143ed5f9807..8d36add10bd6a10d8d861cd0cff5a27cef5fd1f3 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -12,11 +12,11 @@ ------------------------------------------------------------------------- */ #include "neighbor_kokkos.h" -#include "atom.h" +#include "atom_kokkos.h" #include "pair.h" #include "fix.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "update.h" #include "atom_masks.h" #include "error.h" @@ -52,24 +52,24 @@ NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp), NeighborKokkos::~NeighborKokkos() { if (!copymode) { - memory->destroy_kokkos(k_cutneighsq,cutneighsq); + memoryKK->destroy_kokkos(k_cutneighsq,cutneighsq); cutneighsq = NULL; - memory->destroy_kokkos(k_ex_type,ex_type); - memory->destroy_kokkos(k_ex1_type,ex1_type); - memory->destroy_kokkos(k_ex2_type,ex2_type); - memory->destroy_kokkos(k_ex1_group,ex1_group); - memory->destroy_kokkos(k_ex2_group,ex2_group); - memory->destroy_kokkos(k_ex_mol_group,ex_mol_group); - memory->destroy_kokkos(k_ex1_bit,ex1_bit); - memory->destroy_kokkos(k_ex2_bit,ex2_bit); - memory->destroy_kokkos(k_ex_mol_bit,ex_mol_bit); - memory->destroy_kokkos(k_ex_mol_intra,ex_mol_intra); - - memory->destroy_kokkos(k_bondlist,bondlist); - memory->destroy_kokkos(k_anglelist,anglelist); - memory->destroy_kokkos(k_dihedrallist,dihedrallist); - memory->destroy_kokkos(k_improperlist,improperlist); + memoryKK->destroy_kokkos(k_ex_type,ex_type); + memoryKK->destroy_kokkos(k_ex1_type,ex1_type); + memoryKK->destroy_kokkos(k_ex2_type,ex2_type); + memoryKK->destroy_kokkos(k_ex1_group,ex1_group); + memoryKK->destroy_kokkos(k_ex2_group,ex2_group); + memoryKK->destroy_kokkos(k_ex_mol_group,ex_mol_group); + memoryKK->destroy_kokkos(k_ex1_bit,ex1_bit); + memoryKK->destroy_kokkos(k_ex2_bit,ex2_bit); + memoryKK->destroy_kokkos(k_ex_mol_bit,ex_mol_bit); + memoryKK->destroy_kokkos(k_ex_mol_intra,ex_mol_intra); + + memoryKK->destroy_kokkos(k_bondlist,bondlist); + memoryKK->destroy_kokkos(k_anglelist,anglelist); + memoryKK->destroy_kokkos(k_dihedrallist,dihedrallist); + memoryKK->destroy_kokkos(k_improperlist,improperlist); } } @@ -90,7 +90,7 @@ void NeighborKokkos::init() void NeighborKokkos::init_cutneighsq_kokkos(int n) { - memory->create_kokkos(k_cutneighsq,cutneighsq,n+1,n+1,"neigh:cutneighsq"); + memoryKK->create_kokkos(k_cutneighsq,cutneighsq,n+1,n+1,"neigh:cutneighsq"); k_cutneighsq.modify<LMPHostType>(); } @@ -112,7 +112,7 @@ void NeighborKokkos::create_kokkos_list(int i) void NeighborKokkos::init_ex_type_kokkos(int n) { - memory->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type"); + memoryKK->create_kokkos(k_ex_type,ex_type,n+1,n+1,"neigh:ex_type"); k_ex_type.modify<LMPHostType>(); } @@ -120,9 +120,9 @@ void NeighborKokkos::init_ex_type_kokkos(int n) void NeighborKokkos::init_ex_bit_kokkos() { - memory->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit"); + memoryKK->create_kokkos(k_ex1_bit, ex1_bit, nex_group, "neigh:ex1_bit"); k_ex1_bit.modify<LMPHostType>(); - memory->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit"); + memoryKK->create_kokkos(k_ex2_bit, ex2_bit, nex_group, "neigh:ex2_bit"); k_ex2_bit.modify<LMPHostType>(); } @@ -130,7 +130,7 @@ void NeighborKokkos::init_ex_bit_kokkos() void NeighborKokkos::init_ex_mol_bit_kokkos() { - memory->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit"); + memoryKK->create_kokkos(k_ex_mol_bit, ex_mol_bit, nex_mol, "neigh:ex_mol_bit"); k_ex_mol_bit.modify<LMPHostType>(); } @@ -138,7 +138,7 @@ void NeighborKokkos::init_ex_mol_bit_kokkos() void NeighborKokkos::grow_ex_mol_intra_kokkos() { - memory->grow_kokkos(k_ex_mol_intra, ex_mol_intra, maxex_mol, "neigh:ex_mol_intra"); + memoryKK->grow_kokkos(k_ex_mol_intra, ex_mol_intra, maxex_mol, "neigh:ex_mol_intra"); k_ex_mol_intra.modify<LMPHostType>(); } @@ -335,29 +335,29 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_ex_type_grow_kokkos(){ - memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type"); + memoryKK->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type"); k_ex1_type.modify<LMPHostType>(); - memory->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type"); + memoryKK->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type"); k_ex2_type.modify<LMPHostType>(); } /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_ex_group_grow_kokkos(){ - memory->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group"); + memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group"); k_ex1_group.modify<LMPHostType>(); - memory->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group"); + memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group"); k_ex2_group.modify<LMPHostType>(); } /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_mol_group_grow_kokkos(){ - memory->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group"); + memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group"); k_ex_mol_group.modify<LMPHostType>(); } /* ---------------------------------------------------------------------- */ void NeighborKokkos::modify_mol_intra_grow_kokkos(){ - memory->grow_kokkos(k_ex_mol_intra,ex_mol_intra,maxex_mol,"neigh:ex_mol_intra"); + memoryKK->grow_kokkos(k_ex_mol_intra,ex_mol_intra,maxex_mol,"neigh:ex_mol_intra"); k_ex_mol_intra.modify<LMPHostType>(); } diff --git a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp index 0da8a0a3d6a9267bc6e45a74aa4b52f2f2471c47..ba3eda64ddee4e389b771f5e47168c98c5a6fa08 100644 --- a/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_cut_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -65,8 +65,8 @@ PairBuckCoulCutKokkos<DeviceType>::~PairBuckCoulCutKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -98,13 +98,13 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -252,13 +252,13 @@ void PairBuckCoulCutKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); memory->destroy(cut_coulsq); - memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_buck_coul**,Kokkos::LayoutRight,DeviceType>("PairBuckCoulCut::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp index 3a5cbd868f725c54a95c6c6cb0fc9a2335137ea5..19af349a63588a464f4e1785be291510fd91acfa 100644 --- a/src/KOKKOS/pair_buck_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_buck_coul_long_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -73,8 +73,8 @@ template<class DeviceType> PairBuckCoulLongKokkos<DeviceType>::~PairBuckCoulLongKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -117,13 +117,13 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -308,14 +308,14 @@ void PairBuckCoulLongKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_buck_coul**,Kokkos::LayoutRight,DeviceType>("PairBuckCoulLong::params",n+1,n+1); diff --git a/src/KOKKOS/pair_buck_kokkos.cpp b/src/KOKKOS/pair_buck_kokkos.cpp index e7640471d501bc8be6778c4acb32395e18cf950a..fcf14533dc991d469ad1c0e65951ca80d515b0b2 100644 --- a/src/KOKKOS/pair_buck_kokkos.cpp +++ b/src/KOKKOS/pair_buck_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -61,8 +61,8 @@ template<class DeviceType> PairBuckKokkos<DeviceType>::~PairBuckKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); memory->sfree(cutsq); eatom = NULL; @@ -87,13 +87,13 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -194,7 +194,7 @@ void PairBuckKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_buck**,Kokkos::LayoutRight,DeviceType>("PairBuck::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_coul_cut_kokkos.cpp b/src/KOKKOS/pair_coul_cut_kokkos.cpp index 8edf093e2e118a4315b899aa6c304a9e78f670f9..e20e243c09aefb7313cbe13ce50a498356ea405d 100644 --- a/src/KOKKOS/pair_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_coul_cut_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -55,7 +55,7 @@ template<class DeviceType> PairCoulCutKokkos<DeviceType>::~PairCoulCutKokkos() { if (allocated) - memory->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); } /* ---------------------------------------------------------------------- */ @@ -86,13 +86,13 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -190,7 +190,7 @@ void PairCoulCutKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_cut_ljsq = typename ArrayTypes<DeviceType>::tdual_ffloat_2d("pair:cut_ljsq",n+1,n+1); diff --git a/src/KOKKOS/pair_coul_debye_kokkos.cpp b/src/KOKKOS/pair_coul_debye_kokkos.cpp index c331ab8da8f5881441985a0bfdf69d7f9da8bc8d..4cac18cacfcaf2ba3274a3feeb6e6f3d3a65c4bf 100644 --- a/src/KOKKOS/pair_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_coul_debye_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -62,7 +62,7 @@ template<class DeviceType> PairCoulDebyeKokkos<DeviceType>::~PairCoulDebyeKokkos() { if (!copymode) { - memory->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); } } @@ -93,13 +93,13 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -218,7 +218,7 @@ void PairCoulDebyeKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulDebye::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_coul_dsf_kokkos.cpp index e6f5407f2d16990085495915562a4c564861aa81..f77e63bbf0551b0412b21e0b7631c098ff082f78 100644 --- a/src/KOKKOS/pair_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_dsf_kokkos.cpp @@ -27,7 +27,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "update.h" #include "integrate.h" #include "respa.h" @@ -65,8 +65,8 @@ template<class DeviceType> PairCoulDSFKokkos<DeviceType>::~PairCoulDSFKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -86,13 +86,13 @@ void PairCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_coul_long_kokkos.cpp b/src/KOKKOS/pair_coul_long_kokkos.cpp index 721e140e3374794dad4016fce79888cb60a07145..f2ade3f367c0e01b8118d454a3d0259618cffddf 100644 --- a/src/KOKKOS/pair_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_coul_long_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -72,8 +72,8 @@ template<class DeviceType> PairCoulLongKokkos<DeviceType>::~PairCoulLongKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); memory->sfree(cutsq); @@ -110,13 +110,13 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -272,10 +272,10 @@ void PairCoulLongKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_coul**,Kokkos::LayoutRight,DeviceType>("PairCoulLong::params",n+1,n+1); diff --git a/src/KOKKOS/pair_coul_wolf_kokkos.cpp b/src/KOKKOS/pair_coul_wolf_kokkos.cpp index 75177e2d81e7a3a9a70ccc754db14651a8d29e56..0f3e9b94290918dd78cb8940cc762d5e82494977 100644 --- a/src/KOKKOS/pair_coul_wolf_kokkos.cpp +++ b/src/KOKKOS/pair_coul_wolf_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -60,8 +60,8 @@ template<class DeviceType> PairCoulWolfKokkos<DeviceType>::~PairCoulWolfKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -81,13 +81,13 @@ void PairCoulWolfKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp index c559ab412f355c1dc10a51adb4308fbf031003d4..7d71719e0a115d99cf25a3a89a91487f9042edc2 100644 --- a/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp +++ b/src/KOKKOS/pair_dpd_fdt_energy_kokkos.cpp @@ -28,11 +28,12 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "modify.h" #include "pair_dpd_fdt_energy_kokkos.h" #include "error.h" #include "atom_masks.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -62,15 +63,15 @@ PairDPDfdtEnergyKokkos<DeviceType>::~PairDPDfdtEnergyKokkos() { if (copymode) return; - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); if (allocated) { - memory->destroy_kokkos(k_duCond,duCond); - memory->destroy_kokkos(k_duMech,duMech); + memoryKK->destroy_kokkos(k_duCond,duCond); + memoryKK->destroy_kokkos(k_duMech,duMech); } - memory->destroy_kokkos(k_cutsq,cutsq); + memoryKK->destroy_kokkos(k_cutsq,cutsq); #ifdef DPD_USE_RAN_MARS rand_pool.destroy(); @@ -167,13 +168,13 @@ void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } @@ -274,11 +275,11 @@ void PairDPDfdtEnergyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // Allocate memory for duCond and duMech if (allocated) { - memory->destroy_kokkos(k_duCond,duCond); - memory->destroy_kokkos(k_duMech,duMech); + memoryKK->destroy_kokkos(k_duCond,duCond); + memoryKK->destroy_kokkos(k_duMech,duMech); } - memory->create_kokkos(k_duCond,duCond,nlocal+nghost,"pair:duCond"); - memory->create_kokkos(k_duMech,duMech,nlocal+nghost,"pair:duMech"); + memoryKK->create_kokkos(k_duCond,duCond,nlocal+nghost,"pair:duCond"); + memoryKK->create_kokkos(k_duMech,duMech,nlocal+nghost,"pair:duMech"); d_duCond = k_duCond.view<DeviceType>(); d_duMech = k_duMech.view<DeviceType>(); h_duCond = k_duCond.h_view; @@ -641,7 +642,7 @@ void PairDPDfdtEnergyKokkos<DeviceType>::allocate() int nghost = atom->nghost; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_dpd**,Kokkos::LayoutRight,DeviceType>("PairDPDfdtEnergy::params",n+1,n+1); @@ -650,8 +651,8 @@ void PairDPDfdtEnergyKokkos<DeviceType>::allocate() if (!splitFDT_flag) { memory->destroy(duCond); memory->destroy(duMech); - memory->create_kokkos(k_duCond,duCond,nlocal+nghost+1,"pair:duCond"); - memory->create_kokkos(k_duMech,duMech,nlocal+nghost+1,"pair:duMech"); + memoryKK->create_kokkos(k_duCond,duCond,nlocal+nghost+1,"pair:duCond"); + memoryKK->create_kokkos(k_duMech,duMech,nlocal+nghost+1,"pair:duMech"); d_duCond = k_duCond.view<DeviceType>(); d_duMech = k_duMech.view<DeviceType>(); h_duCond = k_duCond.h_view; diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index aa68d0a05438ad2db1256e62cbb979861be47c47..f263cbb6d90be1cd34d5f0f9abab2b4e04174794 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -28,7 +28,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -59,8 +59,8 @@ template<class DeviceType> PairEAMAlloyKokkos<DeviceType>::~PairEAMAlloyKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -80,13 +80,13 @@ void PairEAMAlloyKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.h b/src/KOKKOS/pair_eam_alloy_kokkos.h index fb07eec32b1253c61d309f60f1010fb31d4441a6..a962f559085d767f10c1b347a83b9d006c68a548 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.h +++ b/src/KOKKOS/pair_eam_alloy_kokkos.h @@ -24,6 +24,7 @@ PairStyle(eam/alloy/kk/host,PairEAMAlloyKokkos<LMPHostType>) #define LMP_PAIR_EAM_ALLOY_KOKKOS_H #include <stdio.h> +#include "kokkos_base.h" #include "pair_kokkos.h" #include "pair_eam.h" #include "neigh_list_kokkos.h" @@ -49,7 +50,7 @@ struct TagPairEAMAlloyKernelC{}; // Cannot use virtual inheritance on the GPU template<class DeviceType> -class PairEAMAlloyKokkos : public PairEAM { +class PairEAMAlloyKokkos : public PairEAM, public KokkosBase { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; enum {COUL_FLAG=0}; @@ -59,7 +60,7 @@ class PairEAMAlloyKokkos : public PairEAM { PairEAMAlloyKokkos(class LAMMPS *); virtual ~PairEAMAlloyKokkos(); - virtual void compute(int, int); + void compute(int, int); void init_style(); void *extract(const char *, int &) { return NULL; } void coeff(int, char **); @@ -107,11 +108,11 @@ class PairEAMAlloyKokkos : public PairEAM { const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; - virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, - int, int *); - virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); - virtual int pack_forward_comm(int, int *, double *, int, int *); - virtual void unpack_forward_comm(int, int, double *); + int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); @@ -148,7 +149,7 @@ class PairEAMAlloyKokkos : public PairEAM { t_ffloat_2d_n7_randomread d_rhor_spline; t_ffloat_2d_n7_randomread d_z2r_spline; - virtual void file2array(); + void file2array(); void file2array_alloy(); void array2spline(); void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index a982f94ec4d607ae2e83d1773ed38a98a082ad09..05ec1a644b829cc587ca30a4f99148d78dbeeb11 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -28,7 +28,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -59,8 +59,8 @@ template<class DeviceType> PairEAMFSKokkos<DeviceType>::~PairEAMFSKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -80,13 +80,13 @@ void PairEAMFSKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_eam_fs_kokkos.h b/src/KOKKOS/pair_eam_fs_kokkos.h index d71ec2b8870f103ae1f839d5a90ff791c367afaf..ec87e44ece248f93f0193684311f3587be8ec5d3 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.h +++ b/src/KOKKOS/pair_eam_fs_kokkos.h @@ -24,6 +24,7 @@ PairStyle(eam/fs/kk/host,PairEAMFSKokkos<LMPHostType>) #define LMP_PAIR_EAM_FS_KOKKOS_H #include <stdio.h> +#include "kokkos_base.h" #include "pair_kokkos.h" #include "pair_eam.h" #include "neigh_list_kokkos.h" @@ -49,7 +50,7 @@ struct TagPairEAMFSKernelC{}; // Cannot use virtual inheritance on the GPU template<class DeviceType> -class PairEAMFSKokkos : public PairEAM { +class PairEAMFSKokkos : public PairEAM, public KokkosBase { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; enum {COUL_FLAG=0}; @@ -59,7 +60,7 @@ class PairEAMFSKokkos : public PairEAM { PairEAMFSKokkos(class LAMMPS *); virtual ~PairEAMFSKokkos(); - virtual void compute(int, int); + void compute(int, int); void init_style(); void *extract(const char *, int &) { return NULL; } void coeff(int, char **); @@ -107,11 +108,11 @@ class PairEAMFSKokkos : public PairEAM { const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; - virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, - int, int *); - virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); - virtual int pack_forward_comm(int, int *, double *, int, int *); - virtual void unpack_forward_comm(int, int, double *); + int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); @@ -148,7 +149,7 @@ class PairEAMFSKokkos : public PairEAM { t_ffloat_2d_n7_randomread d_rhor_spline; t_ffloat_2d_n7_randomread d_z2r_spline; - virtual void file2array(); + void file2array(); void file2array_fs(); void array2spline(); void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index 8ac92a1766c62eedbaf37663d5ba1092bbab4599..0aa43b26b2112f6970bae5562b506b4cabe6718d 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -28,7 +28,7 @@ #include "neighbor.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -54,8 +54,8 @@ template<class DeviceType> PairEAMKokkos<DeviceType>::~PairEAMKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -75,13 +75,13 @@ void PairEAMKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_eam_kokkos.h b/src/KOKKOS/pair_eam_kokkos.h index 856cc51f7709f24cec67df5d0e8de930f4796c69..34a4795ec54e97986b9cd39e0e79e7932d5d6bd4 100644 --- a/src/KOKKOS/pair_eam_kokkos.h +++ b/src/KOKKOS/pair_eam_kokkos.h @@ -24,6 +24,7 @@ PairStyle(eam/kk/host,PairEAMKokkos<LMPHostType>) #define LMP_PAIR_EAM_KOKKOS_H #include <stdio.h> +#include "kokkos_base.h" #include "pair_kokkos.h" #include "pair_eam.h" #include "neigh_list_kokkos.h" @@ -47,7 +48,7 @@ template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG> struct TagPairEAMKernelC{}; template<class DeviceType> -class PairEAMKokkos : public PairEAM { +class PairEAMKokkos : public PairEAM, public KokkosBase { public: enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; enum {COUL_FLAG=0}; @@ -57,7 +58,7 @@ class PairEAMKokkos : public PairEAM { PairEAMKokkos(class LAMMPS *); virtual ~PairEAMKokkos(); - virtual void compute(int, int); + void compute(int, int); void init_style(); void *extract(const char *, int &) { return NULL; } @@ -104,11 +105,11 @@ class PairEAMKokkos : public PairEAM { const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, const F_FLOAT &dely, const F_FLOAT &delz) const; - virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, - int, int *); - virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); - virtual int pack_forward_comm(int, int *, double *, int, int *); - virtual void unpack_forward_comm(int, int, double *); + int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d&, + int, int *); + void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d&); + int pack_forward_comm(int, int *, double *, int, int *); + void unpack_forward_comm(int, int, double *); int pack_reverse_comm(int, int, double *); void unpack_reverse_comm(int, int *, double *); @@ -146,7 +147,7 @@ class PairEAMKokkos : public PairEAM { t_ffloat_2d_n7_randomread d_z2r_spline; void interpolate(int, double, double *, t_host_ffloat_2d_n7, int); - virtual void file2array(); + void file2array(); void array2spline(); typename AT::t_neighbors_2d d_neighbors; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 8d65be23af00e7c37e4ce2ad0f1f781a822fc6c4..a09a1d27e34c544c15c79de45ff28ee6a2650e54 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -26,13 +26,15 @@ #include "neigh_list.h" #include "math_const.h" #include "math_special_kokkos.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "modify.h" #include "fix.h" #include <float.h> #include "atom_masks.h" #include "neigh_request.h" +#include "atom_kokkos.h" +#include "kokkos.h" using namespace LAMMPS_NS; using namespace MathConst; @@ -89,18 +91,18 @@ PairExp6rxKokkos<DeviceType>::~PairExp6rxKokkos() { if (copymode) return; - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); - memory->destroy_kokkos(k_cutsq,cutsq); + memoryKK->destroy_kokkos(k_cutsq,cutsq); for (int i=0; i < nparams; ++i) { delete[] params[i].name; delete[] params[i].potential; } - memory->destroy_kokkos(k_params,params); + memoryKK->destroy_kokkos(k_params,params); - memory->destroy_kokkos(k_mol2param,mol2param); + memoryKK->destroy_kokkos(k_mol2param,mol2param); } /* ---------------------------------------------------------------------- */ @@ -151,13 +153,13 @@ void PairExp6rxKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } @@ -1660,7 +1662,7 @@ void PairExp6rxKokkos<DeviceType>::allocate() for (int j = i; j <= ntypes; j++) setflag[i][j] = 0; - memory->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,ntypes+1,ntypes+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_cutsq.template modify<LMPHostType>(); @@ -1697,7 +1699,7 @@ void PairExp6rxKokkos<DeviceType>::read_file(char *file) int params_per_line = 5; char **words = new char*[params_per_line+1]; - memory->destroy_kokkos(k_params,params); + memoryKK->destroy_kokkos(k_params,params); params = NULL; nparams = maxparam = 0; @@ -1777,7 +1779,7 @@ void PairExp6rxKokkos<DeviceType>::read_file(char *file) if (nparams == maxparam) { k_params.template modify<LMPHostType>(); maxparam += DELTA; - memory->grow_kokkos(k_params,params,maxparam, + memoryKK->grow_kokkos(k_params,params,maxparam, "pair:params"); } @@ -1816,8 +1818,8 @@ void PairExp6rxKokkos<DeviceType>::setup() // set mol2param for all combinations // must be a single exact match to lines read from file - memory->destroy_kokkos(k_mol2param,mol2param); - memory->create_kokkos(k_mol2param,mol2param,nspecies,"pair:mol2param"); + memoryKK->destroy_kokkos(k_mol2param,mol2param); + memoryKK->create_kokkos(k_mol2param,mol2param,nspecies,"pair:mol2param"); for (i = 0; i < nspecies; i++) { n = -1; diff --git a/src/KOKKOS/pair_hybrid_kokkos.cpp b/src/KOKKOS/pair_hybrid_kokkos.cpp index 337b56c6cee7e11e3a03dcd810bd62e161bb6813..db757f6ce1847806d266de13037d99b6489732a3 100644 --- a/src/KOKKOS/pair_hybrid_kokkos.cpp +++ b/src/KOKKOS/pair_hybrid_kokkos.cpp @@ -23,10 +23,11 @@ #include "neigh_request.h" #include "update.h" #include "comm.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "respa.h" #include "atom_masks.h" +#include "kokkos.h" using namespace LAMMPS_NS; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index f0e357270c1d5b8340c570f4a1d5be4f26d36a27..4c215ed16287d54ed3237aa678612c74c5faabf0 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -20,6 +20,7 @@ #include "Kokkos_Macros.hpp" #include "pair.h" +#include "neighbor_kokkos.h" #include "neigh_list_kokkos.h" #include "Kokkos_Vectorization.hpp" diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp index e7cf7ba42ad80ba7166322a2dd1999c3e3c19f3c..7d395cb3b464746f7efeb9e52232f122dcfb8b4d 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_implicit_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -73,8 +73,8 @@ template<class DeviceType> PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::~PairLJCharmmCoulCharmmImplicitKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -118,13 +118,13 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -315,14 +315,14 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); //memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCharmmCoulCharmmImplicit::params",n+1,n+1); diff --git a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp index a456d6e27600c36911da43be21f8151a91943148..2663d71a1febe0aced1bacec03b1c80ff6136715 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_charmm_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -73,8 +73,8 @@ template<class DeviceType> PairLJCharmmCoulCharmmKokkos<DeviceType>::~PairLJCharmmCoulCharmmKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -118,13 +118,13 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -316,14 +316,14 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); //memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCharmmCoulCharmm::params",n+1,n+1); diff --git a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp index dffbbb638ff0d1f25819d99b504e58d2d9eae300..81271c7d8ab7b7341601d1141e51c3f5edb5351b 100644 --- a/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_charmm_coul_long_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -73,8 +73,8 @@ template<class DeviceType> PairLJCharmmCoulLongKokkos<DeviceType>::~PairLJCharmmCoulLongKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -118,13 +118,13 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -336,14 +336,14 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); //memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCharmmCoulLong::params",n+1,n+1); diff --git a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp index 0081aca4f15ddf196ceb6d104ce75798fa55c736..3f355b1b160bf34df464b833cf1c938e76f5a17e 100644 --- a/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_cut_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -60,9 +60,9 @@ template<class DeviceType> PairLJClass2CoulCutKokkos<DeviceType>::~PairLJClass2CoulCutKokkos() { if (!copymode) { - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); - memory->destroy_kokkos(k_cut_coulsq, cut_coulsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memoryKK->destroy_kokkos(k_cut_coulsq, cut_coulsq); } } @@ -95,13 +95,13 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -255,13 +255,13 @@ void PairLJClass2CoulCutKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); memory->destroy(cut_coulsq); - memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJClass2CoulCut::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp index b5dc358feb3fed379c7236e164a0f15d0104e60b..47976ec610aecdef5d6c41bd9d612533e6bc16aa 100644 --- a/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_coul_long_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -69,8 +69,8 @@ template<class DeviceType> PairLJClass2CoulLongKokkos<DeviceType>::~PairLJClass2CoulLongKokkos() { if (!copymode){ - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); } } @@ -103,13 +103,13 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -307,13 +307,13 @@ void PairLJClass2CoulLongKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJClass2CoulLong::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_class2_kokkos.cpp b/src/KOKKOS/pair_lj_class2_kokkos.cpp index 34cc15279b4d7a2708fc5c537d14ea44fe4e0dca..5beb520c00feb1971f5248e4ab31a7442133faa7 100644 --- a/src/KOKKOS/pair_lj_class2_kokkos.cpp +++ b/src/KOKKOS/pair_lj_class2_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -95,13 +95,13 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -200,7 +200,7 @@ void PairLJClass2Kokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJClass2::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp index c3fda01ce63244bfa591deb80c23e4327288c68a..86e37bea77a6ffadf9ab082a36005ebca6da8aba 100644 --- a/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_cut_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -60,9 +60,9 @@ template<class DeviceType> PairLJCutCoulCutKokkos<DeviceType>::~PairLJCutCoulCutKokkos() { if (allocated){ - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); - memory->destroy_kokkos(k_cut_coulsq, cut_coulsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memoryKK->destroy_kokkos(k_cut_coulsq, cut_coulsq); } } @@ -95,13 +95,13 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -247,13 +247,13 @@ void PairLJCutCoulCutKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); memory->destroy(cut_coulsq); - memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulCut::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp index b7a71cb99ac3d7f22ace36d5537a1dcb920dd60b..76e488c68667e4ac7b19150ba70e876ebc81fe7e 100644 --- a/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_debye_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -64,9 +64,9 @@ template<class DeviceType> PairLJCutCoulDebyeKokkos<DeviceType>::~PairLJCutCoulDebyeKokkos() { if (!copymode) { - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); - memory->destroy_kokkos(k_cut_coulsq, cut_coulsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memoryKK->destroy_kokkos(k_cut_coulsq, cut_coulsq); } } @@ -99,13 +99,13 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -261,13 +261,13 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); memory->destroy(cut_coulsq); - memory->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulDebye::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp index 9df59636769a25dc3d512f36d4912486cb43be66..0da5e7f5f511432e5b8e81036caed9ec779dc992 100644 --- a/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_dsf_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -72,9 +72,9 @@ template<class DeviceType> PairLJCutCoulDSFKokkos<DeviceType>::~PairLJCutCoulDSFKokkos() { if (!copymode) { - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); - //memory->destroy_kokkos(k_cut_coulsq, cut_coulsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); + //memoryKK->destroy_kokkos(k_cut_coulsq, cut_coulsq); } } @@ -107,13 +107,13 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -279,13 +279,13 @@ void PairLJCutCoulDSFKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); //memory->destroy(cut_coulsq); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulDSF::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp index 9bd79c7341a7116d74eb789ace8f5a984564278c..77a604534e2ae907a16db82ed013c26af2ec1042 100644 --- a/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_coul_long_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -68,13 +68,13 @@ PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJ template<class DeviceType> PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos() { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); eatom = NULL; vatom = NULL; if (allocated){ - memory->destroy_kokkos(k_cutsq, cutsq); - memory->destroy_kokkos(k_cut_ljsq, cut_ljsq); + memoryKK->destroy_kokkos(k_cutsq, cutsq); + memoryKK->destroy_kokkos(k_cut_ljsq, cut_ljsq); } } @@ -108,13 +108,13 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -304,13 +304,13 @@ void PairLJCutCoulLongKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul**,Kokkos::LayoutRight,DeviceType>("PairLJCutCoulLong::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_cut_kokkos.cpp b/src/KOKKOS/pair_lj_cut_kokkos.cpp index c90d5ad11c5295083eaaa4516c6f108665280def..2a228fb168be97184fda12d80b012745b286fa80 100644 --- a/src/KOKKOS/pair_lj_cut_kokkos.cpp +++ b/src/KOKKOS/pair_lj_cut_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -57,8 +57,8 @@ template<class DeviceType> PairLJCutKokkos<DeviceType>::~PairLJCutKokkos() { if (allocated) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); memory->sfree(cutsq); eatom = NULL; @@ -95,13 +95,13 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -195,7 +195,7 @@ void PairLJCutKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJCut::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_expand_kokkos.cpp b/src/KOKKOS/pair_lj_expand_kokkos.cpp index 95ec252ad5fdebf3d4829abc524927d17ffa6eef..aca7202b78be66a04ce7443e7eb4f34c34a6a6e0 100644 --- a/src/KOKKOS/pair_lj_expand_kokkos.cpp +++ b/src/KOKKOS/pair_lj_expand_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -94,13 +94,13 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -202,7 +202,7 @@ void PairLJExpandKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJExpand::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp index 4b21b08eb352ad95c8806bff4ae3175204eaf767..bf269288e076b3c69edf2cbff706426740a5e9f6 100644 --- a/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_coul_gromacs_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -64,8 +64,8 @@ template<class DeviceType> PairLJGromacsCoulGromacsKokkos<DeviceType>::~PairLJGromacsCoulGromacsKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_ljsq = DAT::tdual_ffloat_2d(); k_cut_coulsq = DAT::tdual_ffloat_2d(); @@ -109,13 +109,13 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -299,14 +299,14 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); //memory->destroy(cut_ljsq); - memory->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); + memoryKK->create_kokkos(k_cut_ljsq,n+1,n+1,"pair:cut_ljsq"); d_cut_ljsq = k_cut_ljsq.template view<DeviceType>(); - memory->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); + memoryKK->create_kokkos(k_cut_coulsq,n+1,n+1,"pair:cut_coulsq"); d_cut_coulsq = k_cut_coulsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj_coul_gromacs**,Kokkos::LayoutRight,DeviceType>("PairLJGromacsCoulGromacs::params",n+1,n+1); diff --git a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp index 73a17d7b22f577fc31e4d6f3271ff4bab1b7694a..f24ff718c1042992bf47e18a6ce12e54ec7c169f 100644 --- a/src/KOKKOS/pair_lj_gromacs_kokkos.cpp +++ b/src/KOKKOS/pair_lj_gromacs_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -64,8 +64,8 @@ template<class DeviceType> PairLJGromacsKokkos<DeviceType>::~PairLJGromacsKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); k_cut_inner_sq = DAT::tdual_ffloat_2d(); memory->sfree(cutsq); @@ -106,13 +106,13 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -240,15 +240,15 @@ void PairLJGromacsKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); memory->destroy(cut_inner); - memory->create_kokkos(k_cut_inner,cut_inner,n+1,n+1,"pair:cut_inner"); + memoryKK->create_kokkos(k_cut_inner,cut_inner,n+1,n+1,"pair:cut_inner"); d_cut_inner = k_cut_inner.template view<DeviceType>(); memory->destroy(cut_inner_sq); - memory->create_kokkos(k_cut_inner_sq,cut_inner_sq,n+1,n+1,"pair:cut_inner_sq"); + memoryKK->create_kokkos(k_cut_inner_sq,cut_inner_sq,n+1,n+1,"pair:cut_inner_sq"); d_cut_inner_sq = k_cut_inner_sq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJGromacs::params",n+1,n+1); diff --git a/src/KOKKOS/pair_lj_sdk_kokkos.cpp b/src/KOKKOS/pair_lj_sdk_kokkos.cpp index 2063f62b203546f4dfa18fd505b8e64ed7148968..aa579d5dc12995475731589da686ee4271867db9 100644 --- a/src/KOKKOS/pair_lj_sdk_kokkos.cpp +++ b/src/KOKKOS/pair_lj_sdk_kokkos.cpp @@ -27,7 +27,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -94,13 +94,13 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -231,7 +231,7 @@ void PairLJSDKKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_lj**,Kokkos::LayoutRight,DeviceType>("PairLJSDK::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_morse_kokkos.cpp b/src/KOKKOS/pair_morse_kokkos.cpp index 08a9b82640b4ea439c67457210d6baf31cb34027..5768d7e42cedba7cbdfb16bec74a765af657b837 100644 --- a/src/KOKKOS/pair_morse_kokkos.cpp +++ b/src/KOKKOS/pair_morse_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -61,8 +61,8 @@ template<class DeviceType> PairMorseKokkos<DeviceType>::~PairMorseKokkos() { if (allocated) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); k_cutsq = DAT::tdual_ffloat_2d(); memory->sfree(cutsq); eatom = NULL; @@ -99,13 +99,13 @@ void PairMorseKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -209,7 +209,7 @@ void PairMorseKokkos<DeviceType>::allocate() int n = atom->ntypes; memory->destroy(cutsq); - memory->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_params = Kokkos::DualView<params_morse**,Kokkos::LayoutRight,DeviceType>("PairMorse::params",n+1,n+1); params = k_params.template view<DeviceType>(); diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp index d9a4f1ab83643ebd27f692c6290e436079c91522..0961cf44eb09ea1211ab7898c9c6532cbd518a14 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.cpp @@ -31,13 +31,14 @@ #include "force.h" #include "comm.h" #include "neigh_list.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "citeme.h" #include "modify.h" #include "fix.h" #include "atom_masks.h" #include "neigh_request.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -80,10 +81,10 @@ PairMultiLucyRXKokkos<DeviceType>::~PairMultiLucyRXKokkos() { if (copymode) return; - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); - memory->destroy_kokkos(k_cutsq,cutsq); + memoryKK->destroy_kokkos(k_cutsq,cutsq); delete h_table; delete d_table; @@ -153,13 +154,13 @@ void PairMultiLucyRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } @@ -864,20 +865,20 @@ void PairMultiLucyRXKokkos<DeviceType>::create_kokkos_tables() { const int tlm1 = tablength-1; - memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); - memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memoryKK->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); if(tabstyle == LOOKUP) { - memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); } if(tabstyle == LINEAR) { - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); - memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); - memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memoryKK->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); } for(int i=0; i < ntables; i++) { @@ -931,11 +932,11 @@ void PairMultiLucyRXKokkos<DeviceType>::allocate() memory->create(setflag,nt,nt,"pair:setflag"); - memory->create_kokkos(k_cutsq,cutsq,nt,nt,"pair:cutsq"); + memoryKK->create_kokkos(k_cutsq,cutsq,nt,nt,"pair:cutsq"); d_cutsq = k_cutsq.template view<DeviceType>(); k_cutsq.template modify<LMPHostType>(); - memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + memoryKK->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); d_table_const.tabindex = d_table->tabindex; memset(&setflag[0][0],0,nt*nt*sizeof(int)); diff --git a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h index b8ced4c847f6088efe98c09f50ea706626531883..aee1763b06e25c019a996c8b1c0c2b5df399188a 100644 --- a/src/KOKKOS/pair_multi_lucy_rx_kokkos.h +++ b/src/KOKKOS/pair_multi_lucy_rx_kokkos.h @@ -25,6 +25,7 @@ PairStyle(multi/lucy/rx/kk/host,PairMultiLucyRXKokkos<LMPHostType>) #include "pair_multi_lucy_rx.h" #include "pair_kokkos.h" +#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -43,7 +44,7 @@ template<int NEIGHFLAG, int NEWTON_PAIR, bool ONE_TYPE> struct TagPairMultiLucyRXComputeLocalDensity{}; template<class DeviceType> -class PairMultiLucyRXKokkos : public PairMultiLucyRX { +class PairMultiLucyRXKokkos : public PairMultiLucyRX, public KokkosBase { public: typedef DeviceType device_type; typedef ArrayTypes<DeviceType> AT; diff --git a/src/KOKKOS/pair_reaxc_kokkos.cpp b/src/KOKKOS/pair_reaxc_kokkos.cpp index d5f83f45373d9e4d32d6032f0fe343489077e885..1f596377679fcac252660addc37109675da0260e 100644 --- a/src/KOKKOS/pair_reaxc_kokkos.cpp +++ b/src/KOKKOS/pair_reaxc_kokkos.cpp @@ -32,12 +32,13 @@ #include "respa.h" #include "math_const.h" #include "math_special.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" #include "reaxc_defs.h" #include "reaxc_lookup.h" #include "reaxc_tool_box.h" +#include "modify.h" #define TEAMSIZE 128 @@ -81,12 +82,12 @@ PairReaxCKokkos<DeviceType>::~PairReaxCKokkos() { if (copymode) return; - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); - memory->destroy_kokkos(k_tmpid,tmpid); + memoryKK->destroy_kokkos(k_tmpid,tmpid); tmpid = NULL; - memory->destroy_kokkos(k_tmpbo,tmpbo); + memoryKK->destroy_kokkos(k_tmpbo,tmpbo); tmpbo = NULL; } @@ -1339,10 +1340,10 @@ void PairReaxCKokkos<DeviceType>::allocate_array() // FixReaxCSpecies if (fixspecies_flag) { - memory->destroy_kokkos(k_tmpid,tmpid); - memory->destroy_kokkos(k_tmpbo,tmpbo); - memory->create_kokkos(k_tmpid,tmpid,nmax,MAXSPECBOND,"pair:tmpid"); - memory->create_kokkos(k_tmpbo,tmpbo,nmax,MAXSPECBOND,"pair:tmpbo"); + memoryKK->destroy_kokkos(k_tmpid,tmpid); + memoryKK->destroy_kokkos(k_tmpbo,tmpbo); + memoryKK->create_kokkos(k_tmpid,tmpid,nmax,MAXSPECBOND,"pair:tmpid"); + memoryKK->create_kokkos(k_tmpbo,tmpbo,nmax,MAXSPECBOND,"pair:tmpbo"); } // FixReaxCBonds @@ -1448,6 +1449,8 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsFull, const int & } } + if (rsq > cut_bosq) continue; + // bond_list const F_FLOAT rij = sqrt(rsq); const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1; @@ -1635,6 +1638,8 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsHalf<NEIGHFLAG>, } } + if (rsq > cut_bosq) continue; + // bond_list const F_FLOAT rij = sqrt(rsq); const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1; @@ -1856,6 +1861,8 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxBuildListsHalf_LessAtomics< } } + if (rsq > cut_bosq) continue; + // bond_list const F_FLOAT rij = sqrt(rsq); const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1; @@ -3905,14 +3912,14 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag) if (eflag_atom && atom->nmax > maxeatom) { maxeatom = atom->nmax; - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); v_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom && atom->nmax > maxvatom) { maxvatom = atom->nmax; - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); v_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_sw_kokkos.cpp b/src/KOKKOS/pair_sw_kokkos.cpp index 3440f7c63923da0467c61b702c1184ab3cb1856c..63b4c19ae1d7c40d4e3234562b6b7eb8c54e9470 100644 --- a/src/KOKKOS/pair_sw_kokkos.cpp +++ b/src/KOKKOS/pair_sw_kokkos.cpp @@ -27,10 +27,10 @@ #include "neigh_request.h" #include "force.h" #include "comm.h" -#include "memory.h" +#include "memory_kokkos.h" #include "neighbor.h" #include "neigh_list_kokkos.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" #include "math_const.h" @@ -63,8 +63,8 @@ template<class DeviceType> PairSWKokkos<DeviceType>::~PairSWKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); eatom = NULL; vatom = NULL; } @@ -86,13 +86,13 @@ void PairSWKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_table_kokkos.cpp b/src/KOKKOS/pair_table_kokkos.cpp index 7f763baae6fe7b40eb4830d243c0b714a6bcf2b1..b3e80d1f664e012555a826834df2d6b78ec7bba4 100644 --- a/src/KOKKOS/pair_table_kokkos.cpp +++ b/src/KOKKOS/pair_table_kokkos.cpp @@ -27,7 +27,7 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -92,13 +92,13 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -265,41 +265,41 @@ void PairTableKokkos<DeviceType>::create_kokkos_tables() { const int tlm1 = tablength-1; - memory->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits"); - memory->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask"); - memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); - memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); - memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); + memoryKK->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits"); + memoryKK->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask"); + memoryKK->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memoryKK->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); if(tabstyle == LOOKUP) { - memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); } if(tabstyle == LINEAR) { - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); - memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); - memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memoryKK->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); } if(tabstyle == SPLINE) { - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); - memory->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2"); - memory->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memoryKK->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2"); + memoryKK->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2"); } if(tabstyle == BITMAP) { int ntable = 1 << tablength; - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f"); - memory->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de"); - memory->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df"); - memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de"); + memoryKK->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df"); + memoryKK->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); } @@ -410,8 +410,8 @@ void PairTableKokkos<DeviceType>::allocate() const int nt = atom->ntypes + 1; memory->create(setflag,nt,nt,"pair:setflag"); - memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); - memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + memoryKK->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); + memoryKK->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); d_table_const.cutsq = d_table->cutsq; d_table_const.tabindex = d_table->tabindex; diff --git a/src/KOKKOS/pair_table_rx_kokkos.cpp b/src/KOKKOS/pair_table_rx_kokkos.cpp index 2f5a670537ce05f528d1c789c3070292cacd8c80..7bc5198d8cef6e8bbfcf40ba24e4d0bb575b9cec 100644 --- a/src/KOKKOS/pair_table_rx_kokkos.cpp +++ b/src/KOKKOS/pair_table_rx_kokkos.cpp @@ -27,11 +27,13 @@ #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" #include "fix.h" #include "kokkos_few.h" +#include "kokkos.h" +#include "modify.h" #include <cassert> using namespace LAMMPS_NS; @@ -162,12 +164,12 @@ PairTableRXKokkos<DeviceType>::~PairTableRXKokkos() delete [] site1; delete [] site2; - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); if (allocated) { - memory->destroy_kokkos(d_table->cutsq, cutsq); - memory->destroy_kokkos(d_table->tabindex, tabindex); + memoryKK->destroy_kokkos(d_table->cutsq, cutsq); + memoryKK->destroy_kokkos(d_table->tabindex, tabindex); } delete h_table; @@ -621,13 +623,13 @@ void PairTableRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in) else evflag = vflag_fdotr = 0; if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.template view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.template view<DeviceType>(); } @@ -798,41 +800,41 @@ void PairTableRXKokkos<DeviceType>::create_kokkos_tables() { const int tlm1 = tablength-1; - memory->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits"); - memory->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask"); - memory->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); - memory->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); - memory->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); + memoryKK->create_kokkos(d_table->nshiftbits,h_table->nshiftbits,ntables,"Table::nshiftbits"); + memoryKK->create_kokkos(d_table->nmask,h_table->nmask,ntables,"Table::nmask"); + memoryKK->create_kokkos(d_table->innersq,h_table->innersq,ntables,"Table::innersq"); + memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta"); + memoryKK->create_kokkos(d_table->deltasq6,h_table->deltasq6,ntables,"Table::deltasq6"); if(tabstyle == LOOKUP) { - memory->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tlm1,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tlm1,"Table::f"); } if(tabstyle == LINEAR) { - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); - memory->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); - memory->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de"); + memoryKK->create_kokkos(d_table->df,h_table->df,ntables,tlm1,"Table::df"); } if(tabstyle == SPLINE) { - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); - memory->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2"); - memory->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,tablength,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,tablength,"Table::f"); + memoryKK->create_kokkos(d_table->e2,h_table->e2,ntables,tablength,"Table::e2"); + memoryKK->create_kokkos(d_table->f2,h_table->f2,ntables,tablength,"Table::f2"); } if(tabstyle == BITMAP) { int ntable = 1 << tablength; - memory->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq"); - memory->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e"); - memory->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f"); - memory->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de"); - memory->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df"); - memory->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); + memoryKK->create_kokkos(d_table->rsq,h_table->rsq,ntables,ntable,"Table::rsq"); + memoryKK->create_kokkos(d_table->e,h_table->e,ntables,ntable,"Table::e"); + memoryKK->create_kokkos(d_table->f,h_table->f,ntables,ntable,"Table::f"); + memoryKK->create_kokkos(d_table->de,h_table->de,ntables,ntable,"Table::de"); + memoryKK->create_kokkos(d_table->df,h_table->df,ntables,ntable,"Table::df"); + memoryKK->create_kokkos(d_table->drsq,h_table->drsq,ntables,ntable,"Table::drsq"); } @@ -943,8 +945,8 @@ void PairTableRXKokkos<DeviceType>::allocate() const int nt = atom->ntypes + 1; memory->create(setflag,nt,nt,"pair:setflag"); - memory->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); - memory->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); + memoryKK->create_kokkos(d_table->cutsq,h_table->cutsq,cutsq,nt,nt,"pair:cutsq"); + memoryKK->create_kokkos(d_table->tabindex,h_table->tabindex,tabindex,nt,nt,"pair:tabindex"); d_table_const.cutsq = d_table->cutsq; d_table_const.tabindex = d_table->tabindex; diff --git a/src/KOKKOS/pair_tersoff_kokkos.cpp b/src/KOKKOS/pair_tersoff_kokkos.cpp index 3a5c2227efe1b9f71e9b1277096882e86027336e..c585da60297b4a852b50ab19d00a060edaaf6f83 100644 --- a/src/KOKKOS/pair_tersoff_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -60,8 +60,8 @@ template<class DeviceType> PairTersoffKokkos<DeviceType>::~PairTersoffKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -170,13 +170,13 @@ void PairTersoffKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp index 9a59979f4c9d4d0279f5b830d0e66bcb1af39148..8e718fbf6f16df5017468b116c1b140a82eb0b4c 100644 --- a/src/KOKKOS/pair_tersoff_mod_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_mod_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -60,8 +60,8 @@ template<class DeviceType> PairTersoffMODKokkos<DeviceType>::~PairTersoffMODKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -170,13 +170,13 @@ void PairTersoffMODKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp index 8468bb01f5fc545d177ad637de70ae8514087693..11a5ff100753ec02dafa657dd481222bfaf7875c 100644 --- a/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp +++ b/src/KOKKOS/pair_tersoff_zbl_kokkos.cpp @@ -31,7 +31,7 @@ #include "integrate.h" #include "respa.h" #include "math_const.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" @@ -71,8 +71,8 @@ template<class DeviceType> PairTersoffZBLKokkos<DeviceType>::~PairTersoffZBLKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); } } @@ -184,13 +184,13 @@ void PairTersoffZBLKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_vashishta_kokkos.cpp b/src/KOKKOS/pair_vashishta_kokkos.cpp index fe2394ae84a9fea6847c5f22eef598c25719b95a..e7dd01bfe4b48e903149db04ab8bb3b7af6cd3b6 100644 --- a/src/KOKKOS/pair_vashishta_kokkos.cpp +++ b/src/KOKKOS/pair_vashishta_kokkos.cpp @@ -27,10 +27,10 @@ #include "neigh_request.h" #include "force.h" #include "comm.h" -#include "memory.h" +#include "memory_kokkos.h" #include "neighbor.h" #include "neigh_list_kokkos.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" #include "math_const.h" @@ -62,8 +62,8 @@ template<class DeviceType> PairVashishtaKokkos<DeviceType>::~PairVashishtaKokkos() { if (!copymode) { - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); eatom = NULL; vatom = NULL; } @@ -85,13 +85,13 @@ void PairVashishtaKokkos<DeviceType>::compute(int eflag_in, int vflag_in) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } diff --git a/src/KOKKOS/pair_yukawa_kokkos.cpp b/src/KOKKOS/pair_yukawa_kokkos.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6560ec9684b3f88abb0143aeb39ca791de02affb --- /dev/null +++ b/src/KOKKOS/pair_yukawa_kokkos.cpp @@ -0,0 +1,301 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Stefan Paquay (Brandeis University) +------------------------------------------------------------------------- */ +#include <math.h> +#include <stdlib.h> +#include "pair_yukawa_kokkos.h" +#include "kokkos.h" +#include "atom_kokkos.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "update.h" +#include "integrate.h" +#include "respa.h" +#include "math_const.h" +#include "memory_kokkos.h" +#include "error.h" +#include "atom_masks.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define KOKKOS_CUDA_MAX_THREADS 256 +#define KOKKOS_CUDA_MIN_BLOCKS 8 + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairYukawaKokkos<DeviceType>::PairYukawaKokkos(LAMMPS *lmp) : PairYukawa(lmp) +{ + respa_enable = 0; + + atomKK = (AtomKokkos *) atom; + execution_space = ExecutionSpaceFromDevice<DeviceType>::space; + datamask_read = X_MASK | F_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK; + datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK; + cutsq = NULL; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +PairYukawaKokkos<DeviceType>::~PairYukawaKokkos() +{ + if (allocated) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); + k_cutsq = DAT::tdual_ffloat_2d(); + memory->sfree(cutsq); + eatom = NULL; + vatom = NULL; + cutsq = NULL; + } +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::cleanup_copy() { + // WHY needed: this prevents parent copy from deallocating any arrays + allocated = 0; + cutsq = NULL; + eatom = NULL; + vatom = NULL; +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::allocate() +{ + PairYukawa::allocate(); + + int n = atom->ntypes; + memory->destroy(cutsq); + memoryKK->create_kokkos(k_cutsq,cutsq,n+1,n+1,"pair:cutsq"); + d_cutsq = k_cutsq.template view<DeviceType>(); + k_params = Kokkos::DualView<params_yukawa**, + Kokkos::LayoutRight,DeviceType>( + "PairYukawa::params",n+1,n+1); + + params = k_params.template view<DeviceType>(); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::init_style() +{ + PairYukawa::init_style(); + + // error if rRESPA with inner levels + + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { + int respa = 0; + if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; + if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; + if (respa) + error->all(FLERR,"Cannot use Kokkos pair style with rRESPA inner/middle"); + } + + // irequest = neigh request made by parent class + + neighflag = lmp->kokkos->neighflag; + int irequest = neighbor->nrequest - 1; + + neighbor->requests[irequest]-> + kokkos_host = Kokkos::Impl::is_same<DeviceType,LMPHostType>::value && + !Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + neighbor->requests[irequest]-> + kokkos_device = Kokkos::Impl::is_same<DeviceType,LMPDeviceType>::value; + + if (neighflag == FULL) { + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->half = 0; + } else if (neighflag == HALF || neighflag == HALFTHREAD) { + neighbor->requests[irequest]->full = 0; + neighbor->requests[irequest]->half = 1; + } else { + error->all(FLERR,"Cannot use chosen neighbor list style with yukawa/kk"); + } +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ +// Rewrite this. +template<class DeviceType> +double PairYukawaKokkos<DeviceType>::init_one(int i, int j) +{ + double cutone = PairYukawa::init_one(i,j); + + k_params.h_view(i,j).a = a[i][j]; + k_params.h_view(i,j).offset = offset[i][j]; + k_params.h_view(i,j).cutsq = cutone*cutone; + k_params.h_view(j,i) = k_params.h_view(i,j); + + if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) { + m_params[i][j] = m_params[j][i] = k_params.h_view(i,j); + m_cutsq[j][i] = m_cutsq[i][j] = cutone*cutone; + } + + k_cutsq.h_view(i,j) = k_cutsq.h_view(j,i) = cutone*cutone; + k_cutsq.template modify<LMPHostType>(); + k_params.template modify<LMPHostType>(); + + return cutone; +} + +/* ---------------------------------------------------------------------- */ + +template<class DeviceType> +void PairYukawaKokkos<DeviceType>::compute(int eflag_in, int vflag_in) +{ + eflag = eflag_in; + vflag = vflag_in; + + + if (neighflag == FULL) no_virial_fdotr_compute = 1; + + if (eflag || vflag) ev_setup(eflag,vflag,0); + else evflag = vflag_fdotr = 0; + + // reallocate per-atom arrays if necessary + + if (eflag_atom) { + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + d_eatom = k_eatom.view<DeviceType>(); + } + if (vflag_atom) { + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + d_vatom = k_vatom.view<DeviceType>(); + } + + atomKK->sync(execution_space,datamask_read); + k_cutsq.template sync<DeviceType>(); + k_params.template sync<DeviceType>(); + if (eflag || vflag) atomKK->modified(execution_space,datamask_modify); + else atomKK->modified(execution_space,F_MASK); + + x = atomKK->k_x.view<DeviceType>(); + c_x = atomKK->k_x.view<DeviceType>(); + f = atomKK->k_f.view<DeviceType>(); + type = atomKK->k_type.view<DeviceType>(); + tag = atomKK->k_tag.view<DeviceType>(); + nlocal = atom->nlocal; + nall = atom->nlocal + atom->nghost; + newton_pair = force->newton_pair; + special_lj[0] = force->special_lj[0]; + special_lj[1] = force->special_lj[1]; + special_lj[2] = force->special_lj[2]; + special_lj[3] = force->special_lj[3]; + + // loop over neighbors of my atoms + + EV_FLOAT ev = pair_compute<PairYukawaKokkos<DeviceType>,void >( + this,(NeighListKokkos<DeviceType>*)list); + + if (eflag_global) eng_vdwl += ev.evdwl; + if (vflag_global) { + virial[0] += ev.v[0]; + virial[1] += ev.v[1]; + virial[2] += ev.v[2]; + virial[3] += ev.v[3]; + virial[4] += ev.v[4]; + virial[5] += ev.v[5]; + } + + if (vflag_fdotr) pair_virial_fdotr_compute(this); + + if (eflag_atom) { + k_eatom.template modify<DeviceType>(); + k_eatom.template sync<LMPHostType>(); + } + + if (vflag_atom) { + k_vatom.template modify<DeviceType>(); + k_vatom.template sync<LMPHostType>(); + } +} + + + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaKokkos<DeviceType>:: +compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT rr = sqrt(rsq); + // Fetch the params either off the stack or from some mapped memory? + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + + // U = a * exp(-kappa*r) / r + // f = (kappa * a * exp(-kappa*r) / r + a*exp(-kappa*r)/r^2)*grad(r) + // = (kappa + 1/r) * (a * exp(-kappa*r) / r) + // f/r = (kappa + 1/r) * (a * exp(-kappa*r) / r^2) + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT rinv2 = rinv*rinv; + const F_FLOAT screening = exp(-kappa*rr); + const F_FLOAT forceyukawa = aa * screening * (kappa + rinv); + const F_FLOAT fpair = forceyukawa * rinv2; + + return fpair; +} + +template<class DeviceType> +template<bool STACKPARAMS, class Specialisation> +KOKKOS_INLINE_FUNCTION +F_FLOAT PairYukawaKokkos<DeviceType>:: +compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const { + (void) i; + (void) j; + const F_FLOAT rr = sqrt(rsq); + const F_FLOAT aa = STACKPARAMS ? m_params[itype][jtype].a + : params(itype,jtype).a; + const F_FLOAT offset = STACKPARAMS ? m_params[itype][jtype].offset + : params(itype,jtype).offset; + + // U = a * exp(-kappa*r) / r + // f = (kappa * a * exp(-kappa*r) / r + a*exp(-kappa*r)/r^2)*grad(r) + // = (kappa + 1/r) * (a * exp(-kappa*r) / r) + // f/r = (kappa + 1/r) * (a * exp(-kappa*r) / r^2) + const F_FLOAT rinv = 1.0 / rr; + const F_FLOAT screening = exp(-kappa*rr); + + return aa * screening * rinv - offset; +} + + +namespace LAMMPS_NS { +template class PairYukawaKokkos<LMPDeviceType>; +#ifdef KOKKOS_HAVE_CUDA +template class PairYukawaKokkos<LMPHostType>; +#endif +} diff --git a/src/KOKKOS/pair_yukawa_kokkos.h b/src/KOKKOS/pair_yukawa_kokkos.h new file mode 100644 index 0000000000000000000000000000000000000000..a4c8cf05b781ddbfd7ad65c53eb900aba7e4a9a0 --- /dev/null +++ b/src/KOKKOS/pair_yukawa_kokkos.h @@ -0,0 +1,146 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/kk,PairYukawaKokkos<LMPDeviceType>) +PairStyle(yukawa/kk/device,PairYukawaKokkos<LMPDeviceType>) +PairStyle(yukawa/kk/host,PairYukawaKokkos<LMPHostType>) + +#else + +#ifndef LMP_PAIR_YUKAWA_KOKKOS_H +#define LMP_PAIR_YUKAWA_KOKKOS_H + +#include "pair_kokkos.h" +#include "pair_yukawa.h" +#include "neigh_list_kokkos.h" + +namespace LAMMPS_NS { + +template<class DeviceType> +class PairYukawaKokkos : public PairYukawa { + public: + enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF}; + enum {COUL_FLAG=0}; + typedef DeviceType device_type; + typedef ArrayTypes<DeviceType> AT; + + PairYukawaKokkos(class LAMMPS *); + virtual ~PairYukawaKokkos(); + + void compute(int, int); + void init_style(); + double init_one(int,int); + + struct params_yukawa { + KOKKOS_INLINE_FUNCTION + params_yukawa(){ cutsq=0, a = 0; offset = 0; } + KOKKOS_INLINE_FUNCTION + params_yukawa(int i){ cutsq=0, a = 0; offset = 0; } + F_FLOAT cutsq, a, offset; + }; + + + protected: + void cleanup_copy(); + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_fpair(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_evdwl(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const; + + template<bool STACKPARAMS, class Specialisation> + KOKKOS_INLINE_FUNCTION + F_FLOAT compute_ecoul(const F_FLOAT& rsq, const int& i, const int&j, + const int& itype, const int& jtype) const + { + return 0; + } + + + Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType> k_params; + typename Kokkos::DualView<params_yukawa**,Kokkos::LayoutRight,DeviceType>::t_dev_const_um params; + params_yukawa m_params[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + F_FLOAT m_cutsq[MAX_TYPES_STACKPARAMS+1][MAX_TYPES_STACKPARAMS+1]; + typename AT::t_x_array_randomread x; + typename AT::t_x_array c_x; + typename AT::t_f_array f; + typename AT::t_int_1d_randomread type; + + DAT::tdual_efloat_1d k_eatom; + DAT::tdual_virial_array k_vatom; + typename AT::t_efloat_1d d_eatom; + typename AT::t_virial_array d_vatom; + typename AT::t_tagint_1d tag; + + int newton_pair; + double special_lj[4]; + + typename AT::tdual_ffloat_2d k_cutsq; + typename AT::t_ffloat_2d d_cutsq; + + + int neighflag; + int nlocal,nall,eflag,vflag; + + void allocate(); + friend class PairComputeFunctor<PairYukawaKokkos,FULL,true>; + friend class PairComputeFunctor<PairYukawaKokkos,HALF,true>; + friend class PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,true>; + friend class PairComputeFunctor<PairYukawaKokkos,N2,true>; + friend class PairComputeFunctor<PairYukawaKokkos,FULL,false>; + friend class PairComputeFunctor<PairYukawaKokkos,HALF,false>; + friend class PairComputeFunctor<PairYukawaKokkos,HALFTHREAD,false>; + friend class PairComputeFunctor<PairYukawaKokkos,N2,false>; + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,FULL,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALF,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,HALFTHREAD,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute_neighlist<PairYukawaKokkos,N2,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend EV_FLOAT pair_compute<PairYukawaKokkos,void>( + PairYukawaKokkos*,NeighListKokkos<DeviceType>*); + friend void pair_virial_fdotr_compute<PairYukawaKokkos>(PairYukawaKokkos*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Cannot use chosen neighbor list style with yukawa/kk + +That style is not supported by Kokkos. + +*/ diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index bd3ed3644f53373305a995382144a6d5c27a5432..cf6e2814c0e0bb56f5a19b2a2481ea019259e114 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -32,9 +32,10 @@ #include "domain.h" #include "fft3d_wrap.h" #include "remap_wrap.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" #include "atom_masks.h" +#include "kokkos.h" #include "math_const.h" #include "math_special_kokkos.h" @@ -162,8 +163,8 @@ PPPMKokkos<DeviceType>::~PPPMKokkos() //memory->destroy(part2grid); //memory->destroy(acons); - memory->destroy_kokkos(k_eatom,eatom); - memory->destroy_kokkos(k_vatom,vatom); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->destroy_kokkos(k_vatom,vatom); eatom = NULL; vatom = NULL; } @@ -618,13 +619,13 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag) // reallocate per-atom arrays if necessary if (eflag_atom) { - memory->destroy_kokkos(k_eatom,eatom); - memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); + memoryKK->destroy_kokkos(k_eatom,eatom); + memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom"); d_eatom = k_eatom.view<DeviceType>(); } if (vflag_atom) { - memory->destroy_kokkos(k_vatom,vatom); - memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); + memoryKK->destroy_kokkos(k_vatom,vatom); + memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom"); d_vatom = k_vatom.view<DeviceType>(); } @@ -805,12 +806,12 @@ void PPPMKokkos<DeviceType>::allocate() { d_density_brick = typename AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - memory->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); + memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view<DeviceType>(); d_greensfn = typename AT::t_float_1d("pppm:greensfn",nfft_both); - memory->create_kokkos(k_work1,work1,2*nfft_both,"pppm:work1"); - memory->create_kokkos(k_work2,work2,2*nfft_both,"pppm:work2"); + memoryKK->create_kokkos(k_work1,work1,2*nfft_both,"pppm:work1"); + memoryKK->create_kokkos(k_work2,work2,2*nfft_both,"pppm:work2"); d_work1 = k_work1.view<DeviceType>(); d_work2 = k_work2.view<DeviceType>(); d_vg = typename AT::t_virial_array("pppm:vg",nfft_both); @@ -878,13 +879,13 @@ void PPPMKokkos<DeviceType>::allocate() template<class DeviceType> void PPPMKokkos<DeviceType>::deallocate() { - memory->destroy_kokkos(d_density_fft,density_fft); + memoryKK->destroy_kokkos(d_density_fft,density_fft); density_fft = NULL; - memory->destroy_kokkos(d_greensfn,greensfn); + memoryKK->destroy_kokkos(d_greensfn,greensfn); greensfn = NULL; - memory->destroy_kokkos(d_work1,work1); + memoryKK->destroy_kokkos(d_work1,work1); work1 = NULL; - memory->destroy_kokkos(d_work2,work2); + memoryKK->destroy_kokkos(d_work2,work2); work2 = NULL; delete fft1; @@ -2631,7 +2632,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_fieldforce_peratom, const int &i ------------------------------------------------------------------------- */ template<class DeviceType> -void PPPMKokkos<DeviceType>::pack_forward_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos<DeviceType>::pack_forward_kspace_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view<DeviceType>(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2687,7 +2688,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_forward2, const int &i) con ------------------------------------------------------------------------- */ template<class DeviceType> -void PPPMKokkos<DeviceType>::unpack_forward_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos<DeviceType>::unpack_forward_kspace_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view<DeviceType>(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2744,7 +2745,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_unpack_forward2, const int &i) c ------------------------------------------------------------------------- */ template<class DeviceType> -void PPPMKokkos<DeviceType>::pack_reverse_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos<DeviceType>::pack_reverse_kspace_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view<DeviceType>(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); @@ -2774,7 +2775,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_pack_reverse, const int &i) cons ------------------------------------------------------------------------- */ template<class DeviceType> -void PPPMKokkos<DeviceType>::unpack_reverse_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) +void PPPMKokkos<DeviceType>::unpack_reverse_kspace_kokkos(int flag, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &k_buf, int nlist, DAT::tdual_int_2d &k_list, int index) { typename AT::t_int_2d_um d_list = k_list.view<DeviceType>(); d_list_index = Kokkos::subview(d_list,index,Kokkos::ALL()); diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index 4e6bb1d74c293a74397b94dcc355b2972216186b..c328b488d0260e5e4c45993f6b83ee4846a392e9 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -24,6 +24,7 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos<LMPHostType>) #include "pppm.h" #include "gridcomm_kokkos.h" +#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -86,7 +87,7 @@ struct TagPPPM_slabcorr4{}; struct TagPPPM_timing_zero{}; template<class DeviceType> -class PPPMKokkos : public PPPM { +class PPPMKokkos : public PPPM, public KokkosBase { public: typedef DeviceType device_type; typedef ArrayTypes<DeviceType> AT; @@ -379,10 +380,10 @@ class PPPMKokkos : public PPPM { // grid communication - virtual void pack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); - virtual void unpack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); - virtual void pack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); - virtual void unpack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); + void pack_forward_kspace_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); + void unpack_forward_kspace_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); + void pack_reverse_kspace_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); + void unpack_reverse_kspace_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int); // triclinic diff --git a/src/KOKKOS/region_block_kokkos.h b/src/KOKKOS/region_block_kokkos.h index e14ac4d0c0db75fe98f3e7525f43d8505ab30d7e..532bc588e29746f16a427d44264c39d8f48b0608 100644 --- a/src/KOKKOS/region_block_kokkos.h +++ b/src/KOKKOS/region_block_kokkos.h @@ -23,6 +23,7 @@ RegionStyle(block/kk/host,RegBlockKokkos<LMPHostType>) #define LMP_REGION_BLOCK_KOKKOS_H #include "region_block.h" +#include "kokkos_base.h" #include "kokkos_type.h" namespace LAMMPS_NS { @@ -30,7 +31,7 @@ namespace LAMMPS_NS { struct TagRegBlockMatchAll{}; template<class DeviceType> -class RegBlockKokkos : public RegBlock { +class RegBlockKokkos : public RegBlock, public KokkosBase { friend class FixPour; public: diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index adec5ff1bd9d7520c2c469638d497759375c362a..5fa03a098925430d50f5dd09bc59612398a9d759 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -32,8 +32,9 @@ #include "compute.h" #include "fix.h" #include "timer.h" -#include "memory.h" +#include "memory_kokkos.h" #include "error.h" +#include "kokkos.h" #include <ctime> diff --git a/src/Makefile b/src/Makefile index e0f0db77fe3d5eea0ed95b04541047344919ee45..c3c84b366575930279960c624dec871a42869342 100644 --- a/src/Makefile +++ b/src/Makefile @@ -17,12 +17,12 @@ SHLINK = liblammps.so OBJDIR = Obj_$@ OBJSHDIR = Obj_shared_$@ -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) +SRC = $(filter-out library.cpp,$(wildcard *.cpp)) +INC = $(filter-out library.h,$(wildcard *.h)) OBJ = $(SRC:.cpp=.o) -SRCLIB = $(filter-out main.cpp,$(SRC)) -OBJLIB = $(filter-out main.o,$(OBJ)) +SRCLIB = $(filter-out main.cpp,$(SRC)) library.cpp +OBJLIB = $(filter-out main.o,$(OBJ)) library.o # Command-line options for mode: exe (default), shexe, lib, shlib @@ -176,7 +176,7 @@ help: then cp Makefile.package.settings.empty Makefile.package.settings; fi @cp Makefile.package Makefile.package.settings $(objdir) @cd $(objdir); rm -f .depend; \ - $(MAKE) $(MFLAGS) "SRC = $(SRC)" "INC = $(INC)" depend || : + $(MAKE) $(MFLAGS) "SRC = $(SRC) library.cpp" "INC = $(INC) library.h" depend || : ifeq ($(mode),exe) @cd $(objdir); \ $(MAKE) $(MFLAGS) "OBJ = $(OBJ)" "INC = $(INC)" "SHFLAGS =" \ diff --git a/src/Purge.list b/src/Purge.list index 312994fdb7cf413464728e45dfec655a7b6feb95..93808b784ca58254f1c2076f47da5b773de3ea51 100644 --- a/src/Purge.list +++ b/src/Purge.list @@ -16,6 +16,9 @@ style_region.h style_neigh_bin.h style_neigh_pair.h style_neigh_stencil.h +# deleted on 1 December 2017 +npair_half_bin_newtoff_intel.cpp +npair_half_bin_newtoff_intel.h # deleted on 11 October 2017 fix_shear_history_omp.cpp fix_shear_history_omp.h diff --git a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp index 4163a816ac9bcd8662fd25facb1200019071cd76..f78ced3d8bd96caaa35798db21f1bce5f19a05e2 100644 --- a/src/USER-DRUDE/pair_lj_cut_thole_long.cpp +++ b/src/USER-DRUDE/pair_lj_cut_thole_long.cpp @@ -32,6 +32,8 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "modify.h" +#include "domain.h" using namespace LAMMPS_NS; using namespace MathConst; diff --git a/src/USER-DRUDE/pair_thole.cpp b/src/USER-DRUDE/pair_thole.cpp index abb37b82b766581aafadb5519f76169ca8289491..ace119ce523892b0ad7a8d52c0288f0bc0f725b4 100644 --- a/src/USER-DRUDE/pair_thole.cpp +++ b/src/USER-DRUDE/pair_thole.cpp @@ -25,6 +25,8 @@ #include "error.h" #include "fix.h" #include "fix_store.h" +#include "domain.h" +#include "modify.h" using namespace LAMMPS_NS; diff --git a/src/USER-INTEL/angle_charmm_intel.cpp b/src/USER-INTEL/angle_charmm_intel.cpp index 031c9642000c22a6aa7f4156db319de5409fce6e..bcaecb4696fa1d93e277d473e931b268298c5153 100644 --- a/src/USER-INTEL/angle_charmm_intel.cpp +++ b/src/USER-INTEL/angle_charmm_intel.cpp @@ -23,6 +23,7 @@ #include "domain.h" #include "comm.h" #include "force.h" +#include "modify.h" #include "math_const.h" #include "memory.h" #include "suffix.h" diff --git a/src/USER-INTEL/angle_harmonic_intel.cpp b/src/USER-INTEL/angle_harmonic_intel.cpp index 84220277d7a736131c30fa37d79fd93364e0c4ca..ffc81c496d77c654f35d6f0e1ad71a39a06844ee 100644 --- a/src/USER-INTEL/angle_harmonic_intel.cpp +++ b/src/USER-INTEL/angle_harmonic_intel.cpp @@ -23,6 +23,7 @@ #include "domain.h" #include "comm.h" #include "force.h" +#include "modify.h" #include "math_const.h" #include "memory.h" #include "suffix.h" diff --git a/src/USER-INTEL/bond_fene_intel.cpp b/src/USER-INTEL/bond_fene_intel.cpp index 93d64ed631f5466baec735c52a2770160b2eb8fb..004a2e5413d6d566cb536b9d4290e80540315e81 100644 --- a/src/USER-INTEL/bond_fene_intel.cpp +++ b/src/USER-INTEL/bond_fene_intel.cpp @@ -19,6 +19,7 @@ #include <stdlib.h> #include "bond_fene_intel.h" #include "atom.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "comm.h" diff --git a/src/USER-INTEL/bond_harmonic_intel.cpp b/src/USER-INTEL/bond_harmonic_intel.cpp index 0ac466f11386c99bef29b95a33e086349389239c..0c714edc0959519621b624d19dd6e79b5b081977 100644 --- a/src/USER-INTEL/bond_harmonic_intel.cpp +++ b/src/USER-INTEL/bond_harmonic_intel.cpp @@ -19,6 +19,7 @@ #include <stdlib.h> #include "bond_harmonic_intel.h" #include "atom.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "comm.h" diff --git a/src/USER-INTEL/dihedral_charmm_intel.cpp b/src/USER-INTEL/dihedral_charmm_intel.cpp index 0e13e92251594d04348d98f2200c3501a5b29fd5..c8429dc8ff7079930f00459c33ad85674ebaf222 100644 --- a/src/USER-INTEL/dihedral_charmm_intel.cpp +++ b/src/USER-INTEL/dihedral_charmm_intel.cpp @@ -21,6 +21,7 @@ #include "atom.h" #include "comm.h" #include "memory.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "force.h" diff --git a/src/USER-INTEL/dihedral_fourier_intel.cpp b/src/USER-INTEL/dihedral_fourier_intel.cpp index 805ffc0e256177f294bc8f2b118e778c4cd7054e..772ea5b02fdf4a84838bd7a59a90def0b8a3d107 100644 --- a/src/USER-INTEL/dihedral_fourier_intel.cpp +++ b/src/USER-INTEL/dihedral_fourier_intel.cpp @@ -21,6 +21,7 @@ #include "atom.h" #include "comm.h" #include "memory.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "force.h" diff --git a/src/USER-INTEL/dihedral_harmonic_intel.cpp b/src/USER-INTEL/dihedral_harmonic_intel.cpp index 5d16b0da745425abd66617ab50043ef796d78db3..b35ea4e03a9b67a3b619028c0c559b304cbbaf07 100644 --- a/src/USER-INTEL/dihedral_harmonic_intel.cpp +++ b/src/USER-INTEL/dihedral_harmonic_intel.cpp @@ -21,6 +21,7 @@ #include "atom.h" #include "comm.h" #include "memory.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "force.h" diff --git a/src/USER-INTEL/dihedral_opls_intel.cpp b/src/USER-INTEL/dihedral_opls_intel.cpp index e290ab90616b94df0135264dcbbf66fe8546a36a..6b7b2c81ebd75bace530cbde5dd9bee1f0981c57 100644 --- a/src/USER-INTEL/dihedral_opls_intel.cpp +++ b/src/USER-INTEL/dihedral_opls_intel.cpp @@ -21,6 +21,7 @@ #include "atom.h" #include "comm.h" #include "memory.h" +#include "modify.h" #include "neighbor.h" #include "domain.h" #include "force.h" diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp index eac48b8510b4aa67a5e93f689ae1381510e02b57..3e36c8f7a937b0e254389c4099897a62aa9ac236 100644 --- a/src/USER-INTEL/fix_intel.cpp +++ b/src/USER-INTEL/fix_intel.cpp @@ -32,6 +32,7 @@ #include <string.h> #include <stdlib.h> #include <stdio.h> +#include <math.h> #ifdef _LMP_INTEL_OFFLOAD #ifndef INTEL_OFFLOAD_NOAFFINITY diff --git a/src/USER-INTEL/nbin_intel.cpp b/src/USER-INTEL/nbin_intel.cpp index 3a36ead499ebe34a1dfcce5350abbd1f13fb8d6b..9a1dae36ca465b08d8cbbf63407597306b810553 100644 --- a/src/USER-INTEL/nbin_intel.cpp +++ b/src/USER-INTEL/nbin_intel.cpp @@ -18,8 +18,9 @@ #include "nbin_intel.h" #include "atom.h" #include "group.h" -#include "domain.h" #include "comm.h" +#include "domain.h" +#include "modify.h" #include "update.h" #include "error.h" diff --git a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp index e6d45d7b2c6941fa87c24cd089ee46863b4828ee..a814891f2599ad73fcac0d8db7064e56ce173168 100644 --- a/src/USER-INTEL/npair_full_bin_ghost_intel.cpp +++ b/src/USER-INTEL/npair_full_bin_ghost_intel.cpp @@ -21,6 +21,8 @@ #include "neigh_list.h" #include "atom.h" #include "atom_vec.h" +#include "comm.h" +#include "domain.h" #include "molecule.h" #include "error.h" diff --git a/src/USER-INTEL/npair_full_bin_intel.cpp b/src/USER-INTEL/npair_full_bin_intel.cpp index 06c10c080fd9e506760c0d1e66c93ff76233ee7c..60b912d796146d2c9a7f7a15d116afdc430c20ff 100644 --- a/src/USER-INTEL/npair_full_bin_intel.cpp +++ b/src/USER-INTEL/npair_full_bin_intel.cpp @@ -20,6 +20,7 @@ #include "neigh_list.h" #include "atom.h" #include "comm.h" +#include "domain.h" #include "group.h" using namespace LAMMPS_NS; diff --git a/src/USER-INTEL/npair_half_bin_newton_intel.cpp b/src/USER-INTEL/npair_half_bin_newton_intel.cpp index c761557097cf96b677dc20a355a94e186ac5d8c5..8c024a46046827985db3f8047446ad06df61eba1 100644 --- a/src/USER-INTEL/npair_half_bin_newton_intel.cpp +++ b/src/USER-INTEL/npair_half_bin_newton_intel.cpp @@ -20,6 +20,7 @@ #include "neigh_list.h" #include "atom.h" #include "comm.h" +#include "domain.h" #include "group.h" using namespace LAMMPS_NS; diff --git a/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp b/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp index d70f1ec5897a0d7b1e8a7b5b9633f9882ce2c54f..653a95139498dbe1d34b5c8d77ca706664e02c56 100644 --- a/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp +++ b/src/USER-INTEL/npair_half_bin_newton_tri_intel.cpp @@ -20,6 +20,7 @@ #include "neigh_list.h" #include "atom.h" #include "comm.h" +#include "domain.h" #include "group.h" using namespace LAMMPS_NS; diff --git a/src/USER-INTEL/npair_intel.cpp b/src/USER-INTEL/npair_intel.cpp index 0068e02635c7d6d7bd145fe45a35bade50f64f0c..234557c941c1ce7d765e745fb918d6a42297c033 100644 --- a/src/USER-INTEL/npair_intel.cpp +++ b/src/USER-INTEL/npair_intel.cpp @@ -15,6 +15,10 @@ Contributing author: W. Michael Brown (Intel) ------------------------------------------------------------------------- */ +#include "comm.h" +#include "domain.h" +#include "timer.h" +#include "modify.h" #include "npair_intel.h" #include "nstencil.h" diff --git a/src/USER-INTEL/pair_eam_intel.cpp b/src/USER-INTEL/pair_eam_intel.cpp index b97128bf9ff434fbc71c02d204c25128e9583a20..ea4ee30d52602440b06b81603a3d93897b72cba3 100644 --- a/src/USER-INTEL/pair_eam_intel.cpp +++ b/src/USER-INTEL/pair_eam_intel.cpp @@ -23,6 +23,7 @@ #include "atom.h" #include "force.h" #include "comm.h" +#include "modify.h" #include "neighbor.h" #include "neigh_list.h" #include "neigh_request.h" diff --git a/src/USER-INTEL/pppm_disp_intel.cpp b/src/USER-INTEL/pppm_disp_intel.cpp index 1269579ff4f26addaa08d00c19ff433626d5a089..bd41f8b531cc18be1e6cf79376de773a52c9d7c3 100644 --- a/src/USER-INTEL/pppm_disp_intel.cpp +++ b/src/USER-INTEL/pppm_disp_intel.cpp @@ -20,7 +20,10 @@ #include <math.h> #include "pppm_disp_intel.h" #include "atom.h" +#include "comm.h" +#include "domain.h" #include "error.h" +#include "modify.h" #include "fft3d_wrap.h" #include "gridcomm.h" #include "math_const.h" diff --git a/src/USER-INTEL/pppm_intel.cpp b/src/USER-INTEL/pppm_intel.cpp index db855b75ef8f877303ae6186e12fc59314729397..30f8f4c5c0677890115a5f5668a557f2053bb054 100644 --- a/src/USER-INTEL/pppm_intel.cpp +++ b/src/USER-INTEL/pppm_intel.cpp @@ -23,7 +23,10 @@ #include <math.h> #include "pppm_intel.h" #include "atom.h" +#include "comm.h" +#include "domain.h" #include "error.h" +#include "modify.h" #include "fft3d_wrap.h" #include "gridcomm.h" #include "math_const.h" diff --git a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp index fd50e774dafb08e7a3be7689ed1081e2fd3a7cef..136c52ab361a2023ee2442fc038039a8d2c1a7d0 100644 --- a/src/USER-MANIFOLD/manifold_plane_wiggle.cpp +++ b/src/USER-MANIFOLD/manifold_plane_wiggle.cpp @@ -24,5 +24,5 @@ void manifold_plane_wiggle::n( const double *x, double *n ) double w = params[1]; n[2] = 1; n[1] = 0.0; - n[0] = -a*w*cos(x[0]); + n[0] = -a*w*cos(w*x[0]); } diff --git a/src/USER-MISC/README b/src/USER-MISC/README index 5af5b22eb7889609834a4449bb5a118e75789c86..a8c33fa38056454194ff2346c9bd2d4822019200 100644 --- a/src/USER-MISC/README +++ b/src/USER-MISC/README @@ -63,6 +63,7 @@ pair_style coul/diel, Axel Kohlmeyer, akohlmey at gmail.com, 1 Dec 11 pair_style dipole/sf, Mario Orsi, orsimario at gmail.com, 8 Aug 11 pair_style edip, Luca Ferraro, luca.ferraro at caspur.it, 15 Sep 11 pair_style eam/cd, Alexander Stukowski, stukowski at mm.tu-darmstadt.de, 7 Nov 09 +pair_style extep, Jaap Kroes (Radboud U), jaapkroes at gmail dot com, 28 Nov 17 pair_style gauss/cut, Axel Kohlmeyer, akohlmey at gmail.com, 1 Dec 11 pair_style lennard/mdf, Paolo Raiteri, p.raiteri at curtin.edu.au, 2 Dec 15 pair_style list, Axel Kohlmeyer (Temple U), akohlmey at gmail.com, 1 Jun 13 diff --git a/src/USER-MISC/pair_extep.cpp b/src/USER-MISC/pair_extep.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cf859a28f8380fbfb65dd2b72092486f7d5941f3 --- /dev/null +++ b/src/USER-MISC/pair_extep.cpp @@ -0,0 +1,1189 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Jan Los +------------------------------------------------------------------------- */ + +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "pair_extep.h" +#include "atom.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "neigh_request.h" +#include "my_page.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define MAXLINE 1024 +#define DELTA 4 +#define PGDELTA 1 + +/* ---------------------------------------------------------------------- */ + +PairExTeP::PairExTeP(LAMMPS *lmp) : Pair(lmp) +{ + single_enable = 0; + restartinfo = 0; + one_coeff = 1; + manybody_flag = 1; + ghostneigh = 1; + + nelements = 0; + elements = NULL; + nparams = maxparam = 0; + params = NULL; + elem2param = NULL; + + maxlocal = 0; + SR_numneigh = NULL; + SR_firstneigh = NULL; + ipage = NULL; + pgsize = oneatom = 0; + map = NULL; + + Nt = NULL; + Nd = NULL; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairExTeP::~PairExTeP() +{ + if (elements) + for (int i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + memory->destroy(params); + memory->destroy(elem2param); + + memory->destroy(SR_numneigh); + memory->sfree(SR_firstneigh); + delete [] ipage; + memory->destroy(Nt); + memory->destroy(Nd); + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(cutghost); + delete [] map; + } +} + +/* ---------------------------------------------------------------------- + create SR neighbor list from main neighbor list + SR neighbor list stores neighbors of ghost atoms +------------------------------------------------------------------------- */ + +void PairExTeP::SR_neigh() +{ + int i,j,ii,jj,n,allnum,jnum,itype,jtype,iparam_ij; + double xtmp,ytmp,ztmp,delx,dely,delz,rsq; + int *ilist,*jlist,*numneigh,**firstneigh; + int *neighptr; + + double **x = atom->x; + int *type = atom->type; + + if (atom->nmax > maxlocal) { // ensure there is enough space + maxlocal = atom->nmax; // for atoms and ghosts allocated + memory->destroy(SR_numneigh); + memory->sfree(SR_firstneigh); + memory->destroy(Nt); + memory->destroy(Nd); + memory->create(SR_numneigh,maxlocal,"ExTeP:numneigh"); + SR_firstneigh = (int **) memory->smalloc(maxlocal*sizeof(int *), + "ExTeP:firstneigh"); + memory->create(Nt,maxlocal,"ExTeP:Nt"); + memory->create(Nd,maxlocal,"ExTeP:Nd"); + } + + allnum = list->inum + list->gnum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // store all SR neighs of owned and ghost atoms + // scan full neighbor list of I + + ipage->reset(); + + for (ii = 0; ii < allnum; ii++) { + i = ilist[ii]; + itype=map[type[i]]; + + n = 0; + neighptr = ipage->vget(); + + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + Nt[i] = 0.0; + Nd[i] = 0.0; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + jtype=map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + if (rsq < params[iparam_ij].cutsq) { + neighptr[n++] = j; + double tmp_fc = ters_fc(sqrt(rsq),¶ms[iparam_ij]); + Nt[i] += tmp_fc; + if (itype!=jtype) { + Nd[i] += tmp_fc; + } + } + } + //printf("SR_neigh : N[%d] = %f\n",i,N[i]); + + ipage->vgot(n); + if (ipage->status()) + error->one(FLERR,"Neighbor list overflow, boost neigh_modify one"); + } +} + + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::compute(int eflag, int vflag) +{ + int i,j,k,ii,jj,kk,inum,jnum; + int itype,jtype,ktype,iparam_ij,iparam_ijk; + tagint itag,jtag; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,rsq1,rsq2,r2; + double delr1[3],delr2[3],fi[3],fj[3],fk[3]; + double zeta_ij,prefactor; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = vflag_atom = 0; + + SR_neigh(); + + double **x = atom->x; + double **f = atom->f; + tagint *tag = atom->tag; + int *type = atom->type; + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over full neighbor list of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + itag = tag[i]; + itype = map[type[i]]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + + // two-body interactions, skip half of them + + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + + if (itag > jtag) { + if ((itag+jtag) % 2 == 0) continue; + } else if (itag < jtag) { + if ((itag+jtag) % 2 == 1) continue; + } else { + if (x[j][2] < x[i][2]) continue; + if (x[j][2] == ztmp && x[j][1] < ytmp) continue; + if (x[j][2] == ztmp && x[j][1] == ytmp && x[j][0] < xtmp) continue; + } + + jtype = map[type[j]]; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + + iparam_ij = elem2param[itype][jtype][jtype]; + if (rsq > params[iparam_ij].cutsq) continue; + + repulsive(¶ms[iparam_ij],rsq,fpair,eflag,evdwl); + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,fpair,delx,dely,delz); + } + + // three-body interactions -(bij + Fcorrection) * fA + // skip immediately if I-J is not within cutoff + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + j &= NEIGHMASK; + jtag = tag[j]; + jtype = map[type[j]]; + iparam_ij = elem2param[itype][jtype][jtype]; + + delr1[0] = x[j][0] - xtmp; + delr1[1] = x[j][1] - ytmp; + delr1[2] = x[j][2] - ztmp; + rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2]; + if (rsq1 > params[iparam_ij].cutsq) continue; + + // accumulate bondorder zeta for each i-j interaction via loop over k + + zeta_ij = 0.0; + + /* F_IJ (1) */ + // compute correction to energy and forces + // dE/dr = -Fij(Zi,Zj) dV/dr + // - dFij/dZi dZi/dr V + // (conjugate term is computed when j is a central atom) + + double FXY, dFXY_dNdij, dFXY_dNdji, fa, fa_d, deng, fpair; + double Ntij = Nt[i]; + double Ndij = Nd[i]; + double Ntji = Nt[j]; + double Ndji = Nd[j]; + double r = sqrt(rsq1); + double fc_ij = ters_fc(r,¶ms[iparam_ij]); + + Ntij -= fc_ij; + Ntji -= fc_ij; + if (jtype!=itype) { + Ndij -= fc_ij; + Ndji -= fc_ij; + } + if (Ntij<0) { Ntij=0.; } + if (Ndij<0) { Ndij=0.; } + if (Ntji<0) { Ntji=0.; } + if (Ndji<0) { Ndji=0.; } + FXY = F_corr(itype, jtype, Ndij, Ndji, &dFXY_dNdij, &dFXY_dNdji); + + // envelop functions + double fenv, dfenv_ij; + fenv = envelop_function(Ntij, Ntji, &dfenv_ij); + // + double Fc = fenv * FXY; + double dFc_dNtij = dfenv_ij * FXY; + double dFc_dNdij = fenv * dFXY_dNdij; + + fa = ters_fa(r,¶ms[iparam_ij]); + fa_d = ters_fa_d(r,¶ms[iparam_ij]); + deng = 0.5 * fa * Fc; + fpair = 0.5 * fa_d * Fc / r; + + f[i][0] += delr1[0]*fpair; + f[i][1] += delr1[1]*fpair; + f[i][2] += delr1[2]*fpair; + f[j][0] -= delr1[0]*fpair; + f[j][1] -= delr1[1]*fpair; + f[j][2] -= delr1[2]*fpair; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + deng,0.0,-fpair,-delr1[0],-delr1[1],-delr1[2]); + /* END F_IJ (1) */ + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + r2 = sqrt(rsq2); + + zeta_ij += zeta(¶ms[iparam_ijk],r,r2,delr1,delr2); + + /* F_IJ (2) */ + // compute force components due to spline derivatives + // uses only the part with FXY_x (FXY_y is done when i and j are inversed) + int iparam_ik = elem2param[itype][ktype][0]; + double fc_ik_d = ters_fc_d(r2,¶ms[iparam_ik]); + double fc_prefac_ik_0 = 1.0 * fc_ik_d * fa / r2; + double fc_prefac_ik = dFc_dNtij * fc_prefac_ik_0; + f[i][0] += fc_prefac_ik * delr2[0]; + f[i][1] += fc_prefac_ik * delr2[1]; + f[i][2] += fc_prefac_ik * delr2[2]; + f[k][0] -= fc_prefac_ik * delr2[0]; + f[k][1] -= fc_prefac_ik * delr2[1]; + f[k][2] -= fc_prefac_ik * delr2[2]; + if ( itype != ktype ) { + fc_prefac_ik = dFc_dNdij * fc_prefac_ik_0; + f[i][0] += fc_prefac_ik * delr2[0]; + f[i][1] += fc_prefac_ik * delr2[1]; + f[i][2] += fc_prefac_ik * delr2[2]; + f[k][0] -= fc_prefac_ik * delr2[0]; + f[k][1] -= fc_prefac_ik * delr2[1]; + f[k][2] -= fc_prefac_ik * delr2[2]; + } + /* END F_IJ (2) */ + + } + + // pairwise force due to zeta + + force_zeta(¶ms[iparam_ij],r,zeta_ij,fpair,prefactor,eflag,evdwl); + + f[i][0] += delr1[0]*fpair; + f[i][1] += delr1[1]*fpair; + f[i][2] += delr1[2]*fpair; + f[j][0] -= delr1[0]*fpair; + f[j][1] -= delr1[1]*fpair; + f[j][2] -= delr1[2]*fpair; + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,-fpair,-delr1[0],-delr1[1],-delr1[2]); + + // attractive term via loop over k + + for (kk = 0; kk < jnum; kk++) { + if (jj == kk) continue; + k = jlist[kk]; + k &= NEIGHMASK; + ktype = map[type[k]]; + iparam_ijk = elem2param[itype][jtype][ktype]; + + delr2[0] = x[k][0] - xtmp; + delr2[1] = x[k][1] - ytmp; + delr2[2] = x[k][2] - ztmp; + rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2]; + if (rsq2 > params[iparam_ijk].cutsq) continue; + + attractive(¶ms[iparam_ijk],prefactor, + rsq1,rsq2,delr1,delr2,fi,fj,fk); + + + f[i][0] += fi[0]; + f[i][1] += fi[1]; + f[i][2] += fi[2]; + f[j][0] += fj[0]; + f[j][1] += fj[1]; + f[j][2] += fj[2]; + f[k][0] += fk[0]; + f[k][1] += fk[1]; + f[k][2] += fk[2]; + + if (vflag_atom) v_tally3(i,j,k,fj,fk,delr1,delr2); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::allocate() +{ + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + memory->create(cutghost,n+1,n+1,"pair:cutghost"); + + map = new int[n+1]; +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairExTeP::settings(int narg, char **arg) +{ + if (narg != 0) error->all(FLERR,"Illegal pair_style command"); +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairExTeP::coeff(int narg, char **arg) +{ + int i,j,n; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + // nelements = # of unique elements + // elements = list of element names + + if (elements) { + for (i = 0; i < nelements; i++) delete [] elements[i]; + delete [] elements; + } + elements = new char*[atom->ntypes]; + for (i = 0; i < atom->ntypes; i++) elements[i] = NULL; + + nelements = 0; + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < nelements; j++) + if (strcmp(arg[i],elements[j]) == 0) break; + map[i-2] = j; + if (j == nelements) { + n = strlen(arg[i]) + 1; + elements[j] = new char[n]; + strcpy(elements[j],arg[i]); + nelements++; + } + } + + // read potential file and initialize potential parameters + + read_file(arg[2]); + spline_init(); + setup(); + + // clear setflag since coeff() called once with I,J = * * + + n = atom->ntypes; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + + int count = 0; + for (int i = 1; i <= n; i++) + for (int j = i; j <= n; j++) + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + count++; + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairExTeP::init_style() +{ + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style ExTeP requires atom IDs"); + if (force->newton_pair == 0) + error->all(FLERR,"Pair style ExTeP requires newton pair on"); + + // need a full neighbor list + + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + + // including neighbors of ghosts + neighbor->requests[irequest]->ghost = 1; + + // create pages if first time or if neighbor pgsize/oneatom has changed + + int create = 0; + if (ipage == NULL) create = 1; + if (pgsize != neighbor->pgsize) create = 1; + if (oneatom != neighbor->oneatom) create = 1; + + if (create) { + delete [] ipage; + pgsize = neighbor->pgsize; + oneatom = neighbor->oneatom; + + int nmypage= comm->nthreads; + ipage = new MyPage<int>[nmypage]; + for (int i = 0; i < nmypage; i++) + ipage[i].init(oneatom,pgsize,PGDELTA); + } +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairExTeP::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + cutghost[i][j] = cutmax ; + cutghost[j][i] = cutghost[i][j]; + + return cutmax; +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::read_file(char *file) +{ + int params_per_line = 17; + char **words = new char*[params_per_line+1]; + + memory->sfree(params); + params = NULL; + nparams = maxparam = 0; + + // open file on proc 0 + + FILE *fp; + if (comm->me == 0) { + fp = force->open_potential(file); + if (fp == NULL) { + char str[128]; + sprintf(str,"Cannot open ExTeP potential file %s",file); + error->one(FLERR,str); + } + } + + // read each line out of file, skipping blank lines or leading '#' + // store line of params if all 3 element tags are in element list + + int n,nwords,ielement,jelement,kelement; + char line[MAXLINE],*ptr; + int eof = 0; + + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + // concatenate additional lines until have params_per_line words + + while (nwords < params_per_line) { + n = strlen(line); + if (comm->me == 0) { + ptr = fgets(&line[n],MAXLINE-n,fp); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + } + + if (nwords != params_per_line) + error->all(FLERR,"Insufficient spline parameters in potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // ielement,jelement,kelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + for (kelement = 0; kelement < nelements; kelement++) + if (strcmp(words[2],elements[kelement]) == 0) break; + if (kelement == nelements) continue; + + // load up parameter settings and error check their values + + if (nparams == maxparam) { + maxparam += DELTA; + params = (Param *) memory->srealloc(params,maxparam*sizeof(Param), + "pair:params"); + } + + params[nparams].ielement = ielement; + params[nparams].jelement = jelement; + params[nparams].kelement = kelement; + params[nparams].powerm = atof(words[3]); + params[nparams].gamma = atof(words[4]); + params[nparams].lam3 = atof(words[5]); + params[nparams].c = atof(words[6]); + params[nparams].d = atof(words[7]); + params[nparams].h = atof(words[8]); + params[nparams].powern = atof(words[9]); + params[nparams].beta = atof(words[10]); + params[nparams].lam2 = atof(words[11]); + params[nparams].bigb = atof(words[12]); + params[nparams].bigr = atof(words[13]); + params[nparams].bigd = atof(words[14]); + params[nparams].lam1 = atof(words[15]); + params[nparams].biga = atof(words[16]); + + // currently only allow m exponent of 1 or 3 + + params[nparams].powermint = int(params[nparams].powerm); + + if (params[nparams].c < 0.0 || params[nparams].d < 0.0 || + params[nparams].powern < 0.0 || params[nparams].beta < 0.0 || + params[nparams].lam2 < 0.0 || params[nparams].bigb < 0.0 || + params[nparams].bigr < 0.0 ||params[nparams].bigd < 0.0 || + params[nparams].bigd > params[nparams].bigr || + params[nparams].lam1 < 0.0 || params[nparams].biga < 0.0 || + params[nparams].powerm - params[nparams].powermint != 0.0 || + (params[nparams].powermint != 3 && params[nparams].powermint != 1) || + params[nparams].gamma < 0.0) + error->all(FLERR,"Illegal ExTeP parameter"); + + nparams++; + if (nparams >= pow(atom->ntypes,3)) break; + } + + // deallocate words array + delete [] words; + + /* F_IJ (3) */ + // read the spline coefficients + params_per_line = 8; + // reallocate with new size + words = new char*[params_per_line+1]; + + // intialize F_corr_data to all zeros + for (int iel=0;iel<atom->ntypes;iel++) + for (int jel=0;jel<atom->ntypes;jel++) + for (int in=0;in<4;in++) + for (int jn=0;jn<4;jn++) + for (int ivar=0;ivar<3;ivar++) + F_corr_data[iel][jel][in][jn][ivar]=0; + + // loop until EOF + while (1) { + if (comm->me == 0) { + ptr = fgets(line,MAXLINE,fp); + //fputs(line,stdout); + if (ptr == NULL) { + eof = 1; + fclose(fp); + } else n = strlen(line) + 1; + } + MPI_Bcast(&eof,1,MPI_INT,0,world); + if (eof) break; + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + // strip comment, skip line if blank + + if ((ptr = strchr(line,'#'))) *ptr = '\0'; + nwords = atom->count_words(line); + if (nwords == 0) continue; + + if (nwords != params_per_line) + error->all(FLERR,"Incorrect format in ExTeP potential file"); + + // words = ptrs to all words in line + + nwords = 0; + words[nwords++] = strtok(line," \t\n\r\f"); + while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // ielement,jelement = 1st args + // if all 3 args are in element list, then parse this line + // else skip to next line + // these lines set ielement and jelement to the + // integers matching the strings from the input + + for (ielement = 0; ielement < nelements; ielement++) + if (strcmp(words[0],elements[ielement]) == 0) break; + if (ielement == nelements) continue; + for (jelement = 0; jelement < nelements; jelement++) + if (strcmp(words[1],elements[jelement]) == 0) break; + if (jelement == nelements) continue; + + int Ni = atoi(words[2]); + int Nj = atoi(words[3]); + double spline_val = atof(words[4]); + double spline_derx = atof(words[5]); + double spline_dery = atof(words[6]); + + // Set value for all pairs of ielement,jelement (any kelement) + for (int iparam = 0; iparam < nparams; iparam++) { + if ( ielement == params[iparam].ielement + && jelement == params[iparam].jelement) { + F_corr_data[ielement][jelement][Ni][Nj][0] = spline_val; + F_corr_data[ielement][jelement][Ni][Nj][1] = spline_derx; + F_corr_data[ielement][jelement][Ni][Nj][2] = spline_dery; + + F_corr_data[jelement][ielement][Nj][Ni][0] = spline_val; + F_corr_data[jelement][ielement][Nj][Ni][1] = spline_dery; + F_corr_data[jelement][ielement][Nj][Ni][2] = spline_derx; + } + } + } + + delete [] words; + /* END F_IJ (3) */ + +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::setup() +{ + int i,j,k,m,n; + + // set elem2param for all element triplet combinations + // must be a single exact match to lines read from file + // do not allow for ACB in place of ABC + + memory->destroy(elem2param); + memory->create(elem2param,nelements,nelements,nelements,"pair:elem2param"); + + for (i = 0; i < nelements; i++) + for (j = 0; j < nelements; j++) + for (k = 0; k < nelements; k++) { + n = -1; + for (m = 0; m < nparams; m++) { + if (i == params[m].ielement && j == params[m].jelement && + k == params[m].kelement) { + if (n >= 0) error->all(FLERR,"Potential file has duplicate entry"); + n = m; + } + } + if (n < 0) error->all(FLERR,"Potential file is missing an entry"); + elem2param[i][j][k] = n; + } + + // compute parameter values derived from inputs + + for (m = 0; m < nparams; m++) { + params[m].cut = params[m].bigr + params[m].bigd; + params[m].cutsq = params[m].cut*params[m].cut; + + params[m].c1 = pow(2.0*params[m].powern*1.0e-16,-1.0/params[m].powern); + params[m].c2 = pow(2.0*params[m].powern*1.0e-8,-1.0/params[m].powern); + params[m].c3 = 1.0/params[m].c2; + params[m].c4 = 1.0/params[m].c1; + } + + // set cutmax to max of all params + + cutmax = 0.0; + for (m = 0; m < nparams; m++) + if (params[m].cut > cutmax) cutmax = params[m].cut; +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::repulsive(Param *param, double rsq, double &fforce, + int eflag, double &eng) +{ + double r,tmp_fc,tmp_fc_d,tmp_exp; + + r = sqrt(rsq); + tmp_fc = ters_fc(r,param); + tmp_fc_d = ters_fc_d(r,param); + tmp_exp = exp(-param->lam1 * r); + fforce = -param->biga * tmp_exp * (tmp_fc_d - tmp_fc*param->lam1) / r; + if (eflag) eng = tmp_fc * param->biga * tmp_exp; +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::zeta(Param *param, double rij, double rik, + double *delrij, double *delrik) +{ + double costheta,arg,ex_delr; + + costheta = (delrij[0]*delrik[0] + delrij[1]*delrik[1] + + delrij[2]*delrik[2]) / (rij*rik); + + if (param->powermint == 3) arg = pow(param->lam3 * (rij-rik),3.0); + else arg = param->lam3 * (rij-rik); + + if (arg > 69.0776) ex_delr = 1.e30; + else if (arg < -69.0776) ex_delr = 0.0; + else ex_delr = exp(arg); + + return ters_fc(rik,param) * ters_gijk(costheta,param) * ex_delr; +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::force_zeta(Param *param, double r, double zeta_ij, + double &fforce, double &prefactor, + int eflag, double &eng) +{ + double fa,fa_d,bij; + + fa = ters_fa(r,param); + fa_d = ters_fa_d(r,param); + bij = ters_bij(zeta_ij,param); + fforce = 0.5*bij*fa_d / r; + prefactor = -0.5*fa * ( ters_bij_d(zeta_ij,param) ); + if (eflag) eng = 0.5*bij*fa; +} + +/* ---------------------------------------------------------------------- + attractive term + use param_ij cutoff for rij test + use param_ijk cutoff for rik test +------------------------------------------------------------------------- */ + +void PairExTeP::attractive(Param *param, double prefactor, + double rsqij, double rsqik, + double *delrij, double *delrik, + double *fi, double *fj, double *fk) +{ + double rij_hat[3],rik_hat[3]; + double rij,rijinv,rik,rikinv; + + rij = sqrt(rsqij); + rijinv = 1.0/rij; + vec3_scale(rijinv,delrij,rij_hat); + + rik = sqrt(rsqik); + rikinv = 1.0/rik; + vec3_scale(rikinv,delrik,rik_hat); + + ters_zetaterm_d(prefactor,rij_hat,rij,rik_hat,rik,fi,fj,fk,param); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_fc(double r, Param *param) +{ + double ters_R = param->bigr; + double ters_D = param->bigd; + + if (r < ters_R-ters_D) return 1.0; + if (r > ters_R+ters_D) return 0.0; + return 0.5*(1.0 - sin(MY_PI2*(r - ters_R)/ters_D)); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_fc_d(double r, Param *param) +{ + double ters_R = param->bigr; + double ters_D = param->bigd; + + if (r < ters_R-ters_D) return 0.0; + if (r > ters_R+ters_D) return 0.0; + return -(MY_PI4/ters_D) * cos(MY_PI2*(r - ters_R)/ters_D); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_fa(double r, Param *param) +{ + if (r > param->bigr + param->bigd) return 0.0; + return -param->bigb * exp(-param->lam2 * r) * ters_fc(r,param); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_fa_d(double r, Param *param) +{ + if (r > param->bigr + param->bigd) return 0.0; + return param->bigb * exp(-param->lam2 * r) * + (param->lam2 * ters_fc(r,param) - ters_fc_d(r,param)); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_bij(double zeta, Param *param) +{ + double tmp = param->beta * zeta; + if (tmp > param->c1) return 1.0/sqrt(tmp); + if (tmp > param->c2) + return (1.0 - pow(tmp,-param->powern) / (2.0*param->powern))/sqrt(tmp); + if (tmp < param->c4) return 1.0; + if (tmp < param->c3) + return 1.0 - pow(tmp,param->powern)/(2.0*param->powern); + return pow(1.0 + pow(tmp,param->powern), -1.0/(2.0*param->powern)); +} + +/* ---------------------------------------------------------------------- */ + +double PairExTeP::ters_bij_d(double zeta, Param *param) +{ + double tmp = param->beta * zeta; + if (tmp > param->c1) return param->beta * -0.5*pow(tmp,-1.5); + if (tmp > param->c2) + return param->beta * (-0.5*pow(tmp,-1.5) * + (1.0 - 0.5*(1.0 + 1.0/(2.0*param->powern)) * + pow(tmp,-param->powern))); + if (tmp < param->c4) return 0.0; + if (tmp < param->c3) + return -0.5*param->beta * pow(tmp,param->powern-1.0); + + double tmp_n = pow(tmp,param->powern); + return -0.5 * pow(1.0+tmp_n, -1.0-(1.0/(2.0*param->powern)))*tmp_n / zeta; +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::ters_zetaterm_d(double prefactor, + double *rij_hat, double rij, + double *rik_hat, double rik, + double *dri, double *drj, double *drk, + Param *param) +{ + double gijk,gijk_d,ex_delr,ex_delr_d,fc,dfc,cos_theta,tmp; + double dcosdri[3],dcosdrj[3],dcosdrk[3]; + + fc = ters_fc(rik,param); + dfc = ters_fc_d(rik,param); + if (param->powermint == 3) tmp = pow(param->lam3 * (rij-rik),3.0); + else tmp = param->lam3 * (rij-rik); + + if (tmp > 69.0776) ex_delr = 1.e30; + else if (tmp < -69.0776) ex_delr = 0.0; + else ex_delr = exp(tmp); + + if (param->powermint == 3) + ex_delr_d = 3.0*pow(param->lam3,3.0) * pow(rij-rik,2.0)*ex_delr; + else ex_delr_d = param->lam3 * ex_delr; + + cos_theta = vec3_dot(rij_hat,rik_hat); + gijk = ters_gijk(cos_theta,param); + gijk_d = ters_gijk_d(cos_theta,param); + costheta_d(rij_hat,rij,rik_hat,rik,dcosdri,dcosdrj,dcosdrk); + + // compute the derivative wrt Ri + // dri = -dfc*gijk*ex_delr*rik_hat; + // dri += fc*gijk_d*ex_delr*dcosdri; + // dri += fc*gijk*ex_delr_d*(rik_hat - rij_hat); + + vec3_scale(-dfc*gijk*ex_delr,rik_hat,dri); + vec3_scaleadd(fc*gijk_d*ex_delr,dcosdri,dri,dri); + vec3_scaleadd(fc*gijk*ex_delr_d,rik_hat,dri,dri); + vec3_scaleadd(-fc*gijk*ex_delr_d,rij_hat,dri,dri); + vec3_scale(prefactor,dri,dri); + + // compute the derivative wrt Rj + // drj = fc*gijk_d*ex_delr*dcosdrj; + // drj += fc*gijk*ex_delr_d*rij_hat; + + vec3_scale(fc*gijk_d*ex_delr,dcosdrj,drj); + vec3_scaleadd(fc*gijk*ex_delr_d,rij_hat,drj,drj); + vec3_scale(prefactor,drj,drj); + + // compute the derivative wrt Rk + // drk = dfc*gijk*ex_delr*rik_hat; + // drk += fc*gijk_d*ex_delr*dcosdrk; + // drk += -fc*gijk*ex_delr_d*rik_hat; + + vec3_scale(dfc*gijk*ex_delr,rik_hat,drk); + vec3_scaleadd(fc*gijk_d*ex_delr,dcosdrk,drk,drk); + vec3_scaleadd(-fc*gijk*ex_delr_d,rik_hat,drk,drk); + vec3_scale(prefactor,drk,drk); +} + +/* ---------------------------------------------------------------------- */ + +void PairExTeP::costheta_d(double *rij_hat, double rij, + double *rik_hat, double rik, + double *dri, double *drj, double *drk) +{ + // first element is devative wrt Ri, second wrt Rj, third wrt Rk + + double cos_theta = vec3_dot(rij_hat,rik_hat); + + vec3_scaleadd(-cos_theta,rij_hat,rik_hat,drj); + vec3_scale(1.0/rij,drj,drj); + vec3_scaleadd(-cos_theta,rik_hat,rij_hat,drk); + vec3_scale(1.0/rik,drk,drk); + vec3_add(drj,drk,dri); + vec3_scale(-1.0,dri,dri); +} + + +/* ---------------------------------------------------------------------- */ + +/* F_IJ (4) */ +// initialize spline for F_corr (based on PairLCBOP::F_conj) + +void PairExTeP::spline_init() { + for ( size_t iel=0; iel<atom->ntypes; iel++) { + for ( size_t jel=0; jel<atom->ntypes; jel++) { + for ( size_t N_ij=0; N_ij<4; N_ij++ ) { + for ( size_t N_ji=0; N_ji<4; N_ji++ ) { + TF_corr_param &f = F_corr_param[iel][jel][N_ij][N_ji]; + + // corner points for each spline function + f.f_00 = F_corr_data[iel][jel][N_ij ][N_ji ][0]; + f.f_01 = F_corr_data[iel][jel][N_ij ][N_ji+1][0]; + f.f_10 = F_corr_data[iel][jel][N_ij+1][N_ji ][0]; + f.f_11 = F_corr_data[iel][jel][N_ij+1][N_ji+1][0]; + + // compute f tilde according to [Los & Fasolino, PRB 68, 024107 2003] + f.f_x_00 = F_corr_data[iel][jel][N_ij ][N_ji ][1] - f.f_10 + f.f_00; + f.f_x_01 = F_corr_data[iel][jel][N_ij ][N_ji+1][1] - f.f_11 + f.f_01; + f.f_x_10 = -(F_corr_data[iel][jel][N_ij+1][N_ji ][1] - f.f_10 + f.f_00); + f.f_x_11 = -(F_corr_data[iel][jel][N_ij+1][N_ji+1][1] - f.f_11 + f.f_01); + + f.f_y_00 = F_corr_data[iel][jel][N_ij ][N_ji ][2] - f.f_01 + f.f_00; + f.f_y_01 = -(F_corr_data[iel][jel][N_ij ][N_ji+1][2] - f.f_01 + f.f_00); + f.f_y_10 = F_corr_data[iel][jel][N_ij+1][N_ji ][2] - f.f_11 + f.f_10; + f.f_y_11 = -(F_corr_data[iel][jel][N_ij+1][N_ji+1][2] - f.f_11 + f.f_10); + } + } + } + } +} + +double PairExTeP::envelop_function(double x, double y, double *func_der) { + double fx,fy,fxy,dfx,dfxydx; + double del, delsq; + + fxy = 1.0; + dfxydx = 0.0; + + if (x <= 3.0) { + fx = 1.0; + dfx = 0.0; + if (x < 1.0 && y < 1.0) { + double gx=(1.0-x); + double gy=(1.0-y); + double gxsq=gx*gx; + double gysq=gy*gy; + fxy = 1.0 - gxsq*gysq; + dfxydx = 2.0*gx*gysq; + } + } else if (x < 4.0) { + del = 4.0-x; + delsq = del*del; + fx = (3.0-2.0*del)*delsq; + dfx = - 6.0*del*(1.0-del); + } else { + fx = 0.0; + dfx = 0.0; + } + if (y <= 3.0) { + fy = 1.0; + } else if (y < 4.0) { + del = 4.0-y; + delsq = del*del; + fy = (3.0-2.0*del)*delsq; + } else { + fy = 0.0; + } + + double func_val = fxy*fx*fy; + *func_der = dfxydx*fx*fy+fxy*dfx*fy; + + return func_val; +} + +double PairExTeP::F_corr(int iel, int jel, double Ndij, double Ndji, double *dFN_x, double *dFN_y ) { + + // compute F_XY + + size_t Ndij_int = static_cast<size_t>( floor( Ndij ) ); + size_t Ndji_int = static_cast<size_t>( floor( Ndji ) ); + double x = Ndij - Ndij_int; + double y = Ndji - Ndji_int; + TF_corr_param &f = F_corr_param[iel][jel][Ndij_int][Ndji_int]; + double F = 0; + double dF_dx = 0, dF_dy = 0; + double l, r; + if (Ndij_int < 4 && Ndji_int < 4) { + l = (1-y)* (1-x); + r = ( f.f_00 + x*x* f.f_x_10 + y*y* f.f_y_01 ); + F += l*r; + dF_dx += -(1-y)*r +l*2*x* f.f_x_10; + dF_dy += -(1-x)*r +l*2*y* f.f_y_01; + l = (1-y)*x; + r = ( f.f_10 + (1-x)*(1-x)*f.f_x_00 + y* y* f.f_y_11 ); + F += l*r; + dF_dx += (1-y)*r -l*2*(1-x)*f.f_x_00; + dF_dy += -x*r +l*2*y* f.f_y_11; + l = y* (1-x); + r = ( f.f_01 + x*x* f.f_x_11 + (1-y)*(1-y)*f.f_y_00 ); + F += l*r; + dF_dx += -y*r +l*2*x* f.f_x_11; + dF_dy += (1-x)*r -l*2*(1-y)*f.f_y_00; + l = y* x; + r = ( f.f_11 + (1-x)*(1-x)*f.f_x_01 + (1-y)*(1-y)*f.f_y_10 ); + F += l*r; + dF_dx += y*r -l*2*(1-x)*f.f_x_01; + dF_dy += x*r -l*2*(1-y)*f.f_y_10; + } + double result = F; + *dFN_x = dF_dx; + *dFN_y = dF_dy; + + return result; +} +/* F_IJ (4) */ diff --git a/src/USER-MISC/pair_extep.h b/src/USER-MISC/pair_extep.h new file mode 100644 index 0000000000000000000000000000000000000000..bad433455ed58e75c71fb92ff1c3bdbc1b5cc0fb --- /dev/null +++ b/src/USER-MISC/pair_extep.h @@ -0,0 +1,225 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(extep,PairExTeP) + +#else + +#ifndef LMP_PAIR_EXTEP_H +#define LMP_PAIR_EXTEP_H + +#include "pair.h" +#include "my_page.h" + +#define MAXTYPES 8 +#define NSPLINE 5 + +namespace LAMMPS_NS { + +class PairExTeP : public Pair { + public: + PairExTeP(class LAMMPS *); + virtual ~PairExTeP(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void init_style(); + double init_one(int, int); + + protected: + struct Param { + double lam1,lam2,lam3; + double c,d,h; + double gamma,powerm; + double powern,beta; + double biga,bigb,bigd,bigr; + double cut,cutsq; + double c1,c2,c3,c4; + int ielement,jelement,kelement; + int powermint; + double Z_i,Z_j; // added for ExTePZBL + double ZBLcut,ZBLexpscale; + double c5,ca1,ca4; // added for ExTePMOD + double powern_del; + }; + + Param *params; // parameter set for an I-J-K interaction + char **elements; // names of unique elements + int ***elem2param; // mapping from element triplets to parameters + int *map; // mapping from atom types to elements + double cutmax; // max cutoff for all elements + int nelements; // # of unique elements + int nparams; // # of stored parameter sets + int maxparam; // max # of parameter sets + + int maxlocal; // size of numneigh, firstneigh arrays + int maxpage; // # of pages currently allocated + int pgsize; // size of neighbor page + int oneatom; // max # of neighbors for one atom + MyPage<int> *ipage; // neighbor list pages + int *SR_numneigh; // # of pair neighbors for each atom + int **SR_firstneigh; // ptr to 1st neighbor of each atom + + double *Nt, *Nd; // sum of cutoff fns ( f_C ) with SR neighs + + void allocate(); + void spline_init(); + virtual void read_file(char *); + virtual void setup(); + virtual void repulsive(Param *, double, double &, int, double &); + virtual double zeta(Param *, double, double, double *, double *); + virtual void force_zeta(Param *, double, double, double &, + double &, int, double &); + void attractive(Param *, double, double, double, double *, double *, + double *, double *, double *); + + virtual double ters_fc(double, Param *); + virtual double ters_fc_d(double, Param *); + virtual double ters_fa(double, Param *); + virtual double ters_fa_d(double, Param *); + virtual double ters_bij(double, Param *); + virtual double ters_bij_d(double, Param *); + + virtual void ters_zetaterm_d(double, double *, double, double *, double, + double *, double *, double *, Param *); + void costheta_d(double *, double, double *, double, + double *, double *, double *); + + // inlined functions for efficiency + + inline double ters_gijk(const double costheta, + const Param * const param) const { + const double ters_c = param->c * param->c; + const double ters_d = param->d * param->d; + const double hcth = param->h - costheta; + + return param->gamma*(1.0 + ters_c/ters_d - ters_c / (ters_d + hcth*hcth)); + } + + inline double ters_gijk_d(const double costheta, + const Param * const param) const { + const double ters_c = param->c * param->c; + const double ters_d = param->d * param->d; + const double hcth = param->h - costheta; + const double numerator = -2.0 * ters_c * hcth; + const double denominator = 1.0/(ters_d + hcth*hcth); + return param->gamma*numerator*denominator*denominator; + } + + inline double vec3_dot(const double x[3], const double y[3]) const { + return x[0]*y[0] + x[1]*y[1] + x[2]*y[2]; + } + + inline void vec3_add(const double x[3], const double y[3], + double * const z) const { + z[0] = x[0]+y[0]; z[1] = x[1]+y[1]; z[2] = x[2]+y[2]; + } + + inline void vec3_scale(const double k, const double x[3], + double y[3]) const { + y[0] = k*x[0]; y[1] = k*x[1]; y[2] = k*x[2]; + } + + inline void vec3_scaleadd(const double k, const double x[3], + const double y[3], double * const z) const { + z[0] = k*x[0]+y[0]; + z[1] = k*x[1]+y[1]; + z[2] = k*x[2]+y[2]; + } + + // splines parameters + // F[Ni=0-1, 1-2, 2-3, + // Nj=..., + struct TF_corr_param { + double + f_00, + f_01, + f_10, + f_11, + f_x_00, + f_x_01, + f_x_10, + f_x_11, + f_y_00, + f_y_01, + f_y_10, + f_y_11; + } F_corr_param[MAXTYPES][MAXTYPES][NSPLINE][NSPLINE]; + + double F_corr_data[MAXTYPES][MAXTYPES][NSPLINE][NSPLINE][3]; + + double F_corr( int, int, double, double, double*, double* ); + void SR_neigh(); + + double envelop_function(double, double, double*); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair style ExTeP requires atom IDs + +This is a requirement to use the ExTeP potential. + +E: Pair style ExTeP requires newton pair on + +See the newton command. This is a restriction to use the ExTeP +potential. + +E: All pair coeffs are not set + +All pair coefficients must be set in the data file or by the +pair_coeff command before running a simulation. + +E: Cannot open ExTeP potential file %s + +The specified potential file cannot be opened. Check that the path +and name are correct. + +E: Incorrect format in ExTeP potential file + +Incorrect number of words per line in the potential file. + +E: Illegal ExTeP parameter + +One or more of the coefficients defined in the potential file is +invalid. + +E: Potential file has duplicate entry + +The potential file for a SW or ExTeP potential has more than +one entry for the same 3 ordered elements. + +E: Potential file is missing an entry + +The potential file for a SW or ExTeP potential does not have a +needed entry. + +*/ diff --git a/src/USER-NETCDF/dump_netcdf.cpp b/src/USER-NETCDF/dump_netcdf.cpp index af9f94a728f8b61f43e08480f7b80dc33353d54d..a88d74434bf95829ec9d318f696bc60588e07421 100644 --- a/src/USER-NETCDF/dump_netcdf.cpp +++ b/src/USER-NETCDF/dump_netcdf.cpp @@ -43,7 +43,8 @@ using namespace LAMMPS_NS; using namespace MathConst; -enum{INT,FLOAT,BIGINT}; // same as in thermo.cpp +enum{THERMO_INT,THERMO_FLOAT,THERMO_BIGINT}; // same as in thermo.cpp +enum{DUMP_INT,DUMP_DOUBLE,DUMP_STRING,DUMP_BIGINT}; // same as in DumpCFG const char NC_FRAME_STR[] = "frame"; const char NC_SPATIAL_STR[] = "spatial"; @@ -121,10 +122,6 @@ DumpNetCDF::DumpNetCDF(LAMMPS *lmp, int narg, char **arg) : ndims = 3; strcpy(mangled, "velocities"); } - // extensions to the AMBER specification - else if (!strcmp(mangled, "type")) { - strcpy(mangled, "atom_types"); - } else if (!strcmp(mangled, "xs") || !strcmp(mangled, "ys") || !strcmp(mangled, "zs")) { idim = mangled[0] - 'x'; @@ -325,18 +322,6 @@ void DumpNetCDF::openfile() // variables specified in the input file for (int i = 0; i < n_perat; i++) { - nc_type xtype; - - // Type mangling - if (vtype[perat[i].field[0]] == INT) { - xtype = NC_INT; - } else { - if (double_precision) - xtype = NC_DOUBLE; - else - xtype = NC_FLOAT; - } - NCERRX( nc_inq_varid(ncid, perat[i].name, &perat[i].var), perat[i].name ); } @@ -421,10 +406,11 @@ void DumpNetCDF::openfile() nc_type xtype; // Type mangling - if (vtype[perat[i].field[0]] == INT) { + if (vtype[perat[i].field[0]] == DUMP_INT) { xtype = NC_INT; - } - else { + } else if (vtype[perat[i].field[0]] == DUMP_BIGINT) { + xtype = NC_INT64; + } else { if (double_precision) xtype = NC_DOUBLE; else @@ -488,17 +474,22 @@ void DumpNetCDF::openfile() if (thermo) { Thermo *th = output->thermo; for (int i = 0; i < th->nfield; i++) { - if (th->vtype[i] == FLOAT) { + if (th->vtype[i] == THERMO_FLOAT) { NCERRX( nc_def_var(ncid, th->keyword[i], NC_DOUBLE, 1, dims, &thermovar[i]), th->keyword[i] ); } - else if (th->vtype[i] == INT) { + else if (th->vtype[i] == THERMO_INT) { NCERRX( nc_def_var(ncid, th->keyword[i], NC_INT, 1, dims, &thermovar[i]), th->keyword[i] ); } - else if (th->vtype[i] == BIGINT) { + else if (th->vtype[i] == THERMO_BIGINT) { +#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) + NCERRX( nc_def_var(ncid, th->keyword[i], NC_INT64, 1, dims, + &thermovar[i]), th->keyword[i] ); +#else NCERRX( nc_def_var(ncid, th->keyword[i], NC_LONG, 1, dims, &thermovar[i]), th->keyword[i] ); +#endif } } } @@ -606,6 +597,7 @@ void DumpNetCDF::openfile() count[1] = 5; NCERR( nc_put_vara_text(ncid, cell_angular_var, index, count, "gamma") ); + append_flag = 1; framei = 1; } } @@ -651,6 +643,52 @@ int nc_put_var1_bigint<long long>(int ncid, int varid, const size_t index[], return nc_put_var1_longlong(ncid, varid, index, tp); } +template <typename T> +int nc_put_vara_bigint(int ncid, int varid, const size_t start[], + const size_t count[], const T* tp) +{ + return nc_put_vara_int(ncid, varid, start, count, tp); +} + +template <> +int nc_put_vara_bigint<long>(int ncid, int varid, const size_t start[], + const size_t count[], const long* tp) +{ + return nc_put_vara_long(ncid, varid, start, count, tp); +} + +template <> +int nc_put_vara_bigint<long long>(int ncid, int varid, const size_t start[], + const size_t count[], const long long* tp) +{ + return nc_put_vara_longlong(ncid, varid, start, count, tp); +} + +template <typename T> +int nc_put_vars_bigint(int ncid, int varid, const size_t start[], + const size_t count[], const ptrdiff_t stride[], + const T* tp) +{ + return nc_put_vars_int(ncid, varid, start, count, stride, tp); +} + +template <> +int nc_put_vars_bigint<long>(int ncid, int varid, const size_t start[], + const size_t count[], const ptrdiff_t stride[], + const long* tp) +{ + return nc_put_vars_long(ncid, varid, start, count, stride, tp); +} + +template <> +int nc_put_vars_bigint<long long>(int ncid, int varid, const size_t start[], + const size_t count[], + const ptrdiff_t stride[], + const long long* tp) +{ + return nc_put_vars_longlong(ncid, varid, start, count, stride, tp); +} + void DumpNetCDF::write() { // open file @@ -672,16 +710,16 @@ void DumpNetCDF::write() for (int i = 0; i < th->nfield; i++) { th->call_vfunc(i); if (filewriter) { - if (th->vtype[i] == FLOAT) { + if (th->vtype[i] == THERMO_FLOAT) { NCERRX( nc_put_var1_double(ncid, thermovar[i], start, &th->dvalue), th->keyword[i] ); } - else if (th->vtype[i] == INT) { + else if (th->vtype[i] == THERMO_INT) { NCERRX( nc_put_var1_int(ncid, thermovar[i], start, &th->ivalue), th->keyword[i] ); } - else if (th->vtype[i] == BIGINT) { + else if (th->vtype[i] == THERMO_BIGINT) { NCERRX( nc_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue), th->keyword[i] ); } @@ -782,16 +820,16 @@ void DumpNetCDF::write_data(int n, double *mybuf) if (!int_buffer) { n_buffer = n; - int_buffer = (int *) - memory->smalloc(n*sizeof(int),"dump::int_buffer"); + int_buffer = (bigint *) + memory->smalloc(n*sizeof(bigint),"dump::int_buffer"); double_buffer = (double *) memory->smalloc(n*sizeof(double),"dump::double_buffer"); } if (n > n_buffer) { n_buffer = n; - int_buffer = (int *) - memory->srealloc(int_buffer, n*sizeof(int),"dump::int_buffer"); + int_buffer = (bigint *) + memory->srealloc(int_buffer, n*sizeof(bigint),"dump::int_buffer"); double_buffer = (double *) memory->srealloc(double_buffer, n*sizeof(double),"dump::double_buffer"); } @@ -811,7 +849,7 @@ void DumpNetCDF::write_data(int n, double *mybuf) for (int i = 0; i < n_perat; i++) { int iaux = perat[i].field[0]; - if (vtype[iaux] == INT) { + if (vtype[iaux] == DUMP_INT || vtype[iaux] == DUMP_BIGINT) { // integers if (perat[i].dims > 1) { @@ -819,41 +857,55 @@ void DumpNetCDF::write_data(int n, double *mybuf) iaux = perat[i].field[idim]; if (iaux >= 0) { - for (int j = 0; j < n; j++, iaux+=size_one) { - int_buffer[j] = mybuf[iaux]; + if (vtype[iaux] == DUMP_INT) { + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<int>(mybuf[iaux]); + } + } + else { // DUMP_BIGINT + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<bigint>(mybuf[iaux]); + } } start[2] = idim; if (perat[i].constant) { if (perat[i].ndumped < ntotalgr) { - NCERR( nc_put_vars_int(ncid, perat[i].var, - start+1, count+1, stride+1, - int_buffer) ); + NCERR( nc_put_vars_bigint(ncid, perat[i].var, + start+1, count+1, stride+1, + int_buffer) ); perat[i].ndumped += n; } } else - NCERR( nc_put_vars_int(ncid, perat[i].var, start, count, stride, - int_buffer) ); + NCERR( nc_put_vars_bigint(ncid, perat[i].var, start, count, stride, + int_buffer) ); } } } else { - for (int j = 0; j < n; j++, iaux+=size_one) { - int_buffer[j] = mybuf[iaux]; + if (vtype[iaux] == DUMP_INT) { + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<int>(mybuf[iaux]); + } + } + else { // DUMP_BIGINT + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<bigint>(mybuf[iaux]); + } } if (perat[i].constant) { if (perat[i].ndumped < ntotalgr) { - NCERR( nc_put_vara_int(ncid, perat[i].var, start+1, count+1, - int_buffer) ); + NCERR( nc_put_vara_bigint(ncid, perat[i].var, start+1, count+1, + int_buffer) ); perat[i].ndumped += n; } } else - NCERR( nc_put_vara_int(ncid, perat[i].var, start, count, - int_buffer) ); + NCERR( nc_put_vara_bigint(ncid, perat[i].var, start, count, + int_buffer) ); } } else { diff --git a/src/USER-NETCDF/dump_netcdf.h b/src/USER-NETCDF/dump_netcdf.h index 25d64efade446861f152cc27f60728e7a15eb781..f97fd58409fa952ebe44e0bcb97ccd776227d619 100644 --- a/src/USER-NETCDF/dump_netcdf.h +++ b/src/USER-NETCDF/dump_netcdf.h @@ -66,7 +66,7 @@ class DumpNetCDF : public DumpCustom { bool thermo; // write thermo output to netcdf file bigint n_buffer; // size of buffer - int *int_buffer; // buffer for passing data to netcdf + bigint *int_buffer; // buffer for passing data to netcdf double *double_buffer; // buffer for passing data to netcdf int ncid; diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.cpp b/src/USER-NETCDF/dump_netcdf_mpiio.cpp index 890029371e53fea15213fbc5178c50602e0eaad5..5e5be1c7aaa2c9c7979e022d11b79914b337da64 100644 --- a/src/USER-NETCDF/dump_netcdf_mpiio.cpp +++ b/src/USER-NETCDF/dump_netcdf_mpiio.cpp @@ -43,7 +43,8 @@ using namespace LAMMPS_NS; using namespace MathConst; -enum{INT,FLOAT,BIGINT}; // same as in thermo.cpp +enum{THERMO_INT,THERMO_FLOAT,THERMO_BIGINT}; // same as in thermo.cpp +enum{DUMP_INT,DUMP_DOUBLE,DUMP_STRING,DUMP_BIGINT}; // same as in DumpCFG const char NC_FRAME_STR[] = "frame"; const char NC_SPATIAL_STR[] = "spatial"; @@ -321,19 +322,6 @@ void DumpNetCDFMPIIO::openfile() // variables specified in the input file for (int i = 0; i < n_perat; i++) { - nc_type xtype; - - // Type mangling - if (vtype[perat[i].field[0]] == INT) { - xtype = NC_INT; - } - else { - if (double_precision) - xtype = NC_DOUBLE; - else - xtype = NC_FLOAT; - } - NCERRX( ncmpi_inq_varid(ncid, perat[i].name, &perat[i].var), perat[i].name ); } @@ -417,10 +405,11 @@ void DumpNetCDFMPIIO::openfile() nc_type xtype; // Type mangling - if (vtype[perat[i].field[0]] == INT) { + if (vtype[perat[i].field[0]] == DUMP_INT) { xtype = NC_INT; - } - else { + } else if (vtype[perat[i].field[0]] == DUMP_BIGINT) { + xtype = NC_INT64; + } else { if (double_precision) xtype = NC_DOUBLE; else @@ -456,17 +445,22 @@ void DumpNetCDFMPIIO::openfile() if (thermo) { Thermo *th = output->thermo; for (int i = 0; i < th->nfield; i++) { - if (th->vtype[i] == FLOAT) { + if (th->vtype[i] == THERMO_FLOAT) { NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_DOUBLE, 1, dims, &thermovar[i]), th->keyword[i] ); } - else if (th->vtype[i] == INT) { + else if (th->vtype[i] == THERMO_INT) { NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_INT, 1, dims, &thermovar[i]), th->keyword[i] ); } - else if (th->vtype[i] == BIGINT) { + else if (th->vtype[i] == THERMO_BIGINT) { +#if defined(LAMMPS_SMALLBIG) || defined(LAMMPS_BIGBIG) + NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_INT64, 1, dims, + &thermovar[i]), th->keyword[i] ); +#else NCERRX( ncmpi_def_var(ncid, th->keyword[i], NC_LONG, 1, dims, &thermovar[i]), th->keyword[i] ); +#endif } } } @@ -583,6 +577,7 @@ void DumpNetCDFMPIIO::openfile() NCERR( ncmpi_end_indep_data(ncid) ); + append_flag = 1; framei = 1; } } @@ -609,25 +604,77 @@ void DumpNetCDFMPIIO::closefile() template <typename T> int ncmpi_put_var1_bigint(int ncid, int varid, const MPI_Offset index[], - const T* tp) + const T* tp) { return ncmpi_put_var1_int(ncid, varid, index, tp); } template <> int ncmpi_put_var1_bigint<long>(int ncid, int varid, const MPI_Offset index[], - const long* tp) + const long* tp) { return ncmpi_put_var1_long(ncid, varid, index, tp); } template <> -int ncmpi_put_var1_bigint<long long>(int ncid, int varid, const MPI_Offset index[], - const long long* tp) +int ncmpi_put_var1_bigint<long long>(int ncid, int varid, + const MPI_Offset index[], + const long long* tp) { return ncmpi_put_var1_longlong(ncid, varid, index, tp); } +template <typename T> +int ncmpi_put_vara_bigint_all(int ncid, int varid, const MPI_Offset start[], + const MPI_Offset count[], const T* tp) +{ + return ncmpi_put_vara_int_all(ncid, varid, start, count, tp); +} + +template <> +int ncmpi_put_vara_bigint_all<long>(int ncid, int varid, + const MPI_Offset start[], + const MPI_Offset count[], const long* tp) +{ + return ncmpi_put_vara_long_all(ncid, varid, start, count, tp); +} + +template <> +int ncmpi_put_vara_bigint_all<long long>(int ncid, int varid, + const MPI_Offset start[], + const MPI_Offset count[], + const long long* tp) +{ + return ncmpi_put_vara_longlong_all(ncid, varid, start, count, tp); +} + +template <typename T> +int ncmpi_put_vars_bigint_all(int ncid, int varid, const MPI_Offset start[], + const MPI_Offset count[], + const MPI_Offset stride[], const T* tp) +{ + return ncmpi_put_vars_int_all(ncid, varid, start, count, stride, tp); +} + +template <> +int ncmpi_put_vars_bigint_all<long>(int ncid, int varid, + const MPI_Offset start[], + const MPI_Offset count[], + const MPI_Offset stride[], const long* tp) +{ + return ncmpi_put_vars_long_all(ncid, varid, start, count, stride, tp); +} + +template <> +int ncmpi_put_vars_bigint_all<long long>(int ncid, int varid, + const MPI_Offset start[], + const MPI_Offset count[], + const MPI_Offset stride[], + const long long* tp) +{ + return ncmpi_put_vars_longlong_all(ncid, varid, start, count, stride, tp); +} + void DumpNetCDFMPIIO::write() { // open file @@ -651,16 +698,16 @@ void DumpNetCDFMPIIO::write() for (int i = 0; i < th->nfield; i++) { th->call_vfunc(i); if (filewriter) { - if (th->vtype[i] == FLOAT) { + if (th->vtype[i] == THERMO_FLOAT) { NCERRX( ncmpi_put_var1_double(ncid, thermovar[i], start, &th->dvalue), th->keyword[i] ); } - else if (th->vtype[i] == INT) { + else if (th->vtype[i] == THERMO_INT) { NCERRX( ncmpi_put_var1_int(ncid, thermovar[i], start, &th->ivalue), th->keyword[i] ); } - else if (th->vtype[i] == BIGINT) { + else if (th->vtype[i] == THERMO_BIGINT) { NCERRX( ncmpi_put_var1_bigint(ncid, thermovar[i], start, &th->bivalue), th->keyword[i] ); } @@ -789,16 +836,16 @@ void DumpNetCDFMPIIO::write_data(int n, double *mybuf) if (!int_buffer) { n_buffer = std::max(1, n); - int_buffer = (int *) - memory->smalloc(n_buffer*sizeof(int),"dump::int_buffer"); + int_buffer = (bigint *) + memory->smalloc(n_buffer*sizeof(bigint),"dump::int_buffer"); double_buffer = (double *) memory->smalloc(n_buffer*sizeof(double),"dump::double_buffer"); } if (n > n_buffer) { n_buffer = std::max(1, n); - int_buffer = (int *) - memory->srealloc(int_buffer, n_buffer*sizeof(int),"dump::int_buffer"); + int_buffer = (bigint *) + memory->srealloc(int_buffer, n_buffer*sizeof(bigint),"dump::int_buffer"); double_buffer = (double *) memory->srealloc(double_buffer, n_buffer*sizeof(double), "dump::double_buffer"); @@ -831,7 +878,7 @@ void DumpNetCDFMPIIO::write_data(int n, double *mybuf) error->one(FLERR,errmsg); } - if (vtype[iaux] == INT) { + if (vtype[iaux] == DUMP_INT || vtype[iaux] == DUMP_BIGINT) { // integers if (perat[i].dims > 1) { @@ -846,13 +893,21 @@ void DumpNetCDFMPIIO::write_data(int n, double *mybuf) error->one(FLERR,errmsg); } - for (int j = 0; j < n; j++, iaux+=size_one) { - int_buffer[j] = mybuf[iaux]; + if (vtype[iaux] == DUMP_INT) { + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<int>(mybuf[iaux]); + } + } + else { // DUMP_BIGINT + for (int j = 0; j < n; j++, iaux+=size_one) { + int_buffer[j] = static_cast<bigint>(mybuf[iaux]); + } } start[2] = idim; - NCERRX( ncmpi_put_vars_int_all(ncid, perat[i].var, start, count, - stride, int_buffer), perat[i].name ); + NCERRX( ncmpi_put_vars_bigint_all(ncid, perat[i].var, start, count, + stride, int_buffer), + perat[i].name ); } } } @@ -861,8 +916,8 @@ void DumpNetCDFMPIIO::write_data(int n, double *mybuf) int_buffer[j] = mybuf[iaux]; } - NCERRX( ncmpi_put_vara_int_all(ncid, perat[i].var, start, count, - int_buffer), perat[i].name ); + NCERRX( ncmpi_put_vara_bigint_all(ncid, perat[i].var, start, count, + int_buffer), perat[i].name ); } } else { diff --git a/src/USER-NETCDF/dump_netcdf_mpiio.h b/src/USER-NETCDF/dump_netcdf_mpiio.h index 3ca52449a5f2de1b888706c3cfba1f58b247cf53..330fa46c045b8eee761dce3958422f9db41bed1a 100644 --- a/src/USER-NETCDF/dump_netcdf_mpiio.h +++ b/src/USER-NETCDF/dump_netcdf_mpiio.h @@ -65,7 +65,7 @@ class DumpNetCDFMPIIO : public DumpCustom { bool thermo; // write thermo output to netcdf file bigint n_buffer; // size of buffer - int *int_buffer; // buffer for passing data to netcdf + bigint *int_buffer; // buffer for passing data to netcdf double *double_buffer; // buffer for passing data to netcdf int ncid; diff --git a/src/USER-OMP/fix_neigh_history_omp.cpp b/src/USER-OMP/fix_neigh_history_omp.cpp index ecc3147ed5d0515dfb13d743531ed28599e11b40..1b212732de66a2928cc9c5dbf2d28bb71e12485b 100644 --- a/src/USER-OMP/fix_neigh_history_omp.cpp +++ b/src/USER-OMP/fix_neigh_history_omp.cpp @@ -356,7 +356,7 @@ void FixNeighHistoryOMP::pre_exchange_newton() #endif { maxpartner = MAX(m,maxpartner); - comm->maxexchange_fix = MAX(comm->maxexchange_fix,4*maxpartner+1); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); } } diff --git a/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp b/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp index 85c7e44f8c8654d4eb9e7128c93f8a5e964c309c..110b8917cf07366b02aa0eeda1b99daeaffbe99a 100644 --- a/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_thole_long_omp.cpp @@ -29,6 +29,7 @@ #include "math_const.h" #include "error.h" #include "suffix.h" +#include "domain.h" using namespace LAMMPS_NS; using namespace MathConst; diff --git a/src/USER-OMP/reaxc_forces_omp.cpp b/src/USER-OMP/reaxc_forces_omp.cpp index 6fec7a7e4d614a1c50306d528bce63af8255df13..321d104b00921e9af51f691191fad4402aea0213 100644 --- a/src/USER-OMP/reaxc_forces_omp.cpp +++ b/src/USER-OMP/reaxc_forces_omp.cpp @@ -601,44 +601,17 @@ void Compute_ForcesOMP( reax_system *system, control_params *control, MPI_Comm comm = mpi_data->world; // Init Forces -#if defined(LOG_PERFORMANCE) - double t_start = 0; - if( system->my_rank == MASTER_NODE ) - t_start = Get_Time( ); -#endif - Init_Forces_noQEq_OMP( system, control, data, workspace, - lists, out_control, comm ); - -#if defined(LOG_PERFORMANCE) - //MPI_Barrier( comm ); - if( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.init_forces) ); -#endif + lists, out_control, comm ); // Bonded Interactions Compute_Bonded_ForcesOMP( system, control, data, workspace, lists, out_control, mpi_data->world ); -#if defined(LOG_PERFORMANCE) - if( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.bonded) ); -#endif - // Nonbonded Interactions Compute_NonBonded_ForcesOMP( system, control, data, workspace, lists, out_control, mpi_data->world ); -#if defined(LOG_PERFORMANCE) - if( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.nonb) ); -#endif - // Total Force Compute_Total_ForceOMP( system, control, data, workspace, lists, mpi_data ); - -#if defined(LOG_PERFORMANCE) - if( system->my_rank == MASTER_NODE ) - Update_Timing_Info( &t_start, &(data->timing.bonded) ); -#endif } diff --git a/src/USER-REAXC/pair_reaxc.cpp b/src/USER-REAXC/pair_reaxc.cpp index 0f4bd49cc855c7e78cc4cd69b3c66b69964b572f..7c135463add0c68cc3a7fb2a036a601bf75c7928 100644 --- a/src/USER-REAXC/pair_reaxc.cpp +++ b/src/USER-REAXC/pair_reaxc.cpp @@ -697,7 +697,7 @@ int PairReaxC::write_reax_lists() int itr_i, itr_j, i, j; int num_nbrs; int *ilist, *jlist, *numneigh, **firstneigh; - double d_sqr; + double d_sqr, cutoff_sqr; rvec dvec; double *dist, **x; reax_list *far_nbrs; @@ -712,6 +712,7 @@ int PairReaxC::write_reax_lists() far_list = far_nbrs->select.far_nbr_list; num_nbrs = 0; + int inum = list->inum; dist = (double*) calloc( system->N, sizeof(double) ); int numall = list->inum + list->gnum; @@ -721,12 +722,17 @@ int PairReaxC::write_reax_lists() jlist = firstneigh[i]; Set_Start_Index( i, num_nbrs, far_nbrs ); + if (i < inum) + cutoff_sqr = control->nonb_cut*control->nonb_cut; + else + cutoff_sqr = control->bond_cut*control->bond_cut; + for( itr_j = 0; itr_j < numneigh[i]; ++itr_j ){ j = jlist[itr_j]; j &= NEIGHMASK; get_distance( x[j], x[i], &d_sqr, &dvec ); - if( d_sqr <= (control->nonb_cut*control->nonb_cut) ){ + if( d_sqr <= (cutoff_sqr) ){ dist[j] = sqrt( d_sqr ); set_far_nbr( &far_list[num_nbrs], j, dist[j], dvec ); ++num_nbrs; diff --git a/src/USER-SMD/pair_smd_tlsph.cpp b/src/USER-SMD/pair_smd_tlsph.cpp index 19005199354629c33d3b5afa1f020fb7a36de76f..d6ddc9c72d4f0a7c0ace6faee3eac27f05b2dd8f 100644 --- a/src/USER-SMD/pair_smd_tlsph.cpp +++ b/src/USER-SMD/pair_smd_tlsph.cpp @@ -1066,7 +1066,7 @@ void PairTlsph::coeff(int narg, char **arg) { Lookup[HEAT_CAPACITY][itype] = force->numeric(FLERR, arg[ioffset + 7]); Lookup[LAME_LAMBDA][itype] = Lookup[YOUNGS_MODULUS][itype] * Lookup[POISSON_RATIO][itype] - / ((1.0 + Lookup[POISSON_RATIO][itype] * (1.0 - 2.0 * Lookup[POISSON_RATIO][itype]))); + / ((1.0 + Lookup[POISSON_RATIO][itype]) * (1.0 - 2.0 * Lookup[POISSON_RATIO][itype])); Lookup[SHEAR_MODULUS][itype] = Lookup[YOUNGS_MODULUS][itype] / (2.0 * (1.0 + Lookup[POISSON_RATIO][itype])); Lookup[M_MODULUS][itype] = Lookup[LAME_LAMBDA][itype] + 2.0 * Lookup[SHEAR_MODULUS][itype]; Lookup[SIGNAL_VELOCITY][itype] = sqrt( diff --git a/src/USER-SMD/smd_material_models.cpp b/src/USER-SMD/smd_material_models.cpp index 228d1c709a77870ecb752a31580cfa6c571d8ff2..f1288ae5cad31aa8c69b8f9b1df46f6a88e1de5b 100644 --- a/src/USER-SMD/smd_material_models.cpp +++ b/src/USER-SMD/smd_material_models.cpp @@ -75,7 +75,7 @@ void ShockEOS(double rho, double rho0, double e, double e0, double c0, double S, double mu = rho / rho0 - 1.0; double pH = rho0 * square(c0) * mu * (1.0 + mu) / square(1.0 - (S - 1.0) * mu); - pFinal = (pH + rho * Gamma * (e - e0)); + pFinal = (-pH + rho * Gamma * (e - e0)); //printf("shock EOS: rho = %g, rho0 = %g, Gamma=%f, c0=%f, S=%f, e=%f, e0=%f\n", rho, rho0, Gamma, c0, S, e, e0); //printf("pFinal = %f\n", pFinal); diff --git a/src/USER-TALLY/compute_force_tally.cpp b/src/USER-TALLY/compute_force_tally.cpp index 5f29aea5b22cd3b892596ec4f3356e9184a85602..3ec6c188fc385026ce3fd93383fa87e38559b15f 100644 --- a/src/USER-TALLY/compute_force_tally.cpp +++ b/src/USER-TALLY/compute_force_tally.cpp @@ -12,6 +12,7 @@ ------------------------------------------------------------------------- */ #include <string.h> +#include <math.h> #include "compute_force_tally.h" #include "atom.h" #include "group.h" @@ -20,6 +21,7 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "comm.h" using namespace LAMMPS_NS; diff --git a/src/USER-TALLY/compute_heat_flux_tally.cpp b/src/USER-TALLY/compute_heat_flux_tally.cpp index c090050b1540925d3ca76cb3e4f23fcaf7471c4e..43b663b27a580495943b5d4835650ae01811b0cd 100644 --- a/src/USER-TALLY/compute_heat_flux_tally.cpp +++ b/src/USER-TALLY/compute_heat_flux_tally.cpp @@ -20,6 +20,7 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "comm.h" using namespace LAMMPS_NS; diff --git a/src/USER-TALLY/compute_pe_mol_tally.cpp b/src/USER-TALLY/compute_pe_mol_tally.cpp index 25a172b7f81eb6cd68a75fcb520c401bb89ec1de..0328740e0395bc6ca7914ee8d7b7b88e212e0000 100644 --- a/src/USER-TALLY/compute_pe_mol_tally.cpp +++ b/src/USER-TALLY/compute_pe_mol_tally.cpp @@ -20,6 +20,7 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "comm.h" using namespace LAMMPS_NS; diff --git a/src/USER-TALLY/compute_pe_tally.cpp b/src/USER-TALLY/compute_pe_tally.cpp index 5b4644d4e16833f8177a0d22aa29de9802877bc1..caa4cf134a89dd10d73169eae5ba02510a8bf2dd 100644 --- a/src/USER-TALLY/compute_pe_tally.cpp +++ b/src/USER-TALLY/compute_pe_tally.cpp @@ -20,6 +20,7 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "comm.h" using namespace LAMMPS_NS; diff --git a/src/USER-TALLY/compute_stress_tally.cpp b/src/USER-TALLY/compute_stress_tally.cpp index 32253d2cad78857a31c52753f85b856e7b84cec2..e44313d695c46959b87ddb89bbf50ec99078bd7c 100644 --- a/src/USER-TALLY/compute_stress_tally.cpp +++ b/src/USER-TALLY/compute_stress_tally.cpp @@ -20,6 +20,8 @@ #include "memory.h" #include "error.h" #include "force.h" +#include "comm.h" +#include "domain.h" using namespace LAMMPS_NS; diff --git a/src/accelerator_kokkos.h b/src/accelerator_kokkos.h index 8ea5b9d4d2dd93dd4f85147035bfc5be1633024a..708592a25eb000cf3bdc5bb38add4b413809a94b 100644 --- a/src/accelerator_kokkos.h +++ b/src/accelerator_kokkos.h @@ -25,6 +25,7 @@ #include "comm_tiled_kokkos.h" #include "domain_kokkos.h" #include "neighbor_kokkos.h" +#include "memory_kokkos.h" #include "modify_kokkos.h" #else @@ -37,6 +38,7 @@ #include "comm_tiled.h" #include "domain.h" #include "neighbor.h" +#include "memory.h" #include "modify.h" namespace LAMMPS_NS { @@ -89,6 +91,13 @@ class NeighborKokkos : public Neighbor { ~NeighborKokkos() {} }; +class MemoryKokkos : public Memory { + public: + MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {} + ~MemoryKokkos() {} + void grow_kokkos(tagint **, tagint **, int, int, const char*) {} +}; + class ModifyKokkos : public Modify { public: ModifyKokkos(class LAMMPS *lmp) : Modify(lmp) {} diff --git a/src/compute_aggregate_atom.cpp b/src/compute_aggregate_atom.cpp index 1155ac437adfb439cda2813ea0b8d4f4a850afed..1e91327e54639be94bf89810058c7d9e1df5e2a9 100644 --- a/src/compute_aggregate_atom.cpp +++ b/src/compute_aggregate_atom.cpp @@ -16,6 +16,7 @@ ------------------------------------------------------------------------- */ #include <string.h> +#include <math.h> #include "compute_aggregate_atom.h" #include "atom.h" #include "atom_vec.h" diff --git a/src/compute_msd_chunk.cpp b/src/compute_msd_chunk.cpp index bc5a374fadb8c53f713c1d176f1998a0ebe93437..69cd87f96b6b62d800eb0a7e54587525bc779cfd 100644 --- a/src/compute_msd_chunk.cpp +++ b/src/compute_msd_chunk.cpp @@ -126,7 +126,7 @@ void ComputeMSDChunk::setup() if (fix->nrow == nchunk && fix->ncol == 3) return; fix->reset_global(nchunk,3); - + double **cominit = fix->astore; for (int i = 0; i < nchunk; i++) { cominit[i][0] = comall[i][0]; diff --git a/src/compute_orientorder_atom.cpp b/src/compute_orientorder_atom.cpp index 90e2830e39e55881884bb6cb57f137ec387f62bf..b443d56bf82b3fd7a8d4f50b58181ecb49e922e4 100644 --- a/src/compute_orientorder_atom.cpp +++ b/src/compute_orientorder_atom.cpp @@ -18,6 +18,7 @@ #include <string.h> #include <stdlib.h> +#include <math.h> #include "compute_orientorder_atom.h" #include "atom.h" #include "update.h" diff --git a/src/compute_rdf.cpp b/src/compute_rdf.cpp index 167de4576dfcbe7c24d8db5c1049c84806c29ecb..bcb620f3b36c59dd842669dd626baebcda760f31 100644 --- a/src/compute_rdf.cpp +++ b/src/compute_rdf.cpp @@ -32,6 +32,7 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "comm.h" using namespace LAMMPS_NS; using namespace MathConst; diff --git a/src/delete_atoms.cpp b/src/delete_atoms.cpp index 825426b2b6759734da67a39fea0342c0305ab70b..489c5bf5d5f4ce5837d42ee64d8fcd04be3103f3 100644 --- a/src/delete_atoms.cpp +++ b/src/delete_atoms.cpp @@ -28,6 +28,7 @@ #include "random_mars.h" #include "memory.h" #include "error.h" +#include "modify.h" #include <map> diff --git a/src/dump_image.cpp b/src/dump_image.cpp index eedfb89992d24e3fcabcdae53e8138e5bcc2e68f..0e5f57be6bcdb46a99bce387f9b43b8221523f28 100644 --- a/src/dump_image.cpp +++ b/src/dump_image.cpp @@ -1345,7 +1345,7 @@ int DumpImage::modify_param(int narg, char **arg) if (atom->nbondtypes == 0) error->all(FLERR,"Dump modify bdiam not allowed with no bond types"); int nlo,nhi; - force->bounds(FLERR,arg[1],atom->ntypes,nlo,nhi); + force->bounds(FLERR,arg[1],atom->nbondtypes,nlo,nhi); double diam = force->numeric(FLERR,arg[2]); if (diam <= 0.0) error->all(FLERR,"Illegal dump_modify command"); for (int i = nlo; i <= nhi; i++) bdiamtype[i] = diam; diff --git a/src/error.cpp b/src/error.cpp index 0969507fc977b854aceae485a24e0fa9d4667d16..9193314fc8c8674c445341d9e993c4690499da55 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -115,10 +115,10 @@ void Error::all(const char *file, int line, const char *str) if (me == 0) { if (input && input->line) lastcmd = input->line; if (screen) fprintf(screen,"ERROR: %s (%s:%d)\n" - "Last command: %s\n", + "Last command: %s\n", str,file,line,lastcmd); if (logfile) fprintf(logfile,"ERROR: %s (%s:%d)\n" - "Last command: %s\n", + "Last command: %s\n", str,file,line,lastcmd); } @@ -152,9 +152,17 @@ void Error::all(const char *file, int line, const char *str) void Error::one(const char *file, int line, const char *str) { int me; + const char *lastcmd = (const char*)"(unknown)"; MPI_Comm_rank(world,&me); - if (screen) fprintf(screen,"ERROR on proc %d: %s (%s:%d)\n", - me,str,file,line); + + if (input && input->line) lastcmd = input->line; + if (screen) fprintf(screen,"ERROR on proc %d: %s (%s:%d)\n" + "Last command: %s\n", + me,str,file,line,lastcmd); + if (logfile) fprintf(logfile,"ERROR on proc %d: %s (%s:%d)\n" + "Last command: %s\n", + me,str,file,line,lastcmd); + if (universe->nworlds > 1) if (universe->uscreen) fprintf(universe->uscreen,"ERROR on proc %d: %s (%s:%d)\n", diff --git a/src/fix_addforce.cpp b/src/fix_addforce.cpp index 5c677a43385a071a99d1e89489b6cb2a6ace6e8c..6b1e89227985acb0dc12336520962ff7599ffee3 100644 --- a/src/fix_addforce.cpp +++ b/src/fix_addforce.cpp @@ -16,7 +16,6 @@ #include "fix_addforce.h" #include "atom.h" #include "atom_masks.h" -#include "accelerator_kokkos.h" #include "update.h" #include "modify.h" #include "domain.h" diff --git a/src/fix_ave_chunk.cpp b/src/fix_ave_chunk.cpp index 1060acd7bb8c6ac289e6ef7606de8de515cbdd35..73df50c66cfa08c10ab2e6e93cf94fa4fabf0322 100644 --- a/src/fix_ave_chunk.cpp +++ b/src/fix_ave_chunk.cpp @@ -63,6 +63,7 @@ FixAveChunk::FixAveChunk(LAMMPS *lmp, int narg, char **arg) : strcpy(idchunk,arg[6]); global_freq = nfreq; + peratom_freq = nfreq; no_change_box = 1; // expand args if any have wildcard character "*" diff --git a/src/fix_neigh_history.cpp b/src/fix_neigh_history.cpp index 322c8d55619abd0ef17b0a7788523f978261e53b..623fbff756e5be8f4cb215767533e6d9bc88979e 100644 --- a/src/fix_neigh_history.cpp +++ b/src/fix_neigh_history.cpp @@ -35,7 +35,7 @@ enum{DEFAULT,NPARTNER,PERPARTNER}; // also set in fix neigh/history/omp FixNeighHistory::FixNeighHistory(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg), - npartner(NULL), partner(NULL), valuepartner(NULL), pair(NULL), + npartner(NULL), partner(NULL), valuepartner(NULL), pair(NULL), ipage_atom(NULL), dpage_atom(NULL), ipage_neigh(NULL), dpage_neigh(NULL) { if (narg != 4) error->all(FLERR,"Illegal fix NEIGH_HISTORY command"); @@ -295,7 +295,7 @@ void FixNeighHistory::pre_exchange_onesided() // set maxpartner = max # of partners of any owned atom // bump up comm->maxexchange_fix if necessary - + maxpartner = 0; for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); @@ -318,7 +318,7 @@ void FixNeighHistory::pre_exchange_newton() int *allflags; double *allvalues,*onevalues,*jvalues; - // NOTE: all operations until very end are with + // NOTE: all operations until very end are with // nlocal_neigh <= current nlocal and nall_neigh // b/c previous neigh list was built with nlocal_neigh & nghost_neigh // nlocal can be larger if other fixes added atoms at this pre_exchange() @@ -424,7 +424,7 @@ void FixNeighHistory::pre_exchange_newton() maxpartner = 0; for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); - comm->maxexchange_fix = MAX(comm->maxexchange_fix,4*maxpartner+1); + comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); // zero npartner values from previous nlocal_neigh to current nlocal @@ -526,7 +526,7 @@ void FixNeighHistory::pre_exchange_no_newton() // set maxpartner = max # of partners of any owned atom // bump up comm->maxexchange_fix if necessary - + maxpartner = 0; for (i = 0; i < nlocal_neigh; i++) maxpartner = MAX(maxpartner,npartner[i]); comm->maxexchange_fix = MAX(comm->maxexchange_fix,(dnum+1)*maxpartner+1); @@ -571,9 +571,9 @@ void FixNeighHistory::post_neighbor() memory->sfree(firstflag); memory->sfree(firstvalue); maxatom = nall; - firstflag = (int **) + firstflag = (int **) memory->smalloc(maxatom*sizeof(int *),"neighbor_history:firstflag"); - firstvalue = (double **) + firstvalue = (double **) memory->smalloc(maxatom*sizeof(double *),"neighbor_history:firstvalue"); } @@ -720,7 +720,7 @@ int FixNeighHistory::pack_reverse_comm_size(int n, int first) last = first + n; for (i = first; i < last; i++) - m += 1 + 4*npartner[i]; + m += 1 + (dnum+1)*npartner[i]; return m; } diff --git a/src/fix_store.cpp b/src/fix_store.cpp index c856bb2db29e6de5b21a9adaa70e0fc03eb64927..84e94fc2b7807c3fc3c1a8938e8d6ce370885d30 100644 --- a/src/fix_store.cpp +++ b/src/fix_store.cpp @@ -155,8 +155,7 @@ void FixStore::reset_global(int nrow_caller, int ncol_caller) else memory->create(astore,nrow,ncol,"fix/store:astore"); memory->create(rbuf,nrow*ncol+2,"fix/store:rbuf"); - - printf("AAA HOW GET HERE\n"); + // printf("AAA HOW GET HERE\n"); } /* ---------------------------------------------------------------------- diff --git a/src/input.cpp b/src/input.cpp index 23b89d3040d7816546d5d701609bf0cf87bcac05..06403276824a471aaafdc3453706ca951624c657 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -27,7 +27,6 @@ #include "comm.h" #include "comm_brick.h" #include "comm_tiled.h" -#include "accelerator_kokkos.h" #include "group.h" #include "domain.h" #include "output.h" @@ -530,8 +529,11 @@ void Input::substitute(char *&str, char *&str2, int &max, int &max2, int flag) value = variable->retrieve(var); } - if (value == NULL) error->one(FLERR,"Substitution for illegal variable"); - + if (value == NULL) { + char str[128]; + sprintf(str,"Substitution for illegal variable %s",var); + error->one(FLERR,str); + } // check if storage in str2 needs to be expanded // re-initialize ptr and ptr2 to the point beyond the variable. @@ -1172,6 +1174,7 @@ void Input::print() FILE *fp = NULL; int screenflag = 1; + int universeflag = 0; int iarg = 1; while (iarg < narg) { @@ -1194,6 +1197,12 @@ void Input::print() else if (strcmp(arg[iarg+1],"no") == 0) screenflag = 0; else error->all(FLERR,"Illegal print command"); iarg += 2; + } else if (strcmp(arg[iarg],"universe") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal print command"); + if (strcmp(arg[iarg+1],"yes") == 0) universeflag = 1; + else if (strcmp(arg[iarg+1],"no") == 0) universeflag = 0; + else error->all(FLERR,"Illegal print command"); + iarg += 2; } else error->all(FLERR,"Illegal print command"); } @@ -1205,6 +1214,10 @@ void Input::print() fclose(fp); } } + if (universeflag && (universe->me == 0)) { + if (universe->uscreen) fprintf(universe->uscreen, "%s\n",line); + if (universe->ulogfile) fprintf(universe->ulogfile,"%s\n",line); + } } /* ---------------------------------------------------------------------- */ diff --git a/src/kspace.h b/src/kspace.h index ad29c214728f56eb0eec5739aeac18b076c752b9..5a2e5b78840456b2b4b555ef9f7f8eb47d70b66c 100644 --- a/src/kspace.h +++ b/src/kspace.h @@ -15,7 +15,6 @@ #define LMP_KSPACE_H #include "pointers.h" -#include "accelerator_kokkos.h" #ifdef FFT_SINGLE typedef float FFT_SCALAR; @@ -124,11 +123,6 @@ class KSpace : protected Pointers { virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {}; virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {}; - virtual void pack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual int timing(int, double &, double &) {return 0;} virtual int timing_1d(int, double &) {return 0;} virtual int timing_3d(int, double &) {return 0;} diff --git a/src/lammps.h b/src/lammps.h index c432784a0b14c92f571dc032040f22e35ec30f96..e3b815e6237a7ad660b90b97ee7c65b64db858db 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -53,6 +53,7 @@ class LAMMPS { class KokkosLMP *kokkos; // KOKKOS accelerator class class AtomKokkos *atomKK; // KOKKOS version of Atom class + class MemoryKokkos *memoryKK; // KOKKOS version of Memory class class Python * python; // Python interface diff --git a/src/memory.h b/src/memory.h index b83482e4c2b69c9d11259e7c5d1ea73ba7ba077b..f2faecf6e177587e4aaf9dfc14991c6ef4f94f58 100644 --- a/src/memory.h +++ b/src/memory.h @@ -16,9 +16,6 @@ #include "lmptype.h" #include "pointers.h" -#ifdef LMP_KOKKOS -#include "kokkos_type.h" -#endif namespace LAMMPS_NS { @@ -31,16 +28,6 @@ class Memory : protected Pointers { void sfree(void *); void fail(const char *); - // Kokkos memory allocation functions - // provide a dummy prototpye for any Kokkos memory function - // called in main LAMMPS even when not built with KOKKOS package - -#ifdef LMP_KOKKOS -#include "memory_kokkos.h" -#else - void grow_kokkos(tagint **, tagint **, int, int, const char*) {} -#endif - /* ---------------------------------------------------------------------- create/grow/destroy vecs and multidim arrays with contiguous memory blocks only use with primitive data types, e.g. 1d vec of ints, 2d array of doubles diff --git a/src/pair.h b/src/pair.h index eb71e8822474aed87e024988e27b669196385cd9..cfb6576653c3aca4d21dfa99a0a6feda78a4977a 100644 --- a/src/pair.h +++ b/src/pair.h @@ -15,7 +15,6 @@ #define LMP_PAIR_H #include "pointers.h" -#include "accelerator_kokkos.h" namespace LAMMPS_NS { @@ -165,10 +164,6 @@ class Pair : protected Pointers { virtual int pack_forward_comm(int, int *, double *, int, int *) {return 0;} virtual void unpack_forward_comm(int, int, double *) {} - virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, - int, DAT::tdual_xfloat_1d &, - int, int *) {return 0;}; - virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {} virtual int pack_reverse_comm(int, int, double *) {return 0;} virtual void unpack_reverse_comm(int, int *, double *) {} virtual double memory_usage(); diff --git a/src/pair_yukawa.cpp b/src/pair_yukawa.cpp index 2ba6633d9e21719f8c06beccf263c88ac438110e..9be9237734343588dda84aceb4d650c067c8f525 100644 --- a/src/pair_yukawa.cpp +++ b/src/pair_yukawa.cpp @@ -139,7 +139,6 @@ void PairYukawa::allocate() setflag[i][j] = 0; memory->create(cutsq,n+1,n+1,"pair:cutsq"); - memory->create(rad,n+1,"pair:rad"); memory->create(cut,n+1,n+1,"pair:cut"); memory->create(a,n+1,n+1,"pair:a"); diff --git a/src/pair_yukawa.h b/src/pair_yukawa.h index 3859d163af5dd7a6843c7b5a178a53722691abf6..3222019a0ae94d4b4a9ccb4ca95fdcf3519ef59b 100644 --- a/src/pair_yukawa.h +++ b/src/pair_yukawa.h @@ -46,7 +46,7 @@ class PairYukawa : public Pair { double *rad; double **cut,**a,**offset; - void allocate(); + virtual void allocate(); }; } diff --git a/src/pointers.h b/src/pointers.h index 82b49c1dad05e6988ec02bd1b5646ee2078b6442..44967f51359a28067fee329362a65c8b3a8920dd 100644 --- a/src/pointers.h +++ b/src/pointers.h @@ -57,6 +57,7 @@ class Pointers { screen(ptr->screen), logfile(ptr->logfile), atomKK(ptr->atomKK), + memoryKK(ptr->memoryKK), python(ptr->python) {} virtual ~Pointers() {} @@ -84,6 +85,7 @@ class Pointers { FILE *&logfile; class AtomKokkos *&atomKK; + class MemoryKokkos *&memoryKK; class Python *&python; }; diff --git a/src/region.cpp b/src/region.cpp index d2ef481cb79595ec51d22380cff51b95f12c4d77..da814746ad24ef686a7224e9df7ecad8570a0bd0 100644 --- a/src/region.cpp +++ b/src/region.cpp @@ -142,15 +142,6 @@ int Region::match(double x, double y, double z) return !(inside(x,y,z) ^ interior); } -/* ---------------------------------------------------------------------- - generate error if Kokkos function defaults to base class -------------------------------------------------------------------------- */ - -void Region::match_all_kokkos(int, DAT::tdual_int_1d) -{ - error->all(FLERR,"Can only use Kokkos supported regions with Kokkos package"); -} - /* ---------------------------------------------------------------------- generate list of contact points for interior or exterior regions if region has variable shape, invoke shape_update() once per timestep diff --git a/src/region.h b/src/region.h index 5b4238acb4b071e10de309f04c6d3af2083a4e0e..7e8c45cb2ea91360ceffdf3dfe0b735a61b0684c 100644 --- a/src/region.h +++ b/src/region.h @@ -15,7 +15,6 @@ #define LMP_REGION_H #include "pointers.h" -#include "accelerator_kokkos.h" namespace LAMMPS_NS { @@ -97,10 +96,6 @@ class Region : protected Pointers { virtual void set_velocity_shape() {} virtual void velocity_contact_shape(double*, double*) {} - // Kokkos function, implemented by each Kokkos region - - virtual void match_all_kokkos(int, DAT::tdual_int_1d); - protected: void add_contact(int, double *, double, double, double); void options(int, char **); diff --git a/src/replicate.cpp b/src/replicate.cpp index f3d196416917cab8be9080243ea9d919abdebe74..1251688211950d97bbc62dd497b40cb14e6f8162 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -44,7 +44,7 @@ void Replicate::command(int narg, char **arg) if (domain->box_exist == 0) error->all(FLERR,"Replicate command before simulation box is defined"); - if (narg != 3) error->all(FLERR,"Illegal replicate command"); + if (narg < 3 || narg > 4) error->all(FLERR,"Illegal replicate command"); int me = comm->me; int nprocs = comm->nprocs; @@ -58,6 +58,10 @@ void Replicate::command(int narg, char **arg) int nz = force->inumeric(FLERR,arg[2]); int nrep = nx*ny*nz; + int bbox_flag = 0; + if (narg == 4) + if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1; + // error and warning checks if (nx <= 0 || ny <= 0 || nz <= 0) @@ -99,6 +103,37 @@ void Replicate::command(int narg, char **arg) maxmol = maxmol_all; } + // check image flags maximum extent; only efficient small image flags compared to new system + + int _imagelo[3], _imagehi[3]; + _imagelo[0] = 0; + _imagelo[1] = 0; + _imagelo[2] = 0; + _imagehi[0] = 0; + _imagehi[1] = 0; + _imagehi[2] = 0; + + if (bbox_flag) { + + for (i=0; i<atom->nlocal; ++i) { + imageint image = atom->image[i]; + int xbox = (image & IMGMASK) - IMGMAX; + int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX; + int zbox = (image >> IMG2BITS) - IMGMAX; + + if (xbox < _imagelo[0]) _imagelo[0] = xbox; + if (ybox < _imagelo[1]) _imagelo[1] = ybox; + if (zbox < _imagelo[2]) _imagelo[2] = zbox; + + if (xbox > _imagehi[0]) _imagehi[0] = xbox; + if (ybox > _imagehi[1]) _imagehi[1] = ybox; + if (zbox > _imagehi[2]) _imagehi[2] = zbox; + } + + MPI_Allreduce(MPI_IN_PLACE, &(_imagelo[0]), 3, MPI_INT, MPI_MIN, world); + MPI_Allreduce(MPI_IN_PLACE, &(_imagehi[0]), 3, MPI_INT, MPI_MAX, world); + } + // unmap existing atoms via image flags for (i = 0; i < atom->nlocal; i++) @@ -280,93 +315,392 @@ void Replicate::command(int narg, char **arg) double *coord; int tag_enable = atom->tag_enable; - for (int iproc = 0; iproc < nprocs; iproc++) { - if (me == iproc) { - n = 0; - for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + if (bbox_flag) { + + // allgather size of buf on each proc + + n = 0; + for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + + int * size_buf_rnk; + memory->create(size_buf_rnk, nprocs, "replicate:size_buf_rnk"); + + MPI_Allgather(&n, 1, MPI_INT, size_buf_rnk, 1, MPI_INT, world); + + // size of buf_all + + int size_buf_all = 0; + MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world); + + if (me == 0 && screen) { + fprintf(screen," bounding box image = (%i %i %i) to (%i %i %i)\n", + _imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]); + fprintf(screen," bounding box extra memory = %.2f MB\n", + (double)size_buf_all*sizeof(double)/1024/1024); } - MPI_Bcast(&n,1,MPI_INT,iproc,world); - MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); + + // rnk offsets + + int * disp_buf_rnk; + memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk"); + disp_buf_rnk[0] = 0; + for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1]; + + // allgather buf_all + + double * buf_all; + memory->create(buf_all, size_buf_all, "replicate:buf_all"); + + MPI_Allgatherv(buf, n, MPI_DOUBLE, buf_all, size_buf_rnk, disp_buf_rnk, MPI_DOUBLE, world); + + // bounding box of original unwrapped system + + double _orig_lo[3], _orig_hi[3]; + if (triclinic) { + _orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd + _imagelo[1] * old_xy + _imagelo[2] * old_xz; + _orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd + _imagelo[2] * old_yz; + _orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd; + + _orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd + (_imagehi[1]+1) * old_xy + (_imagehi[2]+1) * old_xz; + _orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd + (_imagehi[2]+1) * old_yz; + _orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd; + } else { + _orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd; + _orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd; + _orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd; + + _orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd; + _orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd; + _orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd; + } + + double _lo[3], _hi[3]; + + int num_replicas_added = 0; for (ix = 0; ix < nx; ix++) { for (iy = 0; iy < ny; iy++) { for (iz = 0; iz < nz; iz++) { - // while loop over one proc's atom list + // domain->remap() overwrites coordinates, so always recompute here + + if (triclinic) { + _lo[0] = _orig_lo[0] + ix * old_xprd + iy * old_xy + iz * old_xz; + _hi[0] = _orig_hi[0] + ix * old_xprd + iy * old_xy + iz * old_xz; + + _lo[1] = _orig_lo[1] + iy * old_yprd + iz * old_yz; + _hi[1] = _orig_hi[1] + iy * old_yprd + iz * old_yz; + + _lo[2] = _orig_lo[2] + iz * old_zprd; + _hi[2] = _orig_hi[2] + iz * old_zprd; + } else { + _lo[0] = _orig_lo[0] + ix * old_xprd; + _hi[0] = _orig_hi[0] + ix * old_xprd; + + _lo[1] = _orig_lo[1] + iy * old_yprd; + _hi[1] = _orig_hi[1] + iy * old_yprd; - m = 0; - while (m < n) { - image = ((imageint) IMGMAX << IMG2BITS) | + _lo[2] = _orig_lo[2] + iz * old_zprd; + _hi[2] = _orig_hi[2] + iz * old_zprd; + } + + // test if bounding box of shifted replica overlaps sub-domain of proc + // if not, then skip testing atoms + + int xoverlap = 1; + int yoverlap = 1; + int zoverlap = 1; + if (triclinic) { + double _llo[3]; + domain->x2lamda(_lo,_llo); + double _lhi[3]; + domain->x2lamda(_hi,_lhi); + + if (_llo[0] > (subhi[0] - EPSILON) + || _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0; + if (_llo[1] > (subhi[1] - EPSILON) + || _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0; + if (_llo[2] > (subhi[2] - EPSILON) + || _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0; + } else { + if (_lo[0] > (subhi[0] - EPSILON) + || _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0; + if (_lo[1] > (subhi[1] - EPSILON) + || _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0; + if (_lo[2] > (subhi[2] - EPSILON) + || _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0; + } + + int overlap = 0; + if (xoverlap && yoverlap && zoverlap) overlap = 1; + + // if no overlap, test if bounding box wrapped back into new system + + if (!overlap) { + + // wrap back into cell + + imageint imagelo = ((imageint) IMGMAX << IMG2BITS) | ((imageint) IMGMAX << IMGBITS) | IMGMAX; - if (triclinic == 0) { - x[0] = buf[m+1] + ix*old_xprd; - x[1] = buf[m+2] + iy*old_yprd; - x[2] = buf[m+3] + iz*old_zprd; - } else { - x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; - x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; - x[2] = buf[m+3] + iz*old_zprd; - } - domain->remap(x,image); + domain->remap(&(_lo[0]), imagelo); + int xboxlo = (imagelo & IMGMASK) - IMGMAX; + int yboxlo = (imagelo >> IMGBITS & IMGMASK) - IMGMAX; + int zboxlo = (imagelo >> IMG2BITS) - IMGMAX; + + imageint imagehi = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + domain->remap(&(_hi[0]), imagehi); + int xboxhi = (imagehi & IMGMASK) - IMGMAX; + int yboxhi = (imagehi >> IMGBITS & IMGMASK) - IMGMAX; + int zboxhi = (imagehi >> IMG2BITS) - IMGMAX; + if (triclinic) { - domain->x2lamda(x,lamda); - coord = lamda; - } else coord = x; - - if (coord[0] >= sublo[0] && coord[0] < subhi[0] && - coord[1] >= sublo[1] && coord[1] < subhi[1] && - coord[2] >= sublo[2] && coord[2] < subhi[2]) { - - m += avec->unpack_restart(&buf[m]); - - i = atom->nlocal - 1; - if (tag_enable) - atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; - else atom_offset = 0; - mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; - - atom->x[i][0] = x[0]; - atom->x[i][1] = x[1]; - atom->x[i][2] = x[2]; - - atom->tag[i] += atom_offset; - atom->image[i] = image; - - if (atom->molecular) { - if (atom->molecule[i] > 0) - atom->molecule[i] += mol_offset; - if (atom->molecular == 1) { - if (atom->avec->bonds_allow) - for (j = 0; j < atom->num_bond[i]; j++) - atom->bond_atom[i][j] += atom_offset; - if (atom->avec->angles_allow) - for (j = 0; j < atom->num_angle[i]; j++) { - atom->angle_atom1[i][j] += atom_offset; - atom->angle_atom2[i][j] += atom_offset; - atom->angle_atom3[i][j] += atom_offset; - } - if (atom->avec->dihedrals_allow) - for (j = 0; j < atom->num_dihedral[i]; j++) { - atom->dihedral_atom1[i][j] += atom_offset; - atom->dihedral_atom2[i][j] += atom_offset; - atom->dihedral_atom3[i][j] += atom_offset; - atom->dihedral_atom4[i][j] += atom_offset; - } - if (atom->avec->impropers_allow) - for (j = 0; j < atom->num_improper[i]; j++) { - atom->improper_atom1[i][j] += atom_offset; - atom->improper_atom2[i][j] += atom_offset; - atom->improper_atom3[i][j] += atom_offset; - atom->improper_atom4[i][j] += atom_offset; - } + double _llo[3]; + _llo[0] = _lo[0]; _llo[1] = _lo[1]; _llo[2] = _lo[2]; + domain->x2lamda(_llo,_lo); + + double _lhi[3]; + _lhi[0] = _hi[0]; _lhi[1] = _hi[1]; _lhi[2] = _hi[2]; + domain->x2lamda(_lhi,_hi); + } + + // test all fragments for any overlap; ok to include false positives + + int _xoverlap1 = 0; + int _xoverlap2 = 0; + if (!xoverlap) { + if (xboxlo < 0) { + _xoverlap1 = 1; + if ( _lo[0] > (subhi[0] - EPSILON) ) _xoverlap1 = 0; + } + + if (xboxhi > 0) { + _xoverlap2 = 1; + if ( _hi[0] < (sublo[0] + EPSILON) ) _xoverlap2 = 0; + } + + if (_xoverlap1 || _xoverlap2) xoverlap = 1; + } + + int _yoverlap1 = 0; + int _yoverlap2 = 0; + if (!yoverlap) { + if (yboxlo < 0) { + _yoverlap1 = 1; + if ( _lo[1] > (subhi[1] - EPSILON) ) _yoverlap1 = 0; + } + + if (yboxhi > 0) { + _yoverlap2 = 1; + if ( _hi[1] < (sublo[1] + EPSILON) ) _yoverlap2 = 0; + } + + if (_yoverlap1 || _yoverlap2) yoverlap = 1; + } + + + int _zoverlap1 = 0; + int _zoverlap2 = 0; + if (!zoverlap) { + if (zboxlo < 0) { + _zoverlap1 = 1; + if ( _lo[2] > (subhi[2] - EPSILON) ) _zoverlap1 = 0; + } + + if (zboxhi > 0) { + _zoverlap2 = 1; + if ( _hi[2] < (sublo[2] + EPSILON) ) _zoverlap2 = 0; + } + + if (_zoverlap1 || _zoverlap2) zoverlap = 1; + } + + // does either fragment overlap w/ sub-domain + + if (xoverlap && yoverlap && zoverlap) overlap = 1; + } + + // while loop over one proc's atom list + + if (overlap) { + num_replicas_added++; + + m = 0; + while (m < size_buf_all) { + image = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + if (triclinic == 0) { + x[0] = buf_all[m+1] + ix*old_xprd; + x[1] = buf_all[m+2] + iy*old_yprd; + x[2] = buf_all[m+3] + iz*old_zprd; + } else { + x[0] = buf_all[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; + x[1] = buf_all[m+2] + iy*old_yprd + iz*old_yz; + x[2] = buf_all[m+3] + iz*old_zprd; + } + domain->remap(x,image); + if (triclinic) { + domain->x2lamda(x,lamda); + coord = lamda; + } else coord = x; + + if (coord[0] >= sublo[0] && coord[0] < subhi[0] && + coord[1] >= sublo[1] && coord[1] < subhi[1] && + coord[2] >= sublo[2] && coord[2] < subhi[2]) { + + m += avec->unpack_restart(&buf_all[m]); + + i = atom->nlocal - 1; + if (tag_enable) + atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; + else atom_offset = 0; + mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; + + atom->x[i][0] = x[0]; + atom->x[i][1] = x[1]; + atom->x[i][2] = x[2]; + + atom->tag[i] += atom_offset; + atom->image[i] = image; + + if (atom->molecular) { + if (atom->molecule[i] > 0) + atom->molecule[i] += mol_offset; + if (atom->molecular == 1) { + if (atom->avec->bonds_allow) + for (j = 0; j < atom->num_bond[i]; j++) + atom->bond_atom[i][j] += atom_offset; + if (atom->avec->angles_allow) + for (j = 0; j < atom->num_angle[i]; j++) { + atom->angle_atom1[i][j] += atom_offset; + atom->angle_atom2[i][j] += atom_offset; + atom->angle_atom3[i][j] += atom_offset; + } + if (atom->avec->dihedrals_allow) + for (j = 0; j < atom->num_dihedral[i]; j++) { + atom->dihedral_atom1[i][j] += atom_offset; + atom->dihedral_atom2[i][j] += atom_offset; + atom->dihedral_atom3[i][j] += atom_offset; + atom->dihedral_atom4[i][j] += atom_offset; + } + if (atom->avec->impropers_allow) + for (j = 0; j < atom->num_improper[i]; j++) { + atom->improper_atom1[i][j] += atom_offset; + atom->improper_atom2[i][j] += atom_offset; + atom->improper_atom3[i][j] += atom_offset; + atom->improper_atom4[i][j] += atom_offset; + } + } } + } else m += static_cast<int> (buf_all[m]); + } + } // if (overlap) + + } + } + } + + memory->destroy(size_buf_rnk); + memory->destroy(disp_buf_rnk); + memory->destroy(buf_all); + + int sum = 0; + MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world); + double avg = (double) sum / nprocs; + if (me == 0 && screen) + fprintf(screen," average # of replicas added to proc = %.2f out of %i (%.2f %%)\n", + avg,nx*ny*nz,avg/(nx*ny*nz)*100.0); + + } else { + + for (int iproc = 0; iproc < nprocs; iproc++) { + if (me == iproc) { + n = 0; + for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]); + } + MPI_Bcast(&n,1,MPI_INT,iproc,world); + MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); + + for (ix = 0; ix < nx; ix++) { + for (iy = 0; iy < ny; iy++) { + for (iz = 0; iz < nz; iz++) { + + // while loop over one proc's atom list + + m = 0; + while (m < n) { + image = ((imageint) IMGMAX << IMG2BITS) | + ((imageint) IMGMAX << IMGBITS) | IMGMAX; + if (triclinic == 0) { + x[0] = buf[m+1] + ix*old_xprd; + x[1] = buf[m+2] + iy*old_yprd; + x[2] = buf[m+3] + iz*old_zprd; + } else { + x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz; + x[1] = buf[m+2] + iy*old_yprd + iz*old_yz; + x[2] = buf[m+3] + iz*old_zprd; } - } else m += static_cast<int> (buf[m]); + domain->remap(x,image); + if (triclinic) { + domain->x2lamda(x,lamda); + coord = lamda; + } else coord = x; + + if (coord[0] >= sublo[0] && coord[0] < subhi[0] && + coord[1] >= sublo[1] && coord[1] < subhi[1] && + coord[2] >= sublo[2] && coord[2] < subhi[2]) { + + m += avec->unpack_restart(&buf[m]); + + i = atom->nlocal - 1; + if (tag_enable) + atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; + else atom_offset = 0; + mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; + + atom->x[i][0] = x[0]; + atom->x[i][1] = x[1]; + atom->x[i][2] = x[2]; + + atom->tag[i] += atom_offset; + atom->image[i] = image; + + if (atom->molecular) { + if (atom->molecule[i] > 0) + atom->molecule[i] += mol_offset; + if (atom->molecular == 1) { + if (atom->avec->bonds_allow) + for (j = 0; j < atom->num_bond[i]; j++) + atom->bond_atom[i][j] += atom_offset; + if (atom->avec->angles_allow) + for (j = 0; j < atom->num_angle[i]; j++) { + atom->angle_atom1[i][j] += atom_offset; + atom->angle_atom2[i][j] += atom_offset; + atom->angle_atom3[i][j] += atom_offset; + } + if (atom->avec->dihedrals_allow) + for (j = 0; j < atom->num_dihedral[i]; j++) { + atom->dihedral_atom1[i][j] += atom_offset; + atom->dihedral_atom2[i][j] += atom_offset; + atom->dihedral_atom3[i][j] += atom_offset; + atom->dihedral_atom4[i][j] += atom_offset; + } + if (atom->avec->impropers_allow) + for (j = 0; j < atom->num_improper[i]; j++) { + atom->improper_atom1[i][j] += atom_offset; + atom->improper_atom2[i][j] += atom_offset; + atom->improper_atom3[i][j] += atom_offset; + atom->improper_atom4[i][j] += atom_offset; + } + } + } + } else m += static_cast<int> (buf[m]); + } } } } } - } + } // if (bbox_flag) // free communication buffer and old atom class diff --git a/src/set.cpp b/src/set.cpp index 2b1c0edee2da90faaf506c9ce6976ab3f49a325f..11b91df4c4b8f76000b8d59ed5267dd0b2242610 100644 --- a/src/set.cpp +++ b/src/set.cpp @@ -36,6 +36,7 @@ #include "math_const.h" #include "memory.h" #include "error.h" +#include "modify.h" using namespace LAMMPS_NS; using namespace MathConst; diff --git a/src/special.cpp b/src/special.cpp index 381a763dd29ad9cec5254e8556763f0967d826ac..56529e748b8d52e070696543627bfba255b87e0a 100644 --- a/src/special.cpp +++ b/src/special.cpp @@ -591,7 +591,8 @@ void Special::combine() AtomKokkos* atomKK = (AtomKokkos*) atom; atomKK->modified(Host,SPECIAL_MASK); atomKK->sync(Device,SPECIAL_MASK); - memory->grow_kokkos(atomKK->k_special,atom->special, + MemoryKokkos* memoryKK = (MemoryKokkos*) memory; + memoryKK->grow_kokkos(atomKK->k_special,atom->special, atom->nmax,atom->maxspecial,"atom:special"); atomKK->modified(Device,SPECIAL_MASK); atomKK->sync(Host,SPECIAL_MASK); diff --git a/src/write_coeff.cpp b/src/write_coeff.cpp index d4d4f99bf388c597f9ef5b674b06ec07a933871d..0556d647cf36d1046f4864002f2e6fa5e5d656f5 100644 --- a/src/write_coeff.cpp +++ b/src/write_coeff.cpp @@ -23,6 +23,7 @@ #include "force.h" #include "universe.h" #include "error.h" +#include "domain.h" using namespace LAMMPS_NS;