From 5ce9fbe0baf6e71b92a55ca8b6e11ae40d011795 Mon Sep 17 00:00:00 2001 From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa> Date: Mon, 11 Mar 2013 14:53:45 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9626 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/USER-OMP/neighbor_omp.h | 30 ++-- src/USER-OMP/pppm_cg_omp.cpp | 148 ++++++++++-------- src/USER-OMP/pppm_disp_omp.cpp | 215 +++++++++++++++---------- src/USER-OMP/pppm_disp_tip4p_omp.cpp | 224 +++++++++++++++++---------- src/USER-OMP/pppm_omp.cpp | 71 +++++---- src/USER-OMP/pppm_tip4p_omp.cpp | 76 +++++---- 6 files changed, 461 insertions(+), 303 deletions(-) diff --git a/src/USER-OMP/neighbor_omp.h b/src/USER-OMP/neighbor_omp.h index fd27ba8b61..8db58c7e20 100644 --- a/src/USER-OMP/neighbor_omp.h +++ b/src/USER-OMP/neighbor_omp.h @@ -24,31 +24,31 @@ namespace LAMMPS_NS { #if defined(_OPENMP) // make sure we have at least one page for each thread -#define NEIGH_OMP_INIT \ - const int nthreads = comm->nthreads; \ - if (nthreads > list->maxpage) \ +#define NEIGH_OMP_INIT \ + const int nthreads = comm->nthreads; \ + if (nthreads > list->maxpage) \ list->add_pages(nthreads - list->maxpage) // get thread id and then assign each thread a fixed chunk of atoms -#define NEIGH_OMP_SETUP(num) \ - { \ - const int tid = omp_get_thread_num(); \ - const int idelta = 1 + num/nthreads; \ - const int ifrom = tid*idelta; \ - const int ito = ((ifrom + idelta) > num) \ - ? num : (ifrom+idelta); \ +#define NEIGH_OMP_SETUP(num) \ + { \ + const int tid = omp_get_thread_num(); \ + const int idelta = 1 + num/nthreads; \ + const int ifrom = tid*idelta; \ + const int ito = ((ifrom + idelta) > num) \ + ? num : (ifrom+idelta); #define NEIGH_OMP_CLOSE } #else /* !defined(_OPENMP) */ -#define NEIGH_OMP_INIT \ +#define NEIGH_OMP_INIT \ const int nthreads = comm->nthreads; -#define NEIGH_OMP_SETUP(num) \ - const int tid = 0; \ - const int ifrom = 0; \ - const int ito = num +#define NEIGH_OMP_SETUP(num) \ + const int tid = 0; \ + const int ifrom = 0; \ + const int ito = num #define NEIGH_OMP_CLOSE diff --git a/src/USER-OMP/pppm_cg_omp.cpp b/src/USER-OMP/pppm_cg_omp.cpp index 5f1b5e1604..d5ebf21e14 100644 --- a/src/USER-OMP/pppm_cg_omp.cpp +++ b/src/USER-OMP/pppm_cg_omp.cpp @@ -338,6 +338,10 @@ void PPPMCGOMP::make_rho() FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); + // no local atoms with a charge => nothing else to do + + if (num_charged == 0) return; + const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; @@ -377,7 +381,7 @@ void PPPMCGOMP::make_rho() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) - || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; @@ -388,23 +392,23 @@ void PPPMCGOMP::make_rho() const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { - const int jn = (nz+n-nzlo_out)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower; m <= nupper; ++m) { - const int jm = jn+(ny+m-nylo_out)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower; l <= nupper; ++l) { - const int jl = jm+nx+l-nxlo_out; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -416,6 +420,10 @@ void PPPMCGOMP::make_rho() void PPPMCGOMP::fieldforce_ik() { + // no local atoms with a charge => nothing to do + + if (num_charged == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -455,19 +463,19 @@ void PPPMCGOMP::fieldforce_ik() ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = r1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*r1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*r1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } } // convert E-field to force @@ -486,6 +494,10 @@ void PPPMCGOMP::fieldforce_ik() void PPPMCGOMP::fieldforce_ad() { + // no local atoms with a charge => nothing to do + + if (num_charged == 0) return; + const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; const double hx_inv = nx_pppm/prd[0]; const double hy_inv = ny_pppm/prd[1]; @@ -533,16 +545,16 @@ void PPPMCGOMP::fieldforce_ad() ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; - eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; - ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx]; - } - } + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx]; + } + } } ekx *= hx_inv; eky *= hy_inv; @@ -580,6 +592,10 @@ void PPPMCGOMP::fieldforce_ad() void PPPMCGOMP::fieldforce_peratom() { + // no local atoms with a charge => nothing to do + + if (num_charged == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -617,36 +633,36 @@ void PPPMCGOMP::fieldforce_peratom() u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = r1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*r1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*r1d[0][l]; - if (eflag_atom) u += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } } const double qi = q[i]; if (eflag_atom) eatom[i] += qi*u; if (vflag_atom) { - vatom[i][0] += qi*v0; - vatom[i][1] += qi*v1; - vatom[i][2] += qi*v2; - vatom[i][3] += qi*v3; - vatom[i][4] += qi*v4; - vatom[i][5] += qi*v5; + vatom[i][0] += qi*v0; + vatom[i][1] += qi*v1; + vatom[i][2] += qi*v2; + vatom[i][3] += qi*v3; + vatom[i][4] += qi*v4; + vatom[i][5] += qi*v5; } } } // end of parallel region diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp index 6d6ab70a0a..8a571115e7 100644 --- a/src/USER-OMP/pppm_disp_omp.cpp +++ b/src/USER-OMP/pppm_disp_omp.cpp @@ -401,6 +401,11 @@ void PPPMDispOMP::make_rho_c() FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; @@ -429,7 +434,6 @@ void PPPMDispOMP::make_rho_c() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -439,7 +443,7 @@ void PPPMDispOMP::make_rho_c() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) - || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; @@ -450,23 +454,23 @@ void PPPMDispOMP::make_rho_c() const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { - const int jn = (nz+n-nzlo_out)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower; m <= nupper; ++m) { - const int jm = jn+(ny+m-nylo_out)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower; l <= nupper; ++l) { - const int jl = jm+nx+l-nxlo_out; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -485,6 +489,11 @@ void PPPMDispOMP::make_rho_g() FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); memset(d,0,ngrid_6*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; @@ -512,7 +521,6 @@ void PPPMDispOMP::make_rho_g() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -522,7 +530,7 @@ void PPPMDispOMP::make_rho_g() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) - || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; @@ -535,23 +543,23 @@ void PPPMDispOMP::make_rho_g() const FFT_SCALAR z0 = delvolinv_6 * lj; for (int n = nlower_6; n <= nupper_6; ++n) { - const int jn = (nz+n-nzlo_out_6)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower_6; m <= nupper_6; ++m) { - const int jm = jn+(ny+m-nylo_out_6)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower_6; l <= nupper_6; ++l) { - const int jl = jm+nx+l-nxlo_out_6; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -583,6 +591,11 @@ void PPPMDispOMP::make_rho_a() memset(d5,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d6,0,ngrid_6*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; @@ -610,7 +623,6 @@ void PPPMDispOMP::make_rho_a() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -620,7 +632,7 @@ void PPPMDispOMP::make_rho_a() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) - || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; @@ -640,31 +652,31 @@ void PPPMDispOMP::make_rho_a() const FFT_SCALAR z0 = delvolinv_6; for (int n = nlower_6; n <= nupper_6; ++n) { - const int jn = (nz+n-nzlo_out_6)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; - for (int m = nlower_6; m <= nupper_6; ++m) { - const int jm = jn+(ny+m-nylo_out_6)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; - for (int l = nlower_6; l <= nupper_6; ++l) { - const int jl = jm+nx+l-nxlo_out_6; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; const double w = x0*r1d[0][l]; - d0[jl] += w*lj0; - d1[jl] += w*lj1; - d2[jl] += w*lj2; - d3[jl] += w*lj3; - d4[jl] += w*lj4; - d5[jl] += w*lj5; - d6[jl] += w*lj6; - } - } + d0[jl] += w*lj0; + d1[jl] += w*lj1; + d2[jl] += w*lj2; + d3[jl] += w*lj3; + d4[jl] += w*lj4; + d5[jl] += w*lj5; + d6[jl] += w*lj6; + } + } } } } @@ -678,6 +690,13 @@ void PPPMDispOMP::make_rho_a() void PPPMDispOMP::fieldforce_c_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -686,8 +705,6 @@ void PPPMDispOMP::fieldforce_c_ik() const double * const q = atom->q; const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) @@ -761,6 +778,13 @@ void PPPMDispOMP::fieldforce_c_ik() void PPPMDispOMP::fieldforce_c_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -769,8 +793,6 @@ void PPPMDispOMP::fieldforce_c_ad() const double * const q = atom->q; const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; //const double * const sf_c = sf_coeff; @@ -878,6 +900,13 @@ void PPPMDispOMP::fieldforce_c_ad() void PPPMDispOMP::fieldforce_c_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -885,8 +914,6 @@ void PPPMDispOMP::fieldforce_c_peratom() const double * const q = atom->q; const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -970,6 +997,13 @@ void PPPMDispOMP::fieldforce_c_peratom() void PPPMDispOMP::fieldforce_g_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -977,8 +1011,6 @@ void PPPMDispOMP::fieldforce_g_ik() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) @@ -1055,6 +1087,13 @@ void PPPMDispOMP::fieldforce_g_ik() void PPPMDispOMP::fieldforce_g_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1062,9 +1101,6 @@ void PPPMDispOMP::fieldforce_g_ad() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - double *prd; if (triclinic == 0) prd = domain->prd; @@ -1173,14 +1209,19 @@ void PPPMDispOMP::fieldforce_g_ad() void PPPMDispOMP::fieldforce_g_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -1267,6 +1308,13 @@ void PPPMDispOMP::fieldforce_g_peratom() void PPPMDispOMP::fieldforce_a_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1274,8 +1322,6 @@ void PPPMDispOMP::fieldforce_a_ik() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) @@ -1384,6 +1430,13 @@ void PPPMDispOMP::fieldforce_a_ik() void PPPMDispOMP::fieldforce_a_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1391,9 +1444,6 @@ void PPPMDispOMP::fieldforce_a_ad() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - double *prd; if (triclinic == 0) prd = domain->prd; @@ -1569,14 +1619,19 @@ void PPPMDispOMP::fieldforce_a_ad() void PPPMDispOMP::fieldforce_a_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) diff --git a/src/USER-OMP/pppm_disp_tip4p_omp.cpp b/src/USER-OMP/pppm_disp_tip4p_omp.cpp index bbb96228a3..9532a2d61b 100644 --- a/src/USER-OMP/pppm_disp_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_disp_tip4p_omp.cpp @@ -337,6 +337,9 @@ void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv, int nxhi_o, int nyhi_o, int nzhi_o) { + // no local atoms => nothing to do + if (atom->nlocal == 0) return; + const int * _noalias const type = atom->type; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; int3_t * _noalias const p2g = (int3_t *) part2grid[0]; @@ -411,6 +414,9 @@ void PPPMDispTIP4POMP::particle_map(double dxinv, double dyinv, int nxhi_o, int nyhi_o, int nzhi_o) { + // no local atoms => nothing to do + if (atom->nlocal == 0) return; + const int * _noalias const type = atom->type; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; int3_t * _noalias const p2g = (int3_t *) part2grid[0]; @@ -478,6 +484,11 @@ void PPPMDispTIP4POMP::make_rho_c() FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; @@ -508,7 +519,6 @@ void PPPMDispTIP4POMP::make_rho_c() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -518,13 +528,13 @@ void PPPMDispTIP4POMP::make_rho_c() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) - || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; - if (type[i] == typeO) { - find_M_thr(i,iH1,iH2,xM); - } else { - xM = x[i]; - } + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else { + xM = x[i]; + } const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; @@ -534,23 +544,23 @@ void PPPMDispTIP4POMP::make_rho_c() const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { - const int jn = (nz+n-nzlo_out)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower; m <= nupper; ++m) { - const int jm = jn+(ny+m-nylo_out)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower; l <= nupper; ++l) { - const int jl = jm+nx+l-nxlo_out; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -569,6 +579,11 @@ void PPPMDispTIP4POMP::make_rho_g() FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); memset(d,0,ngrid_6*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; @@ -596,7 +611,6 @@ void PPPMDispTIP4POMP::make_rho_g() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -606,7 +620,7 @@ void PPPMDispTIP4POMP::make_rho_g() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) - || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; @@ -619,23 +633,23 @@ void PPPMDispTIP4POMP::make_rho_g() const FFT_SCALAR z0 = delvolinv_6 * lj; for (int n = nlower_6; n <= nupper_6; ++n) { - const int jn = (nz+n-nzlo_out_6)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower_6; m <= nupper_6; ++m) { - const int jm = jn+(ny+m-nylo_out_6)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower_6; l <= nupper_6; ++l) { - const int jl = jm+nx+l-nxlo_out_6; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -667,6 +681,11 @@ void PPPMDispTIP4POMP::make_rho_a() memset(d5,0,ngrid_6*sizeof(FFT_SCALAR)); memset(d6,0,ngrid_6*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out_6 - nxlo_out_6 + 1; const int iy = nyhi_out_6 - nylo_out_6 + 1; @@ -694,7 +713,6 @@ void PPPMDispTIP4POMP::make_rho_a() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -704,7 +722,7 @@ void PPPMDispTIP4POMP::make_rho_a() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) - || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; @@ -724,31 +742,31 @@ void PPPMDispTIP4POMP::make_rho_a() const FFT_SCALAR z0 = delvolinv_6; for (int n = nlower_6; n <= nupper_6; ++n) { - const int jn = (nz+n-nzlo_out_6)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; - for (int m = nlower_6; m <= nupper_6; ++m) { - const int jm = jn+(ny+m-nylo_out_6)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; - for (int l = nlower_6; l <= nupper_6; ++l) { - const int jl = jm+nx+l-nxlo_out_6; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; const double w = x0*r1d[0][l]; - d0[jl] += w*lj0; - d1[jl] += w*lj1; - d2[jl] += w*lj2; - d3[jl] += w*lj3; - d4[jl] += w*lj4; - d5[jl] += w*lj5; - d6[jl] += w*lj6; - } - } + d0[jl] += w*lj0; + d1[jl] += w*lj1; + d2[jl] += w*lj2; + d3[jl] += w*lj3; + d4[jl] += w*lj4; + d5[jl] += w*lj5; + d6[jl] += w*lj6; + } + } } } } @@ -760,6 +778,13 @@ void PPPMDispTIP4POMP::make_rho_a() void PPPMDispTIP4POMP::fieldforce_c_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -776,9 +801,6 @@ void PPPMDispTIP4POMP::fieldforce_c_ik() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -860,6 +882,13 @@ void PPPMDispTIP4POMP::fieldforce_c_ik() void PPPMDispTIP4POMP::fieldforce_c_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; const double hx_inv = nx_pppm/prd[0]; const double hy_inv = ny_pppm/prd[1]; @@ -881,9 +910,6 @@ void PPPMDispTIP4POMP::fieldforce_c_ad() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -985,6 +1011,13 @@ void PPPMDispTIP4POMP::fieldforce_c_ad() void PPPMDispTIP4POMP::fieldforce_g_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -992,8 +1025,6 @@ void PPPMDispTIP4POMP::fieldforce_g_ik() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) @@ -1070,6 +1101,13 @@ void PPPMDispTIP4POMP::fieldforce_g_ik() void PPPMDispTIP4POMP::fieldforce_g_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1077,9 +1115,6 @@ void PPPMDispTIP4POMP::fieldforce_g_ad() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - double *prd; if (triclinic == 0) prd = domain->prd; @@ -1188,14 +1223,19 @@ void PPPMDispTIP4POMP::fieldforce_g_ad() void PPPMDispTIP4POMP::fieldforce_g_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -1282,6 +1322,13 @@ void PPPMDispTIP4POMP::fieldforce_g_peratom() void PPPMDispTIP4POMP::fieldforce_a_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1289,8 +1336,6 @@ void PPPMDispTIP4POMP::fieldforce_a_ik() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; const double qqrd2e = force->qqrd2e; #if defined(_OPENMP) @@ -1399,6 +1444,13 @@ void PPPMDispTIP4POMP::fieldforce_a_ik() void PPPMDispTIP4POMP::fieldforce_a_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -1406,9 +1458,6 @@ void PPPMDispTIP4POMP::fieldforce_a_ad() // ek = 3 components of E-field on particle const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - double *prd; if (triclinic == 0) prd = domain->prd; @@ -1584,14 +1633,19 @@ void PPPMDispTIP4POMP::fieldforce_a_ad() void PPPMDispTIP4POMP::fieldforce_a_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt // (mx,my,mz) = global coords of moving stencil pt const double * const * const x = atom->x; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) diff --git a/src/USER-OMP/pppm_omp.cpp b/src/USER-OMP/pppm_omp.cpp index 6ada6b4137..d0c9d99689 100644 --- a/src/USER-OMP/pppm_omp.cpp +++ b/src/USER-OMP/pppm_omp.cpp @@ -336,6 +336,11 @@ void PPPMOMP::make_rho() FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; @@ -364,7 +369,6 @@ void PPPMOMP::make_rho() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -374,7 +378,7 @@ void PPPMOMP::make_rho() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) - || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; @@ -385,23 +389,23 @@ void PPPMOMP::make_rho() const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { - const int jn = (nz+n-nzlo_out)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower; m <= nupper; ++m) { - const int jm = jn+(ny+m-nylo_out)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower; l <= nupper; ++l) { - const int jl = jm+nx+l-nxlo_out; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -419,6 +423,13 @@ void PPPMOMP::fieldforce_ik() // (mx,my,mz) = global coords of moving stencil pt // ek = 3 components of E-field on particle + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const double * _noalias const q = atom->q; const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; @@ -428,9 +439,6 @@ void PPPMOMP::fieldforce_ik() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -488,6 +496,13 @@ void PPPMOMP::fieldforce_ik() void PPPMOMP::fieldforce_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; const double hx_inv = nx_pppm/prd[0]; const double hy_inv = ny_pppm/prd[1]; @@ -507,9 +522,6 @@ void PPPMOMP::fieldforce_ad() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -586,6 +598,13 @@ void PPPMOMP::fieldforce_ad() void PPPMOMP::fieldforce_peratom() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -593,8 +612,6 @@ void PPPMOMP::fieldforce_peratom() const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; const double * _noalias const q = atom->q; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; #if defined(_OPENMP) #pragma omp parallel default(none) diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp index dcc84b6e26..9679474ae4 100644 --- a/src/USER-OMP/pppm_tip4p_omp.cpp +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -330,6 +330,10 @@ void PPPMTIP4POMP::compute(int eflag, int vflag) void PPPMTIP4POMP::particle_map() { + // no local atoms => nothing to do + + if (atom->nlocal == 0) return; + const int * _noalias const type = atom->type; const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; int3_t * _noalias const p2g = (int3_t *) part2grid[0]; @@ -392,6 +396,11 @@ void PPPMTIP4POMP::make_rho() FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); memset(d,0,ngrid*sizeof(FFT_SCALAR)); + // no local atoms => nothing else to do + + const int nlocal = atom->nlocal; + if (nlocal == 0) return; + const int ix = nxhi_out - nxlo_out + 1; const int iy = nyhi_out - nylo_out + 1; @@ -422,7 +431,6 @@ void PPPMTIP4POMP::make_rho() // (dx,dy,dz) = distance to "lower left" grid pt // loop over all local atoms for all threads - const int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { const int nx = p2g[i].a; @@ -432,13 +440,13 @@ void PPPMTIP4POMP::make_rho() // pre-screen whether this atom will ever come within // reach of the data segement this thread is updating. if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) - || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; - if (type[i] == typeO) { - find_M_thr(i,iH1,iH2,xM); - } else { - xM = x[i]; - } + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else { + xM = x[i]; + } const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; @@ -448,23 +456,23 @@ void PPPMTIP4POMP::make_rho() const FFT_SCALAR z0 = delvolinv * q[i]; for (int n = nlower; n <= nupper; ++n) { - const int jn = (nz+n-nzlo_out)*ix*iy; - const FFT_SCALAR y0 = z0*r1d[2][n]; - - for (int m = nlower; m <= nupper; ++m) { - const int jm = jn+(ny+m-nylo_out)*ix; - const FFT_SCALAR x0 = y0*r1d[1][m]; - - for (int l = nlower; l <= nupper; ++l) { - const int jl = jm+nx+l-nxlo_out; - // make sure each thread only updates - // "his" elements of the density grid - if (jl >= jto) break; - if (jl < jfrom) continue; - - d[jl] += x0*r1d[0][l]; - } - } + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } } } } @@ -476,6 +484,13 @@ void PPPMTIP4POMP::make_rho() void PPPMTIP4POMP::fieldforce_ik() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (dx,dy,dz) = distance to "lower left" grid pt @@ -492,9 +507,6 @@ void PPPMTIP4POMP::fieldforce_ik() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -576,6 +588,13 @@ void PPPMTIP4POMP::fieldforce_ik() void PPPMTIP4POMP::fieldforce_ad() { + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + // no local atoms => nothing to do + + if (nlocal == 0) return; + const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; const double hx_inv = nx_pppm/prd[0]; const double hy_inv = ny_pppm/prd[1]; @@ -597,9 +616,6 @@ void PPPMTIP4POMP::fieldforce_ad() const double boxloy = boxlo[1]; const double boxloz = boxlo[2]; - const int nthreads = comm->nthreads; - const int nlocal = atom->nlocal; - #if defined(_OPENMP) #pragma omp parallel default(none) #endif -- GitLab