From f509f133afd9bdeed268de1582ca0adb4f4e0692 Mon Sep 17 00:00:00 2001
From: Steve Plimpton <sjplimp@sandia.gov>
Date: Tue, 13 Dec 2016 17:14:28 -0700
Subject: [PATCH] patch 13Dec16: neighbor refactor, Stan pppm/disp bug fix, M
 Brown INTEL package updates

---
 doc/src/Manual.txt                            |   4 +-
 src/MAKE/MACHINES/Makefile.cori2              | 127 ++++++++++++++++++
 src/MAKE/OPTIONS/Makefile.intel_cpu           | 123 +++++++++++++++++
 .../OPTIONS/Makefile.intel_knl_coprocessor    | 123 +++++++++++++++++
 src/USER-INTEL/fix_intel.cpp                  |  32 +++--
 src/USER-INTEL/intel_intrinsics.h             |   2 +-
 src/USER-INTEL/intel_preprocess.h             |  15 ++-
 src/USER-INTEL/verlet_lrt_intel.cpp           |  15 ++-
 src/USER-INTEL/verlet_lrt_intel.h             |   2 +-
 src/force.cpp                                 |  30 ++++-
 src/memory.cpp                                |  15 ++-
 src/modify.cpp                                |  15 ++-
 src/neighbor.cpp                              |  11 +-
 src/version.h                                 |   2 +-
 14 files changed, 484 insertions(+), 32 deletions(-)
 create mode 100755 src/MAKE/MACHINES/Makefile.cori2
 create mode 100755 src/MAKE/OPTIONS/Makefile.intel_cpu
 create mode 100644 src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor

diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index 40288268a1..ab6aa91b7c 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="30 Nov 2016 version">
+<META NAME="docnumber" CONTENT="13 Dec 2016 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@@ -21,7 +21,7 @@
 <H1></H1>
 
 LAMMPS Documentation :c,h3
-30 Nov 2016 version :c,h4
+13 Dec 2016 version :c,h4
 
 Version info: :h4
 
diff --git a/src/MAKE/MACHINES/Makefile.cori2 b/src/MAKE/MACHINES/Makefile.cori2
new file mode 100755
index 0000000000..a367d54080
--- /dev/null
+++ b/src/MAKE/MACHINES/Makefile.cori2
@@ -0,0 +1,127 @@
+# cori2 = NERSC Cori II KNL, static build, FFTW (single precision)
+
+# ---------------------------------------------------------------------
+#   module swap craype-haswell craype-mic-knl
+#   module load fftw
+#   module load craype-hugepages2M
+#   Recommend using #SBATCH -S 2 for core specialization
+# ---------------------------------------------------------------------
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		CC
+OPTFLAGS =      -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
+CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
+                -fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		CC
+LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
+LIB =           
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings, all OPTIONAL
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings
+# see possible settings in Section 2.2 (step 4) of manual
+
+LMP_INC =	#-DLAMMPS_GZIP -DLAMMPS_JPEG
+
+# MPI library
+# see discussion in Section 2.2 (step 5) of manual
+# MPI wrapper compiler/linker can provide this info
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
+MPI_PATH = 
+MPI_LIB =
+
+# FFT library
+# see discussion in Section 2.2 (step 6) of manual
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =       -DFFT_FFTW3 -DFFT_SINGLE
+FFT_PATH = 
+FFT_LIB =       -lfftw3f
+
+# JPEG and/or PNG library
+# see discussion in Section 2.2 (step 7) of manual
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+
+JPG_INC =       
+JPG_PATH = 	
+JPG_LIB =	#-ljpeg
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# do not edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target
+
+$(EXE):	$(OBJ)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library targets
+
+lib:	$(OBJ)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules
+
+%.o:%.cpp
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+%.o:%.cu
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+# Individual dependencies
+
+depend : fastdep.exe $(SRC)
+	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
+
+fastdep.exe: ../DEPEND/fastdep.c
+	cc -O -o $@ $<
+
+sinclude .depend
diff --git a/src/MAKE/OPTIONS/Makefile.intel_cpu b/src/MAKE/OPTIONS/Makefile.intel_cpu
new file mode 100755
index 0000000000..b34ff47761
--- /dev/null
+++ b/src/MAKE/OPTIONS/Makefile.intel_cpu
@@ -0,0 +1,123 @@
+# intel_cpu = USER-INTEL package, Intel MPI, MKL FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		mpiicpc 
+OPTFLAGS =      -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
+CCFLAGS =	-g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
+                -fno-alias -ansi-alias -restrict $(OPTFLAGS)
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		mpiicpc
+LINKFLAGS =	-g -qopenmp $(OPTFLAGS)
+LIB =           -ltbbmalloc -ltbbmalloc_proxy
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings, all OPTIONAL
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings
+# see possible settings in Section 2.2 (step 4) of manual
+
+LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
+
+# MPI library
+# see discussion in Section 2.2 (step 5) of manual
+# MPI wrapper compiler/linker can provide this info
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
+MPI_PATH = 
+MPI_LIB =
+
+# FFT library
+# see discussion in Section 2.2 (step 6) of manual
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =       -DFFT_MKL -DFFT_SINGLE
+FFT_PATH = 
+FFT_LIB =       -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 \
+                -lmkl_sequential -lmkl_core
+
+# JPEG and/or PNG library
+# see discussion in Section 2.2 (step 7) of manual
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+
+JPG_INC =       
+JPG_PATH = 	
+JPG_LIB =	-ljpeg
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# do not edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
+EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target
+
+$(EXE):	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library targets
+
+lib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules
+
+%.o:%.cpp $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+%.o:%.cu $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+# Individual dependencies
+
+depend : fastdep.exe $(SRC)
+	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
+
+fastdep.exe: ../DEPEND/fastdep.c
+	cc -O -o $@ $<
+
+sinclude .depend
diff --git a/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
new file mode 100644
index 0000000000..b7f3cd6846
--- /dev/null
+++ b/src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
@@ -0,0 +1,123 @@
+# intel_knl_coprocessor = USER-INTEL, Phi x200 (KNL) offload, Intel MPI, MKL FFT
+
+SHELL = /bin/sh
+
+# ---------------------------------------------------------------------
+# compiler/linker settings
+# specify flags and libraries needed for your compiler
+
+CC =		mpiicpc 
+MIC_OPT =       -qoffload-arch=mic-avx512 -fp-model fast=2
+CCFLAGS =	-g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
+                -xHost -fno-alias -ansi-alias -restrict \
+                -qoverride-limits $(MIC_OPT)
+SHFLAGS =	-fPIC
+DEPFLAGS =	-M
+
+LINK =		mpiicpc
+LINKFLAGS =	-g -O3 -xHost -qopenmp -qoffload $(MIC_OPT)
+LIB =           -ltbbmalloc
+SIZE =		size
+
+ARCHIVE =	ar
+ARFLAGS =	-rc
+SHLIBFLAGS =	-shared
+
+# ---------------------------------------------------------------------
+# LAMMPS-specific settings, all OPTIONAL
+# specify settings for LAMMPS features you will use
+# if you change any -D setting, do full re-compile after "make clean"
+
+# LAMMPS ifdef settings
+# see possible settings in Section 2.2 (step 4) of manual
+
+LMP_INC =	-DLAMMPS_GZIP -DLAMMPS_JPEG
+
+# MPI library
+# see discussion in Section 2.2 (step 5) of manual
+# MPI wrapper compiler/linker can provide this info
+# can point to dummy MPI library in src/STUBS as in Makefile.serial
+# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
+# INC = path for mpi.h, MPI compiler settings
+# PATH = path for MPI library
+# LIB = name of MPI library
+
+MPI_INC =       -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
+MPI_PATH = 
+MPI_LIB =
+
+# FFT library
+# see discussion in Section 2.2 (step 6) of manual
+# can be left blank to use provided KISS FFT library
+# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
+# PATH = path for FFT library
+# LIB = name of FFT library
+
+FFT_INC =      -DFFT_MKL -DFFT_SINGLE
+FFT_PATH = 
+FFT_LIB =	-L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
+
+# JPEG and/or PNG library
+# see discussion in Section 2.2 (step 7) of manual
+# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
+# INC = path(s) for jpeglib.h and/or png.h
+# PATH = path(s) for JPEG library and/or PNG library
+# LIB = name(s) of JPEG library and/or PNG library
+
+JPG_INC =       
+JPG_PATH = 	
+JPG_LIB =	-ljpeg
+
+# ---------------------------------------------------------------------
+# build rules and dependencies
+# do not edit this section
+
+include	Makefile.package.settings
+include	Makefile.package
+
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
+EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
+EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
+EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
+EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
+
+# Path to src files
+
+vpath %.cpp ..
+vpath %.h ..
+
+# Link target
+
+$(EXE):	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
+	$(SIZE) $(EXE)
+
+# Library targets
+
+lib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
+
+shlib:	$(OBJ) $(EXTRA_LINK_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
+        $(OBJ) $(EXTRA_LIB) $(LIB)
+
+# Compilation rules
+
+%.o:%.cpp $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+%.d:%.cpp $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
+
+%.o:%.cu $(EXTRA_CPP_DEPENDS)
+	$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
+
+# Individual dependencies
+
+depend : fastdep.exe $(SRC)
+	@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
+
+fastdep.exe: ../DEPEND/fastdep.c
+	cc -O -o $@ $<
+
+sinclude .depend
diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp
index 06bd23abcc..84f4994901 100644
--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@@ -63,7 +63,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
   _nbor_pack_width = 1;
 
   _precision_mode = PREC_MODE_MIXED;
-  _offload_balance = 1.0;
+  _offload_balance = -1.0;
   _overflow_flag[LMP_OVERFLOW] = 0;
   _off_overflow_flag[LMP_OVERFLOW] = 0;
 
@@ -189,10 +189,18 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
       offload_cores = omp_get_num_procs();
       omp_set_num_threads(offload_cores);
       max_offload_threads = omp_get_max_threads();
+      #ifdef __AVX512F__
+      if ( (offload_cores / 4) % 2 == 1) {
+        offload_cores += 4;
+        max_offload_threads += 4;
+      }
+      #endif
     }
     _max_offload_threads = max_offload_threads;
     _offload_cores = offload_cores;
     if (_offload_threads == 0) _offload_threads = offload_cores;
+    if (_offload_cores > 244 && _offload_tpc > 2)
+      _offload_tpc = 2;
   }
   #endif
 
@@ -317,6 +325,8 @@ void FixIntel::init()
     error->all(FLERR,
 	       "Currently, cannot use more than one intel style with hybrid.");
 
+  neighbor->fix_intel = (void *)this;
+
   check_neighbor_intel();
   if (_precision_mode == PREC_MODE_SINGLE)
     _single_buffers->zero_ev();
@@ -1004,8 +1014,10 @@ void FixIntel::set_offload_affinity()
   int offload_threads = _offload_threads;
   int offload_tpc = _offload_tpc;
   int offload_affinity_balanced = _offload_affinity_balanced;
+  int offload_cores = _offload_cores;
   #pragma offload target(mic:_cop) mandatory \
-    in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced)
+    in(node_rank,offload_threads,offload_tpc,offload_affinity_balanced, \
+       offload_cores)
   {
     omp_set_num_threads(offload_threads);
     #pragma omp parallel
@@ -1013,20 +1025,24 @@ void FixIntel::set_offload_affinity()
       int tnum = omp_get_thread_num();
       kmp_affinity_mask_t mask;
       kmp_create_affinity_mask(&mask);
-      int proc;
-      if (offload_affinity_balanced) {
-	proc = offload_threads * node_rank + tnum;
+      int proc = offload_threads * node_rank + tnum;
+      #ifdef __AVX512F__
+      proc = (proc / offload_tpc) + (proc % offload_tpc) * 
+	     ((offload_cores) / 4);
+      proc += 68;
+      #else
+      if (offload_affinity_balanced)
 	proc = proc * 4 - (proc / 60) * 240 + proc / 60 + 1;
-      } else {
-	proc = offload_threads * node_rank + tnum;
+      else
 	proc += (proc / 4) * (4 - offload_tpc) + 1;
-      }
+      #endif
       kmp_set_affinity_mask_proc(proc, &mask);
       if (kmp_set_affinity(&mask) != 0)
 	printf("Could not set affinity on rank %d thread %d to %d\n",
 	       node_rank, tnum, proc);
     }
   }
+
   if (_precision_mode == PREC_MODE_SINGLE)
     _single_buffers->set_off_params(offload_threads, _cop, _separate_buffers);
   else if (_precision_mode == PREC_MODE_MIXED)
diff --git a/src/USER-INTEL/intel_intrinsics.h b/src/USER-INTEL/intel_intrinsics.h
index 754b390722..44a9605961 100644
--- a/src/USER-INTEL/intel_intrinsics.h
+++ b/src/USER-INTEL/intel_intrinsics.h
@@ -28,7 +28,7 @@
 // implementations.
 
 // Vector classes provided with the intel compiler
-#ifdef __MIC__
+#if defined(__MIC__) && !defined(__AVX512F__)
 #include <mic/micvec.h>
 #else
 #include <dvec.h> // icc-mmic hates generating movq
diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h
index 48378d3840..ad07dfd7c2 100644
--- a/src/USER-INTEL/intel_preprocess.h
+++ b/src/USER-INTEL/intel_preprocess.h
@@ -22,6 +22,11 @@
 #ifdef __INTEL_OFFLOAD
 #ifdef LMP_INTEL_OFFLOAD
 #define _LMP_INTEL_OFFLOAD
+#ifdef __TARGET_ARCH_MIC
+#ifndef __MIC__
+#define __MIC__ 1
+#endif
+#endif
 #endif
 #endif
 
@@ -62,6 +67,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
 #define INTEL_MAX_STENCIL_CHECK 4096
 #define INTEL_P3M_MAXORDER 5
 
+#ifdef __INTEL_COMPILER
 #ifdef __AVX__
 #undef INTEL_VECTOR_WIDTH
 #define INTEL_VECTOR_WIDTH 8
@@ -90,6 +96,13 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
 #endif
 #endif
 
+#else
+
+#undef INTEL_VECTOR_WIDTH
+#define INTEL_VECTOR_WIDTH 1
+
+#endif
+
 #define INTEL_DATA_ALIGN 64
 #define INTEL_ONEATOM_FACTOR 2
 #define INTEL_MIC_NBOR_PAD INTEL_MIC_VECTOR_WIDTH
@@ -97,7 +110,7 @@ enum {TIME_PACK, TIME_HOST_NEIGHBOR, TIME_HOST_PAIR, TIME_OFFLOAD_NEIGHBOR,
 #define INTEL_LB_MEAN_WEIGHT 0.1
 #define INTEL_BIGP 1e15
 #define INTEL_MAX_HOST_CORE_COUNT 512
-#define INTEL_MAX_COI_CORES 2
+#define INTEL_MAX_COI_CORES 36
 
 #define IP_PRE_get_stride(stride, n, datasize, torque)	\
   {								\
diff --git a/src/USER-INTEL/verlet_lrt_intel.cpp b/src/USER-INTEL/verlet_lrt_intel.cpp
index fcd9dff08a..afb7852f98 100644
--- a/src/USER-INTEL/verlet_lrt_intel.cpp
+++ b/src/USER-INTEL/verlet_lrt_intel.cpp
@@ -43,7 +43,20 @@ using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 
 VerletLRTIntel::VerletLRTIntel(LAMMPS *lmp, int narg, char **arg) :
-  Verlet(lmp, narg, arg) {}
+  Verlet(lmp, narg, arg) {
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  pthread_mutex_init(&_kmutex,NULL);
+  #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+VerletLRTIntel::~VerletLRTIntel() 
+{
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  pthread_mutex_destroy(&_kmutex);
+  #endif
+}
 
 /* ----------------------------------------------------------------------
    initialization before run
diff --git a/src/USER-INTEL/verlet_lrt_intel.h b/src/USER-INTEL/verlet_lrt_intel.h
index 112b299cd1..a699c20796 100644
--- a/src/USER-INTEL/verlet_lrt_intel.h
+++ b/src/USER-INTEL/verlet_lrt_intel.h
@@ -40,7 +40,7 @@ namespace LAMMPS_NS {
 class VerletLRTIntel : public Verlet {
  public:
   VerletLRTIntel(class LAMMPS *, int, char **);
-  virtual ~VerletLRTIntel() {}
+  virtual ~VerletLRTIntel();
   virtual void init();
   virtual void setup();
   virtual void run(int);
diff --git a/src/force.cpp b/src/force.cpp
index 3154139d54..95a6ff4f6d 100644
--- a/src/force.cpp
+++ b/src/force.cpp
@@ -237,7 +237,10 @@ Pair *Force::new_pair(const char *style, int trysuffix, int &sflag)
     return pair_creator(lmp);
   }
 
-  error->all(FLERR,"Unknown pair style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown pair style %s",style);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
@@ -362,7 +365,10 @@ Bond *Force::new_bond(const char *style, int trysuffix, int &sflag)
     return bond_creator(lmp);
   }
 
-  error->all(FLERR,"Unknown bond style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown bond style %s",style);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
@@ -440,7 +446,10 @@ Angle *Force::new_angle(const char *style, int trysuffix, int &sflag)
     return angle_creator(lmp);
   }
 
-  error->all(FLERR,"Unknown angle style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown angle style %s",style);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
@@ -519,7 +528,10 @@ Dihedral *Force::new_dihedral(const char *style, int trysuffix, int &sflag)
     return dihedral_creator(lmp);
   }
 
-  error->all(FLERR,"Unknown dihedral style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown dihedral style %s",style);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
@@ -597,7 +609,10 @@ Improper *Force::new_improper(const char *style, int trysuffix, int &sflag)
     return improper_creator(lmp);
   }
 
-  error->all(FLERR,"Unknown improper style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown improper style %s",style);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
@@ -679,7 +694,10 @@ KSpace *Force::new_kspace(int narg, char **arg, int trysuffix, int &sflag)
     return kspace_creator(lmp, narg-1, &arg[1]);
   }
 
-  error->all(FLERR,"Unknown kspace style");
+  char str[128];  // snprintf below truncates over-long style names
+  snprintf(str,sizeof(str),"Unknown kspace style %s",arg[0]);
+  error->all(FLERR,str);
+
   return NULL;
 }
 
diff --git a/src/memory.cpp b/src/memory.cpp
index 1ab5074079..ed0993202b 100644
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -18,12 +18,12 @@
 #include "error.h"
 
 #if defined(LMP_USER_INTEL) && defined(__INTEL_COMPILER)
+#ifndef LMP_INTEL_NO_TBB
 #define LMP_USE_TBB_ALLOCATOR
 #include "tbb/scalable_allocator.h"
+#else
+#include <malloc.h>
 #endif
-
-#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN)
-#define LAMMPS_MEMALIGN 64
 #endif
 
 using namespace LAMMPS_NS;
@@ -75,6 +75,15 @@ void *Memory::srealloc(void *ptr, bigint nbytes, const char *name)
 
 #if defined(LMP_USE_TBB_ALLOCATOR)
   ptr = scalable_aligned_realloc(ptr, nbytes, LAMMPS_MEMALIGN);
+#elif defined(LMP_INTEL_NO_TBB) && defined(LAMMPS_MEMALIGN)
+  ptr = realloc(ptr, nbytes);
+  uintptr_t offset = ((uintptr_t)(const void *)(ptr)) % LAMMPS_MEMALIGN;
+  if (offset) {
+    void *optr = ptr;
+    ptr = smalloc(nbytes, name);
+    memcpy(ptr, optr, MIN(nbytes,malloc_usable_size(optr)));
+    free(optr);
+  }
 #else
   ptr = realloc(ptr,nbytes);
 #endif
diff --git a/src/modify.cpp b/src/modify.cpp
index 6370998a23..2e5bd78504 100644
--- a/src/modify.cpp
+++ b/src/modify.cpp
@@ -767,7 +767,8 @@ void Modify::add_fix(int narg, char **arg, int trysuffix)
         if (strcmp(estyle,fix[ifix]->style) == 0) match = 1;
       }
     }
-    if (!match) error->all(FLERR,"Replacing a fix, but new style != old style");
+    if (!match) error->all(FLERR,
+                           "Replacing a fix, but new style != old style");
 
     if (fix[ifix]->igroup != igroup && comm->me == 0)
       error->warning(FLERR,"Replacing a fix, but new group != old group");
@@ -812,7 +813,11 @@ void Modify::add_fix(int narg, char **arg, int trysuffix)
     fix[ifix] = fix_creator(lmp,narg,arg);
   }
 
-  if (fix[ifix] == NULL) error->all(FLERR,"Unknown fix style");
+  if (fix[ifix] == NULL) {
+    char str[128];  // snprintf below truncates over-long style names
+    snprintf(str,sizeof(str),"Unknown fix style %s",arg[2]);
+    error->all(FLERR,str);
+  }
 
   // check if Fix is in restart_global list
   // if yes, pass state info to the Fix so it can reset itself
@@ -994,7 +999,11 @@ void Modify::add_compute(int narg, char **arg, int trysuffix)
     compute[ncompute] = compute_creator(lmp,narg,arg);
   }
 
-  if (compute[ncompute] == NULL) error->all(FLERR,"Unknown compute style");
+  if (compute[ncompute] == NULL) {
+    char str[128];  // snprintf below truncates over-long style names
+    snprintf(str,sizeof(str),"Unknown compute style %s",arg[2]);
+    error->all(FLERR,str);
+  }
 
   ncompute++;
 }
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index d014f1c8d0..f968be1dd0 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -890,7 +890,8 @@ void Neighbor::init_pair()
   for (i = 0; i < nstencil; i++) {
     flag = 0;
     for (j = 0; j < npair_perpetual; j++)
-      if (lists[plist[j]]->stencil_method == neigh_stencil[i]->istyle) flag = 1;
+      if (lists[plist[j]]->stencil_method == neigh_stencil[i]->istyle) 
+        flag = 1;
     if (flag) slist[nstencil_perpetual++] = i;
   }
 
@@ -1139,8 +1140,8 @@ void Neighbor::print_pairwise_info()
         } else {
           sprintf(str,"  (%d) command %s",i+1,requests[i]->command_style);
         }
-        fprintf(out,"%s\n",str);
-        
+        fprintf(out,"%s",str);
+
         if (requests[i]->half) kind = "half";
         else if (requests[i]->full) kind = "full";
         else if (requests[i]->gran) kind = "size";
@@ -1328,7 +1329,6 @@ int Neighbor::choose_stencil(NeighRequest *rq)
     if (style == BIN && !(mask & NS_BIN)) continue;
     if (style == MULTI && !(mask & NS_MULTI)) continue;
 
-
     if (dimension == 2 && !(mask & NS_2D)) continue;
     if (dimension == 3 && !(mask & NS_3D)) continue;
 
@@ -1389,7 +1389,8 @@ int Neighbor::choose_pair(NeighRequest *rq)
   //   triclinic = orthgonal/triclinic box
 
   int copyflag,skipflag,halfflag,fullflag,halffullflag,sizeflag,respaflag,
-    ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag,kokkos_device_flag,kokkos_host_flag;
+    ghostflag,off2onflag,onesideflag,ssaflag,ompflag,intelflag,
+    kokkos_device_flag,kokkos_host_flag;
 
   copyflag = skipflag = halfflag = fullflag = halffullflag = sizeflag = 
     ghostflag = respaflag = off2onflag = onesideflag = ssaflag = 
diff --git a/src/version.h b/src/version.h
index 1562ca3d95..c7c630c699 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "30 Nov 2016"
+#define LAMMPS_VERSION "13 Dec 2016"
-- 
GitLab