From 30431d4edb77a52f7c2c46d8dfde95888242c0f8 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer <akohlmey@gmail.com> Date: Tue, 8 Aug 2017 16:57:27 -0400 Subject: [PATCH] rework Install.py for gpu library. make it consistent with other lib folders and support python3 --- lib/gpu/.gitignore | 10 +++--- lib/gpu/Install.py | 58 ++++++++++++++++-------------- lib/gpu/Makefile.linux | 2 +- lib/gpu/Makefile.mingw32-cross | 17 --------- lib/gpu/Makefile.mingw32-cross-mpi | 19 ---------- lib/gpu/Makefile.mingw64-cross | 18 ---------- lib/gpu/Makefile.mingw64-cross-mpi | 20 ----------- lib/gpu/Makefile.mpi | 1 + lib/gpu/Makefile.serial | 31 ++++++++++++---- 9 files changed, 63 insertions(+), 113 deletions(-) delete mode 100644 lib/gpu/Makefile.mingw32-cross delete mode 100644 lib/gpu/Makefile.mingw32-cross-mpi delete mode 100644 lib/gpu/Makefile.mingw64-cross delete mode 100644 lib/gpu/Makefile.mingw64-cross-mpi create mode 120000 lib/gpu/Makefile.mpi diff --git a/lib/gpu/.gitignore b/lib/gpu/.gitignore index 228a9f7731..9ad6046a09 100644 --- a/lib/gpu/.gitignore +++ b/lib/gpu/.gitignore @@ -1,4 +1,6 @@ -obj -obj_ocl -ocl_get_devices -nvc_get_devices +/obj +/obj_ocl +/ocl_get_devices +/nvc_get_devices +/*.cubin +/*_cubin.h diff --git a/lib/gpu/Install.py b/lib/gpu/Install.py index c6cd1f3021..657f1c8fcc 100644 --- a/lib/gpu/Install.py +++ b/lib/gpu/Install.py @@ -3,53 +3,57 @@ # Install.py tool to build the GPU library # used to automate the steps described in the README file in this dir -import sys,os,re,commands +from __future__ import print_function +import sys,os,subprocess # help message help = """ -Syntax from src dir: make lib-gpu args="-i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix" -Syntax from lib dir: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix +Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix" +Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix specify one or more options, order does not matter -copies an existing Makefile.isuffix in lib/gpu to Makefile.auto +copies an existing Makefile.machine in lib/gpu to Makefile.auto optionally edits these variables in Makefile.auto: CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE optionally uses Makefile.auto to build the GPU library -> libgpu.a and to copy a Makefile.lammps.esuffix -> Makefile.lammps optionally copies Makefile.auto to a new Makefile.osuffix - -i = use Makefile.isuffix as starting point, copy to Makefile.auto - default isuffix = linux + -m = use Makefile.machine as starting point, copy to Makefile.auto + default machine = linux -h = set CUDA_HOME variable in Makefile.auto to hdir hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda -a = set CUDA_ARCH variable in Makefile.auto to arch - use arch = ?? for K40 (Tesla) - use arch = 37 for dual K80 (Tesla) - use arch = 60 for P100 (Pascal) + use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0) + or GeForce GTX 580 or similar + use arch = 30 for Tesla K10 (Kepler) + use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar + use arch = 37 for Tesla dual K80 (Kepler) + use arch = 60 for Tesla P100 (Pascal) -p = set CUDA_PRECISION variable in Makefile.auto to precision use precision = double or mixed or single -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix - -m = make the GPU library using Makefile.auto + -b = make the GPU library using Makefile.auto first performs a "make clean" - produces libgpu.a if successful + then produces libgpu.a if successful also copies EXTRAMAKE file -> Makefile.lammps -e can set which Makefile.lammps.esuffix file is copied -o = copy final Makefile.auto to Makefile.osuffix Examples: -make lib-gpu args="-m" # build GPU lib with default Makefile.linux -make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision -make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU lib +make lib-gpu args="-b" # build GPU lib with default Makefile.linux +make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision +make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings """ # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # parse args @@ -65,7 +69,7 @@ outflag = 0 iarg = 0 while iarg < nargs: - if args[iarg] == "-i": + if args[iarg] == "-m": if iarg+2 > nargs: error() isuffix = args[iarg+1] iarg += 2 @@ -89,7 +93,7 @@ while iarg < nargs: eflag = 1 lmpsuffix = args[iarg+1] iarg += 2 - elif args[iarg] == "-m": + elif args[iarg] == "-b": makeflag = 1 iarg += 1 elif args[iarg] == "-o": @@ -117,9 +121,9 @@ fp = open("Makefile.auto",'w') for line in lines: words = line.split() if len(words) != 3: - print >>fp,line, + fp.write(line) continue - + if hflag and words[0] == "CUDA_HOME" and words[1] == '=': line = line.replace(words[2],hdir) if aflag and words[0] == "CUDA_ARCH" and words[1] == '=': @@ -128,20 +132,20 @@ for line in lines: line = line.replace(words[2],precstr) if eflag and words[0] == "EXTRAMAKE" and words[1] == '=': line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix) - - print >>fp,line, + fp.write(line) fp.close() # perform make # make operations copies EXTRAMAKE file to Makefile.lammps if makeflag: - print "Building libgpu.a ..." + print("Building libgpu.a ...") cmd = "rm -f libgpu.a" - commands.getoutput(cmd) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) cmd = "make -f Makefile.auto clean; make -f Makefile.auto" - commands.getoutput(cmd) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + print(txt.decode('UTF-8')) if not os.path.exists("libgpu.a"): error("Build of lib/gpu/libgpu.a was NOT successful") if not os.path.exists("Makefile.lammps"): @@ -150,6 +154,6 @@ if makeflag: # copy new Makefile.auto to Makefile.osuffix if outflag: - print "Creating new Makefile.%s" % osuffix + print("Creating new Makefile.%s" % osuffix) cmd = "cp Makefile.auto Makefile.%s" % osuffix - commands.getoutput(cmd) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux index d72c0ba437..dfcc5bf7d3 100644 --- a/lib/gpu/Makefile.linux +++ b/lib/gpu/Makefile.linux @@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_LIB = -L$(CUDA_HOME)/lib64 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC +CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias BIN_DIR = ./ diff --git a/lib/gpu/Makefile.mingw32-cross b/lib/gpu/Makefile.mingw32-cross deleted file mode 100644 index 6f77634755..0000000000 --- a/lib/gpu/Makefile.mingw32-cross +++ /dev/null @@ -1,17 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \ - -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \ - -I$(CUDA_HOME)/include -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32 -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw32 -OBJ_DIR = Obj_mingw32 -LIB_DIR = Obj_mingw32 -AR = i686-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw32-cross-mpi b/lib/gpu/Makefile.mingw32-cross-mpi deleted file mode 100644 index 94099cd90b..0000000000 --- a/lib/gpu/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,19 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \ - -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \ - -I../../tools/mingw-cross/mpich2-win32/include/ \ - -DMPICH_IGNORE_CXX_SEEK -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../tools/mingw-cross/mpich2-win32/lib -lmpi -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw32-mpi -OBJ_DIR = Obj_mingw32-mpi -LIB_DIR = Obj_mingw32-mpi -AR = i686-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw64-cross b/lib/gpu/Makefile.mingw64-cross deleted file mode 100644 index 54f6af8c65..0000000000 --- a/lib/gpu/Makefile.mingw64-cross +++ /dev/null @@ -1,18 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \ - -msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \ - -I$(CUDA_HOME)/include -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../src/STUBS -lmpi_mingw64 -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw64 -OBJ_DIR = Obj_mingw64 -LIB_DIR = Obj_mingw64 -AR = x86_64-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw64-cross-mpi b/lib/gpu/Makefile.mingw64-cross-mpi deleted file mode 100644 index 2ff72d98b1..0000000000 --- a/lib/gpu/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,20 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \ - -msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \ - -I../../tools/mingw-cross/mpich2-win64/include/ \ - -DMPICH_IGNORE_CXX_SEEK - -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../tools/mingw-cross/mpich2-win64/lib -lmpi -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw64-mpi -OBJ_DIR = Obj_mingw64-mpi -LIB_DIR = Obj_mingw64-mpi -AR = x86_64-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mpi b/lib/gpu/Makefile.mpi new file mode 120000 index 0000000000..8bad27d081 --- /dev/null +++ b/lib/gpu/Makefile.mpi @@ -0,0 +1 @@ +Makefile.linux \ No newline at end of file diff --git a/lib/gpu/Makefile.serial b/lib/gpu/Makefile.serial index 809e99cc94..9348dc565a 100644 --- a/lib/gpu/Makefile.serial +++ b/lib/gpu/Makefile.serial @@ -1,5 +1,5 @@ # /* ---------------------------------------------------------------------- -# Generic Makefile for CUDA using MPI STUBS library +# Generic Linux Makefile for CUDA # - Change CUDA_ARCH for your GPU # ------------------------------------------------------------------------- */ @@ -7,23 +7,38 @@ EXTRAMAKE = Makefile.lammps.standard -CUDA_HOME = $(HOME)/cuda +ifeq ($(CUDA_HOME),) +CUDA_HOME = /usr/local/cuda +endif + NVCC = nvcc # Tesla CUDA -CUDA_ARCH = -arch=sm_20 +CUDA_ARCH = -arch=sm_21 # newer CUDA #CUDA_ARCH = -arch=sm_13 # older CUDA #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE +CUDA_ARCH = -arch=sm_35 + +# this setting should match LAMMPS Makefile +# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL + +LMP_INC = -DLAMMPS_SMALLBIG + +# precision for GPU calculations +# -D_SINGLE_SINGLE # Single precision for all calculations +# -D_DOUBLE_DOUBLE # Double precision for all calculations +# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double CUDA_PRECISION = -D_SINGLE_DOUBLE + CUDA_INCLUDE = -I$(CUDA_HOME)/include -CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi -CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math +CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs +CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS -CUDR_OPTS = -O2 +CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS +CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias BIN_DIR = ./ OBJ_DIR = ./ @@ -31,5 +46,7 @@ LIB_DIR = ./ AR = ar BSH = /bin/sh +CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini + include Nvidia.makefile -- GitLab