diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index 4a96e244188bb6c7d68987d34696fff392e2c997..c6fe991b9761d5ef20af649f54224b03f2dd7fe8 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,28 @@ # Change Log +## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00) + +**Implemented enhancements:** + +- UnorderedMap: make it accept Devices or MemorySpaces [\#711](https://github.com/kokkos/kokkos/issues/711) +- sort to accept DynamicView and \[begin,end\) indices [\#691](https://github.com/kokkos/kokkos/issues/691) +- ENABLE Macros should only be used via \#ifdef or \#if defined [\#675](https://github.com/kokkos/kokkos/issues/675) +- Remove impl/Kokkos\_Synchronic\_\* [\#666](https://github.com/kokkos/kokkos/issues/666) +- Turning off IVDEP for Intel 14. [\#638](https://github.com/kokkos/kokkos/issues/638) +- Using an installed Kokkos in a target application using CMake [\#633](https://github.com/kokkos/kokkos/issues/633) +- Create Kokkos Bill of Materials [\#632](https://github.com/kokkos/kokkos/issues/632) +- MDRangePolicy and tagged evaluators [\#547](https://github.com/kokkos/kokkos/issues/547) +- Add PGI support [\#289](https://github.com/kokkos/kokkos/issues/289) + +**Fixed bugs:** + +- Output from PerTeam fails [\#733](https://github.com/kokkos/kokkos/issues/733) +- Cuda: architecture flag not added to link line [\#688](https://github.com/kokkos/kokkos/issues/688) +- Getting large chunks of memory for a thread team in a universal way [\#664](https://github.com/kokkos/kokkos/issues/664) +- Kokkos RNG normal\(\) function hangs for small seed value [\#655](https://github.com/kokkos/kokkos/issues/655) +- Kokkos Tests Errors on Shepard/HSW Builds [\#644](https://github.com/kokkos/kokkos/issues/644) + ## [2.02.15](https://github.com/kokkos/kokkos/tree/2.02.15) (2017-02-10) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.07...2.02.15) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 16854c839a044e5da9084d2a1a7eeb4360ab0327..1c820660ae375006e83bd50c0d4bbd8472ed0258 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -98,10 +98,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_QTHREAD - KOKKOS_HAVE_QTHREAD - "Enable QTHREAD support in Kokkos." - "${TPL_ENABLE_QTHREAD}" + Kokkos_ENABLE_Qthreads + KOKKOS_HAVE_QTHREADS + "Enable Qthreads support in Kokkos." + "${TPL_ENABLE_QTHREADS}" ) TRIBITS_ADD_OPTION_AND_DEFINE( @@ -110,7 +110,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "Enable C++11 support in Kokkos." "${${PROJECT_NAME}_ENABLE_CXX11}" ) - + TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_HWLOC KOKKOS_HAVE_HWLOC @@ -213,4 +213,3 @@ TRIBITS_EXCLUDE_FILES( ) TRIBITS_PACKAGE_POSTPROCESS() - diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 9d00c19027a37387888d9f0265c7cdfecb45cc56..5b094dba8cb786c94c9119a5865fcc0dadf9a76f 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -1,39 +1,38 @@ -# Default settings common options +# Default settings common options. 
#LAMMPS specific settings: KOKKOS_PATH=../../lib/kokkos CXXFLAGS=$(CCFLAGS) -#Options: OpenMP,Serial,Pthreads,Cuda +# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial KOKKOS_DEVICES ?= "OpenMP" #KOKKOS_DEVICES ?= "Pthreads" -#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX +# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX KOKKOS_ARCH ?= "" -#Options: yes,no +# Options: yes,no KOKKOS_DEBUG ?= "no" -#Options: hwloc,librt,experimental_memkind +# Options: hwloc,librt,experimental_memkind KOKKOS_USE_TPLS ?= "" -#Options: c++11,c++1z +# Options: c++11,c++1z KOKKOS_CXX_STANDARD ?= "c++11" -#Options: aggressive_vectorization,disable_profiling +# Options: aggressive_vectorization,disable_profiling KOKKOS_OPTIONS ?= "" -#Default settings specific options -#Options: force_uvm,use_ldg,rdc,enable_lambda +# Default settings specific options. +# Options: force_uvm,use_ldg,rdc,enable_lambda KOKKOS_CUDA_OPTIONS ?= "enable_lambda" -# Check for general settings - +# Check for general settings. KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l)) -# Check for external libraries +# Check for external libraries. KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) -# Check for advanced settings +# Check for advanced settings. KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) @@ -41,21 +40,21 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) -# Check for Kokkos Host Execution Spaces one of which must be on - +# Check for Kokkos Host Execution Spaces one of which must be on. KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) +KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) -KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 +endif endif endif -# Check for other Execution Spaces - +# Check for other Execution Spaces. 
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) @@ -64,27 +63,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .) endif -# Check OS - +# Check OS. KOKKOS_OS := $(shell uname -s) KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l) KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l) KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l) -# Check compiler - -KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) -KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) -KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) -KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) -KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) +# Check compiler. +KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) +KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) +KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) +KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) +KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) ifneq ($(OMPI_CXX),) KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l) endif ifneq ($(MPICH_CXX),) KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l) endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) +KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -95,17 +92,17 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) - $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) + $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) endif KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 endif endif - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -mp + KOKKOS_INTERNAL_OPENMP_FLAG := -mp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp @@ -114,7 +111,7 @@ else KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment + # OpenMP is turned on by default in Cray compiler environment. KOKKOS_INTERNAL_OPENMP_FLAG := else KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp @@ -138,9 +135,9 @@ else endif endif -# Check for Kokkos Architecture settings +# Check for Kokkos Architecture settings. -#Intel based +# Intel based. 
KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) @@ -148,8 +145,8 @@ KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) -#NVIDIA based -NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +# NVIDIA based. +NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) @@ -170,46 +167,46 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) -KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) -KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ - + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ - + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ - + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) -endif - -#ARM based + KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) + KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) + KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) +endif + +# ARM based. KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) -#IBM based +# IBM based. KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_POWER9 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power9 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc)) -#AMD based +# AMD based. KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) -#Any AVX? +# Any AVX? 
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -# Decide what ISA level we are able to support -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) -KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) -KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) +# Decide what ISA level we are able to support. +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) +KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) +KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) -#Incompatible flags? +# Incompatible flags? KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) @@ -220,7 +217,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) endif -#Generating the list of Flags +# Generating the list of Flags. KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src @@ -233,98 +230,96 @@ KOKKOS_CXXFLAGS = KOKKOS_LIBS = -lkokkos -ldl KOKKOS_LDFLAGS = -L$(shell pwd) -KOKKOS_SRC = +KOKKOS_SRC = KOKKOS_HEADERS = -#Generating the KokkosCore_config.h file +# Generating the KokkosCore_config.h file. 
tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) tmp := $(shell date >> KokkosCore_config.tmp) tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) - tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) + tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) endif -ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREADS 1" >> KokkosCore_config.tmp ) endif -ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) - tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) -endif - -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include - KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib - tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) endif tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) - KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_CXX1Z_FLAG) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo + KOKKOS_CXXFLAGS += -lineinfo endif - KOKKOS_CXXFLAGS += -g - KOKKOS_LDFLAGS += -g -ldl - tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -g + KOKKOS_LDFLAGS += -g -ldl + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) - KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include - KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib - KOKKOS_LIBS += -lhwloc - tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) - tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) - KOKKOS_LIBS += -lrt + KOKKOS_LIBS += -lrt endif ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include - KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib - KOKKOS_LIBS += -lmemkind + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) endif @@ -341,262 +336,286 @@ endif tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true endif ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) ifeq ($(shell test 
$(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda else $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) endif endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) endif endif + endif -#Add Architecture flags +# Add Architecture flags. ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a - KOKKOS_LDFLAGS += -march=armv8-a - endif + KOKKOS_CXXFLAGS += -march=armv8-a + KOKKOS_LDFLAGS += -march=armv8-a endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8.1-a - KOKKOS_LDFLAGS += -march=armv8.1-a - endif + KOKKOS_CXXFLAGS += -march=armv8.1-a + KOKKOS_LDFLAGS += -march=armv8.1-a endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += - KOKKOS_LDFLAGS += - else - KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx - KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx - endif + KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx + KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=sandybridge - 
KOKKOS_LDFLAGS += -tp=sandybridge - else - # Assume that this is a really a GNU compiler - KOKKOS_CXXFLAGS += -mavx - KOKKOS_LDFLAGS += -mavx - endif - endif - endif + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=sandybridge + KOKKOS_LDFLAGS += -tp=sandybridge + else + # Assume that this is a really a GNU compiler. + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - else - # Assume that this is a really a GNU compiler or it could be XL on P8 - KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 - KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 - endif + else + # Assume that this is a really a GNU compiler or it could be XL on P8. + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER9 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - else - # Assume that this is a really a GNU compiler or it could be XL on P9 - KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 - KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 - endif + else + # Assume that this is a really a GNU compiler or it could be XL on P9. + KOKKOS_CXXFLAGS += -mcpu=power9 -mtune=power9 + KOKKOS_LDFLAGS += -mcpu=power9 -mtune=power9 + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX2 - KOKKOS_LDFLAGS += -xCORE-AVX2 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - KOKKOS_CXXFLAGS += -tp=haswell - KOKKOS_LDFLAGS += -tp=haswell - else - # Assume that this is a really a GNU compiler - KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 - KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 - endif - endif - endif + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=haswell + KOKKOS_LDFLAGS += -tp=haswell + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xMIC-AVX512 - KOKKOS_LDFLAGS += -xMIC-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xMIC-AVX512 + KOKKOS_LDFLAGS += -xMIC-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - else - # Asssume that this is really a GNU compiler - KOKKOS_CXXFLAGS += -march=knl - KOKKOS_LDFLAGS += -march=knl - endif - endif - endif + else + # Asssume that this is really a GNU compiler. + KOKKOS_CXXFLAGS += -march=knl + KOKKOS_LDFLAGS += -march=knl + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) - ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) - KOKKOS_CXXFLAGS += -xCORE-AVX512 - KOKKOS_LDFLAGS += -xCORE-AVX512 - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX512 + KOKKOS_LDFLAGS += -xCORE-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) - else - # Nothing here yet - KOKKOS_CXXFLAGS += -march=skylake-avx512 - KOKKOS_LDFLAGS += -march=skylake-avx512 - endif - endif - endif + else + # Nothing here yet. + KOKKOS_CXXFLAGS += -march=skylake-avx512 + KOKKOS_LDFLAGS += -march=skylake-avx512 + endif + endif + endif endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -mmic - KOKKOS_LDFLAGS += -mmic + tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mmic + KOKKOS_LDFLAGS += -mmic endif -#Figure out the architecture flag for Cuda +# Figure out the architecture flag for Cuda. 
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch + KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch + KOKKOS_CXXFLAGS += -x cuda endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 endif ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 endif + endif - + KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) -KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) + KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) else -KOKKOS_INTERNAL_NEW_CONFIG := 1 + KOKKOS_INTERNAL_NEW_CONFIG := 1 endif ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) - tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) + tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) endif KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) @@ -609,53 +628,57 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include - KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 - KOKKOS_LIBS += -lcudart -lcuda + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBS += -lcudart -lcuda endif -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - 
KOKKOS_LIBS += -lpthread - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) + else + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + endif + + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_LIBS += -lqthread - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) + KOKKOS_LIBS += -lpthread endif -ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) - KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) - else - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) - endif - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) + KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib + KOKKOS_LIBS += -lqthread endif -#Explicitly set the GCC Toolchain for Clang +# Explicitly set the GCC Toolchain for Clang. ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) - KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC - KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) + KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) + KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) + KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC + KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) endif -#With Cygwin functions such as fdopen and fileno are not defined -#when strict ansi is enabled. strict ansi gets enabled with --std=c++11 -#though. So we hard undefine it here. Not sure if that has any bad side effects -#This is needed for gtest actually, not for Kokkos itself! +# With Cygwin functions such as fdopen and fileno are not defined +# when strict ansi is enabled. strict ansi gets enabled with --std=c++11 +# though. So we hard undefine it here. Not sure if that has any bad side effects +# This is needed for gtest actually, not for Kokkos itself! ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ endif -# Setting up dependencies +# Setting up dependencies. 
KokkosCore_config.h: diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index a48a5f6eb7ea78712b3f6caf695745b4ef18c043..54cacb741b4f35a0033d8de0e57ded9d4dab0a00 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -18,6 +18,8 @@ Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp +Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -43,11 +45,11 @@ Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokk $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) -Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp -Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) +Kokkos_QthreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_QthreadsExec.cpp +Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthreads/Kokkos_Qthreads_Task.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -59,4 +61,3 @@ endif Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp - diff --git a/lib/kokkos/README b/lib/kokkos/README index 7ebde23a1fdbc0bff5f62c025e890b204edec591..257a2e5db475dea8c89f1468c42432614c909762 100644 --- a/lib/kokkos/README +++ b/lib/kokkos/README @@ -45,31 +45,39 @@ Primary tested compilers on X86 are: GCC 4.8.4 GCC 4.9.2 GCC 5.1.0 + GCC 5.2.0 Intel 14.0.4 Intel 15.0.2 Intel 16.0.1 Intel 17.0.098 + Intel 17.1.132 Clang 3.5.2 Clang 3.6.1 + Clang 3.7.1 + Clang 3.8.1 Clang 3.9.0 + PGI 17.1 Primary tested compilers on Power 8 are: GCC 5.4.0 (OpenMP,Serial) IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug) Primary tested compilers on Intel KNL are: + GCC 6.2.0 Intel 16.2.181 (with gcc 4.7.2) Intel 17.0.098 (with gcc 4.7.2) + Intel 17.1.132 (with gcc 4.9.3) + Intel 17.2.174 (with gcc 4.9.3) + Intel 18.0.061 (beta) (with gcc 4.9.3) Secondary tested compilers are: - CUDA 7.0 (with 
gcc 4.7.2) - CUDA 7.5 (with gcc 4.7.2) + CUDA 7.0 (with gcc 4.8.4) + CUDA 7.5 (with gcc 4.8.4) CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8) CUDA/Clang 8.0 using Clang/Trunk compiler Other compilers working: X86: - PGI 15.4 Cygwin 2.1.0 64bit with gcc 4.9.3 Known non-working combinations: diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake index 1d71d8af341181f689a6a8bf63036b67584cb138..c36b62523fadb628e970b6eccf57a9caaa317f1e 100644 --- a/lib/kokkos/algorithms/cmake/Dependencies.cmake +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -1,5 +1,5 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_REQUIRED_PACKAGES KokkosCore + LIB_REQUIRED_PACKAGES KokkosCore KokkosContainers LIB_OPTIONAL_TPLS Pthread CUDA HWLOC TEST_OPTIONAL_TPLS CUSPARSE ) diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index d376173bf183615e29f66bbecf6bd42cd1134a9e..bd73582362eed46161ee0ac0cf36fec4d5178129 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -547,7 +547,7 @@ namespace Kokkos { KOKKOS_INLINE_FUNCTION Random_XorShift64 (uint64_t state, int state_idx = 0) - : state_(state),state_idx_(state_idx){} + : state_(state==0?uint64_t(1318319):state),state_idx_(state_idx){} KOKKOS_INLINE_FUNCTION uint32_t urand() { @@ -719,6 +719,9 @@ namespace Kokkos { } void init(uint64_t seed, int num_states) { + if(seed==0) + seed = uint64_t(1318319); + num_states_ = num_states; locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); @@ -968,8 +971,9 @@ namespace Kokkos { inline void init(uint64_t seed, int num_states) { + if(seed==0) + seed = uint64_t(1318319); num_states_ = num_states; - locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); p_ = int_view_type("Kokkos::Random_XorShift1024::p",num_states_); diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp index 5b8c65fee1869c25681567036314d25beab9a5f2..237de751fe4b30afa1abcf475ca8af8c52cea7ab 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -53,69 +53,122 @@ namespace Kokkos { namespace Impl { - template<class ValuesViewType, int Rank=ValuesViewType::Rank> + template< class DstViewType , class SrcViewType + , int Rank = DstViewType::Rank > struct CopyOp; - template<class ValuesViewType> - struct CopyOp<ValuesViewType,1> { - template<class DstType, class SrcType> + template< class DstViewType , class SrcViewType > + struct CopyOp<DstViewType,SrcViewType,1> { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { + static void copy(DstViewType const& dst, size_t i_dst, + SrcViewType const& src, size_t i_src ) { dst(i_dst) = src(i_src); } }; - template<class ValuesViewType> - struct CopyOp<ValuesViewType,2> { - template<class DstType, class SrcType> + template< class DstViewType , class SrcViewType > + struct CopyOp<DstViewType,SrcViewType,2> { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { - for(int j = 0;j< (int) dst.dimension_1(); j++) + static void copy(DstViewType const& dst, size_t i_dst, + SrcViewType const& src, size_t i_src ) { + for(int j = 0;j< (int) dst.extent(1); j++) dst(i_dst,j) = src(i_src,j); } }; - template<class ValuesViewType> - struct CopyOp<ValuesViewType,3> { - 
template<class DstType, class SrcType> + template< class DstViewType , class SrcViewType > + struct CopyOp<DstViewType,SrcViewType,3> { KOKKOS_INLINE_FUNCTION - static void copy(DstType& dst, size_t i_dst, - SrcType& src, size_t i_src ) { - for(int j = 0; j<dst.dimension_1(); j++) - for(int k = 0; k<dst.dimension_2(); k++) + static void copy(DstViewType const& dst, size_t i_dst, + SrcViewType const& src, size_t i_src ) { + for(int j = 0; j<dst.extent(1); j++) + for(int k = 0; k<dst.extent(2); k++) dst(i_dst,j,k) = src(i_src,j,k); } }; } -template<class KeyViewType, class BinSortOp, class ExecutionSpace = typename KeyViewType::execution_space, - class SizeType = typename KeyViewType::memory_space::size_type> +//---------------------------------------------------------------------------- + +template< class KeyViewType + , class BinSortOp + , class Space = typename KeyViewType::device_type + , class SizeType = typename KeyViewType::memory_space::size_type + > class BinSort { +public: + template< class DstViewType , class SrcViewType > + struct copy_functor { -public: - template<class ValuesViewType, class PermuteViewType, class CopyOp> - struct bin_sort_sort_functor { - typedef ExecutionSpace execution_space; - typedef typename ValuesViewType::non_const_type values_view_type; - typedef typename ValuesViewType::const_type const_values_view_type; - Kokkos::View<typename values_view_type::const_data_type,typename values_view_type::array_layout, - typename values_view_type::memory_space,Kokkos::MemoryTraits<Kokkos::RandomAccess> > values; - values_view_type sorted_values; - typename PermuteViewType::const_type sort_order; - bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_): - values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {} + typedef typename SrcViewType::const_type src_view_type ; + + typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + + DstViewType dst_values ; + src_view_type src_values ; + int dst_offset ; + + copy_functor( DstViewType const & dst_values_ + , int const & dst_offset_ + , SrcViewType const & src_values_ + ) + : dst_values( dst_values_ ) + , src_values( src_values_ ) + , dst_offset( dst_offset_ ) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + // printf("copy: dst(%i) src(%i)\n",i+dst_offset,i); + copy_op::copy(dst_values,i+dst_offset,src_values,i); + } + }; + + template< class DstViewType + , class PermuteViewType + , class SrcViewType + > + struct copy_permute_functor { + + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + typedef typename std::conditional + < Kokkos::is_view< SrcViewType >::value + , Kokkos::View< typename SrcViewType::const_data_type + , typename SrcViewType::array_layout + , typename SrcViewType::device_type + , Kokkos::MemoryTraits<Kokkos::RandomAccess> + > + , typename SrcViewType::const_type + >::type src_view_type ; + + typedef typename PermuteViewType::const_type perm_view_type ; + + typedef Impl::CopyOp< DstViewType , src_view_type > copy_op ; + + DstViewType dst_values ; + perm_view_type sort_order ; + src_view_type src_values ; + + copy_permute_functor( DstViewType const & dst_values_ + , PermuteViewType const & sort_order_ + , SrcViewType const & src_values_ + ) + : dst_values( dst_values_ ) + , sort_order( sort_order_ ) + , src_values( src_values_ ) + {} KOKKOS_INLINE_FUNCTION void operator() (const int& i) const { - //printf("Sort: %i %i\n",i,sort_order(i)); - CopyOp::copy(sorted_values,i,values,sort_order(i)); + // printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i)); + copy_op::copy(dst_values,i,src_values,sort_order(i)); } }; - typedef ExecutionSpace execution_space; + typedef typename Space::execution_space execution_space; typedef BinSortOp bin_op_type; struct bin_count_tag {}; @@ -124,84 +177,137 @@ public: struct bin_sort_bins_tag {}; public: + typedef SizeType size_type; typedef size_type value_type; - typedef Kokkos::View<size_type*, execution_space> offset_type; - typedef Kokkos::View<const int*, execution_space> bin_count_type; + typedef Kokkos::View<size_type*, Space> offset_type; + typedef Kokkos::View<const int*, Space> bin_count_type; + typedef typename KeyViewType::const_type const_key_view_type ; - typedef Kokkos::View<typename KeyViewType::const_data_type, - typename KeyViewType::array_layout, - typename KeyViewType::memory_space> const_key_view_type; - typedef Kokkos::View<typename KeyViewType::const_data_type, - typename KeyViewType::array_layout, - typename KeyViewType::memory_space, - Kokkos::MemoryTraits<Kokkos::RandomAccess> > const_rnd_key_view_type; + // If a Kokkos::View then can generate constant random access + // otherwise can only use the constant type. 
+ + typedef typename std::conditional + < Kokkos::is_view< KeyViewType >::value + , Kokkos::View< typename KeyViewType::const_data_type, + typename KeyViewType::array_layout, + typename KeyViewType::device_type, + Kokkos::MemoryTraits<Kokkos::RandomAccess> > + , const_key_view_type + >::type const_rnd_key_view_type; typedef typename KeyViewType::non_const_value_type non_const_key_scalar; typedef typename KeyViewType::const_value_type const_key_scalar; + typedef Kokkos::View<int*, Space, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic_type ; + private: + const_key_view_type keys; const_rnd_key_view_type keys_rnd; public: - BinSortOp bin_op; - offset_type bin_offsets; + BinSortOp bin_op ; + offset_type bin_offsets ; + bin_count_atomic_type bin_count_atomic ; + bin_count_type bin_count_const ; + offset_type sort_order ; - Kokkos::View<int*, ExecutionSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > bin_count_atomic; - bin_count_type bin_count_const; - - offset_type sort_order; - - bool sort_within_bins; + int range_begin ; + int range_end ; + bool sort_within_bins ; public: - // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) - BinSort(const_key_view_type keys_, BinSortOp bin_op_, - bool sort_within_bins_ = false) - :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) { + BinSort() {} - bin_count_atomic = Kokkos::View<int*, ExecutionSpace >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); + //---------------------------------------- + // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) + BinSort( const_key_view_type keys_ + , int range_begin_ + , int range_end_ + , BinSortOp bin_op_ + , bool sort_within_bins_ = false + ) + : keys(keys_) + , keys_rnd(keys_) + , bin_op(bin_op_) + , bin_offsets() + , bin_count_atomic() + , bin_count_const() + , sort_order() + , range_begin( range_begin_ ) + , range_end( range_end_ ) + , sort_within_bins( sort_within_bins_ ) + { + bin_count_atomic = Kokkos::View<int*, Space >("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); bin_count_const = bin_count_atomic; bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); - sort_order = offset_type("PermutationVector",keys.dimension_0()); - sort_within_bins = sort_within_bins_; + sort_order = offset_type("PermutationVector",range_end-range_begin); } + BinSort( const_key_view_type keys_ + , BinSortOp bin_op_ + , bool sort_within_bins_ = false + ) + : BinSort( keys_ , 0 , keys_.extent(0), bin_op_ , sort_within_bins_ ) {} + + //---------------------------------------- // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed void create_permute_vector() { - Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_count_tag> (0,keys.dimension_0()),*this); - Kokkos::parallel_scan(Kokkos::RangePolicy<ExecutionSpace,bin_offset_tag> (0,bin_op.max_bins()) ,*this); + const size_t len = range_end - range_begin ; + Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this); Kokkos::deep_copy(bin_count_atomic,0); - Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_binning_tag> (0,keys.dimension_0()),*this); + Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this); if(sort_within_bins) - Kokkos::parallel_for (Kokkos::RangePolicy<ExecutionSpace,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this); + Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this); } // Sort a view with respect ot the first dimension using the permutation array template<class ValuesViewType> - void sort(ValuesViewType values) { - ValuesViewType sorted_values = ValuesViewType("Copy", - values.dimension_0(), - values.dimension_1(), - values.dimension_2(), - values.dimension_3(), - values.dimension_4(), - values.dimension_5(), - values.dimension_6(), - values.dimension_7()); - - parallel_for(values.dimension_0(), - bin_sort_sort_functor<ValuesViewType, offset_type, - Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order)); - - deep_copy(values,sorted_values); + void sort( ValuesViewType const & values) + { + typedef + Kokkos::View< typename ValuesViewType::data_type, + typename ValuesViewType::array_layout, + typename ValuesViewType::device_type > + scratch_view_type ; + + const size_t len = range_end - range_begin ; + + scratch_view_type + sorted_values("Scratch", + len, + values.extent(1), + values.extent(2), + values.extent(3), + values.extent(4), + values.extent(5), + values.extent(6), + values.extent(7)); + + { + copy_permute_functor< scratch_view_type /* DstViewType */ + , offset_type /* PermuteViewType */ + , ValuesViewType /* SrcViewType */ + > + functor( sorted_values , sort_order , values ); + + parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor); + } + + { + copy_functor< ValuesViewType , scratch_view_type > + functor( values , range_begin , sorted_values ); + + parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor); + } } // Get the permutation vector @@ -217,9 +323,11 @@ public: bin_count_type get_bin_count() const {return bin_count_const;} public: + KOKKOS_INLINE_FUNCTION void operator() (const bin_count_tag& tag, const int& i) const { - bin_count_atomic(bin_op.bin(keys,i))++; + const int j = range_begin + i ; + bin_count_atomic(bin_op.bin(keys,j))++; } KOKKOS_INLINE_FUNCTION @@ -232,10 +340,11 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const bin_binning_tag& tag, const int& i) const { - const int bin = bin_op.bin(keys,i); + const int j = range_begin + i ; + const int bin = bin_op.bin(keys,j); const int count = bin_count_atomic(bin)++; - sort_order(bin_offsets(bin) + count) = i; + sort_order(bin_offsets(bin) + count) = j ; } KOKKOS_INLINE_FUNCTION @@ -262,13 +371,19 @@ public: } }; +//---------------------------------------------------------------------------- + template<class KeyViewType> struct BinOp1D { - const int max_bins_; - const double mul_; + int max_bins_; + double mul_; typename KeyViewType::const_value_type range_; 
typename KeyViewType::const_value_type min_; + BinOp1D():max_bins_(0),mul_(0.0), + range_(typename KeyViewType::const_value_type()), + min_(typename KeyViewType::const_value_type()) {} + //Construct BinOp with number of bins, minimum value and maxuimum value BinOp1D(int max_bins__, typename KeyViewType::const_value_type min, typename KeyViewType::const_value_type max ) @@ -302,12 +417,14 @@ struct BinOp3D { typename KeyViewType::non_const_value_type range_[3]; typename KeyViewType::non_const_value_type min_[3]; + BinOp3D() {} + BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], typename KeyViewType::const_value_type max[] ) { - max_bins_[0] = max_bins__[0]+1; - max_bins_[1] = max_bins__[1]+1; - max_bins_[2] = max_bins__[2]+1; + max_bins_[0] = max_bins__[0]; + max_bins_[1] = max_bins__[1]; + max_bins_[2] = max_bins__[2]; mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); @@ -364,7 +481,7 @@ bool try_std_sort(ViewType view) { possible = possible && (ViewType::Rank == 1); possible = possible && (stride[0] == 1); if(possible) { - std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0()); + std::sort(view.data(),view.data()+view.extent(0)); } return possible; } @@ -386,7 +503,8 @@ struct min_max_functor { } template<class ViewType> -void sort(ViewType view, bool always_use_kokkos_sort = false) { +void sort( ViewType const & view , bool const always_use_kokkos_sort = false) +{ if(!always_use_kokkos_sort) { if(Impl::try_std_sort(view)) return; } @@ -394,14 +512,37 @@ void sort(ViewType view, bool always_use_kokkos_sort = false) { Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result; Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result); - parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()), + parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)), Impl::min_max_functor<ViewType>(view),reducer); if(result.min_val == result.max_val) return; - BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true); + BinSort<ViewType, CompType> bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true); bin_sort.create_permute_vector(); bin_sort.sort(view); } +template<class ViewType> +void sort( ViewType view + , size_t const begin + , size_t const end + ) +{ + typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy ; + typedef BinOp1D<ViewType> CompType; + + Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result; + Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result); + + parallel_reduce( range_policy( begin , end ) + , Impl::min_max_functor<ViewType>(view),reducer ); + + if(result.min_val == result.max_val) return; + + BinSort<ViewType, CompType> + bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true); + + bin_sort.create_permute_vector(); + bin_sort.sort(view); +} } #endif diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 03e4fb691ef1a4ae6a7bed6471ccba4e3fd53762..61ffa6f43a39ecbb1640a71de5afb9be33cd10dd 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -44,6 +44,7 @@ #include <gtest/gtest.h> #include<Kokkos_Core.hpp> +#include<Kokkos_DynamicView.hpp> 
#include<Kokkos_Random.hpp> #include<Kokkos_Sort.hpp> @@ -192,17 +193,81 @@ void test_3D_sort(unsigned int n) { double epsilon = 1e-10; unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; - printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + if ( sort_fails ) + printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + ASSERT_EQ(sort_fails,0); ASSERT_EQ(equal_sum,1); } +//---------------------------------------------------------------------------- + +template<class ExecutionSpace, typename KeyType> +void test_dynamic_view_sort(unsigned int n ) +{ + typedef typename ExecutionSpace::memory_space memory_space ; + typedef Kokkos::Experimental::DynamicView<KeyType*,ExecutionSpace> KeyDynamicViewType; + typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType; + + const size_t upper_bound = 2 * n ; + + typename KeyDynamicViewType::memory_pool + pool( memory_space() , 2 * n * sizeof(KeyType) ); + + KeyDynamicViewType keys("Keys",pool,upper_bound); + + keys.resize_serial(n); + + KeyViewType keys_view("KeysTmp", n ); + + // Test sorting array with all numbers equal + Kokkos::deep_copy(keys_view,KeyType(1)); + Kokkos::Experimental::deep_copy(keys,keys_view); + Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + + Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931); + Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND); + + Kokkos::Experimental::deep_copy(keys,keys_view); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_before); + + Kokkos::sort(keys, 0 /* begin */ , n /* end */ ); + + Kokkos::Experimental::deep_copy( keys_view , keys ); + + Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_after); + Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys_view),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + if ( sort_fails != 0 || equal_sum != 1 ) { + std::cout << " N = " << n + << " ; sum_before = " << sum_before + << " ; sum_after = " << sum_after + << " ; ratio = " << ratio + << std::endl ; + } + + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +//---------------------------------------------------------------------------- + template<class ExecutionSpace, typename KeyType> void test_sort(unsigned int N) { test_1D_sort<ExecutionSpace,KeyType>(N*N*N, true); test_1D_sort<ExecutionSpace,KeyType>(N*N*N, false); test_3D_sort<ExecutionSpace,KeyType>(N); + test_dynamic_view_sort<ExecutionSpace,KeyType>(N*N); } } diff --git a/lib/kokkos/bin/nvcc_wrapper b/lib/kokkos/bin/nvcc_wrapper index cb206cf88b2c4e3a4f289bc919cc272e22749f36..09fa5d500abcdfe718a6d3bb12db5c91fc5ec174 100755 --- a/lib/kokkos/bin/nvcc_wrapper +++ b/lib/kokkos/bin/nvcc_wrapper @@ -140,6 +140,9 @@ do #strip of pedantic because it produces endless warnings about #LINE added by the preprocessor -pedantic|-Wpedantic|-ansi) ;; + #strip of -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C" + -Woverloaded-virtual) + ;; #strip -Xcompiler because we add it -Xcompiler) if [ $first_xcompiler_arg -eq 1 ]; then @@ -190,7 +193,7 @@ do object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking - *.dylib) + @*|*.dylib) object_files="$object_files -Xlinker $1" object_files_xlinker="$object_files_xlinker -Xlinker $1" ;; diff --git a/lib/kokkos/cmake/deps/QTHREAD.cmake b/lib/kokkos/cmake/deps/QTHREADS.cmake similarity index 98% rename from lib/kokkos/cmake/deps/QTHREAD.cmake rename to lib/kokkos/cmake/deps/QTHREADS.cmake index 994b72b20096f4462beab51d19e4410cd73bf05b..c312f2590bcd29197a0cf3fbd5e0b484579a09c2 100644 --- a/lib/kokkos/cmake/deps/QTHREAD.cmake +++ b/lib/kokkos/cmake/deps/QTHREADS.cmake @@ -63,8 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) - diff --git a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake similarity index 98% rename from lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake rename to lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake index 994b72b20096f4462beab51d19e4410cd73bf05b..c312f2590bcd29197a0cf3fbd5e0b484579a09c2 100644 --- a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake +++ b/lib/kokkos/cmake/tpls/FindTPLQTHREADS.cmake @@ -63,8 +63,7 @@ # Source: https://code.google.com/p/qthreads # -TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREADS REQUIRED_HEADERS qthread.h REQUIRED_LIBS_NAMES "qthread" ) - diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh index fa588c778f68330ff130364e9425d5a6aefa357c..d4fb25a8e139c315a862306173a0b1d2a07e7cbd 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-all.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -6,7 +6,7 @@ #----------------------------------------------------------------------------- # Building on 'kokkos-dev.sandia.gov' with enabled capabilities: # -# Cuda, OpenMP, Threads, Qthread, hwloc +# Cuda, OpenMP, Threads, Qthreads, hwloc # # module loaded on 'kokkos-dev.sandia.gov' for this build # @@ -82,13 +82,13 @@ 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" #----------------------------------------------------------------------------- -# Qthread +# Qthreads -QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" +QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include" -CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib" #----------------------------------------------------------------------------- # C++11 @@ -108,6 +108,3 @@ rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} - -#----------------------------------------------------------------------------- - diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index 446cbb021610164980cc6dd0fdced42b162422d7..9eaecb5031b1328989e114b50a86ac07c78b8e29 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -4,4 +4,5 @@ tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304 tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966 -tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 +tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 +tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index 2c15e951ba25f4831c888fa731b9e25954ee0ead..6909606643df6b83c2dc77c2469768e02a13844d 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -6,29 +6,29 @@ set -o pipefail -# Determine current machine +# Determine current machine. MACHINE="" HOSTNAME=$(hostname) PROCESSOR=`uname -p` if [[ "$HOSTNAME" =~ (white|ride).* ]]; then - MACHINE=white + MACHINE=white elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then - MACHINE=bowman + MACHINE=bowman elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name - if [[ "$PROCESSOR" = "aarch64" ]]; then - MACHINE=sullivan - else - MACHINE=shepard - fi + if [[ "$PROCESSOR" = "aarch64" ]]; then + MACHINE=sullivan + else + MACHINE=shepard + fi elif [[ "$HOSTNAME" =~ apollo ]]; then - MACHINE=apollo + MACHINE=apollo elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then - MACHINE=sems + MACHINE=sems else - echo "Unrecognized machine" >&2 - exit 1 + echo "Unrecognized machine" >&2 + exit 1 fi GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" @@ -45,10 +45,11 @@ CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limi INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" CUDA_WARNING_FLAGS="" -# Default. Machine specific can override +# Default. Machine specific can override. 
DEBUG=False ARGS="" CUSTOM_BUILD_LIST="" +QTHREADS_PATH="" DRYRUN=False BUILD_ONLY=False declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 @@ -60,86 +61,90 @@ PRINT_HELP=False OPT_FLAG="" KOKKOS_OPTIONS="" - # -# Handle arguments +# Handle arguments. # while [[ $# > 0 ]] do -key="$1" -case $key in ---kokkos-path*) -KOKKOS_PATH="${key#*=}" -;; ---build-list*) -CUSTOM_BUILD_LIST="${key#*=}" -;; ---debug*) -DEBUG=True -;; ---build-only*) -BUILD_ONLY=True -;; ---test-script*) -TEST_SCRIPT=True -;; ---skip-hwloc*) -SKIP_HWLOC=True -;; ---num*) -NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" -;; ---dry-run*) -DRYRUN=True -;; ---spot-check*) -SPOT_CHECK=True -;; ---arch*) -ARCH_FLAG="--arch=${key#*=}" -;; ---opt-flag*) -OPT_FLAG="${key#*=}" -;; ---with-cuda-options*) -KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" -;; ---help*) -PRINT_HELP=True -;; -*) -# args, just append -ARGS="$ARGS $1" -;; -esac -shift + key="$1" + + case $key in + --kokkos-path*) + KOKKOS_PATH="${key#*=}" + ;; + --qthreads-path*) + QTHREADS_PATH="${key#*=}" + ;; + --build-list*) + CUSTOM_BUILD_LIST="${key#*=}" + ;; + --debug*) + DEBUG=True + ;; + --build-only*) + BUILD_ONLY=True + ;; + --test-script*) + TEST_SCRIPT=True + ;; + --skip-hwloc*) + SKIP_HWLOC=True + ;; + --num*) + NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" + ;; + --dry-run*) + DRYRUN=True + ;; + --spot-check*) + SPOT_CHECK=True + ;; + --arch*) + ARCH_FLAG="--arch=${key#*=}" + ;; + --opt-flag*) + OPT_FLAG="${key#*=}" + ;; + --with-cuda-options*) + KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}" + ;; + --help*) + PRINT_HELP=True + ;; + *) + # args, just append + ARGS="$ARGS $1" + ;; + esac + + shift done SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd ) -# set kokkos path +# Set kokkos path. if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) + # Ensure KOKKOS_PATH is abs path. + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi # -# Machine specific config +# Machine specific config. 
# if [ "$MACHINE" = "sems" ]; then - source /projects/sems/modulefiles/utils/sems-modules-init.sh + source /projects/sems/modulefiles/utils/sems-modules-init.sh - BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" - CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="" + fi if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) @@ -153,120 +158,118 @@ if [ "$MACHINE" = "sems" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi - elif [ "$MACHINE" = "white" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" - IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" - CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0" + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0" - # Don't do pthread on white - GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + # Don't do pthread on white. 
+ GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" - "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=Power8,Kepler37" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=Power8,Kepler37" + fi + + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "bowman" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" - OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - ) + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=KNL" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=KNL" + fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "sullivan" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=96 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=96 - BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=ARMv8-ThunderX" - fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=ARMv8-ThunderX" + fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "shepard" ]; then - source /etc/profile.d/modules.sh - SKIP_HWLOC=True - export SLURM_TASKS_PER_NODE=32 + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 - BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" - OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" - # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - "intel/17.0.098 
$BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" - ) + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=HSW" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=HSW" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 elif [ "$MACHINE" = "apollo" ]; then - source /projects/sems/modulefiles/utils/sems-modules-init.sh - module use /home/projects/modulefiles/local/x86-64 - module load kokkos-env + source /projects/sems/modulefiles/utils/sems-modules-init.sh + module use /home/projects/modulefiles/local/x86-64 + module load kokkos-env - module load sems-git - module load sems-tex - module load sems-cmake/3.5.2 - module load sems-gdb + module load sems-git + module load sems-tex + module load sems-cmake/3.5.2 + module load sems-gdb - SKIP_HWLOC=True + SKIP_HWLOC=True - BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" - CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" - CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" + BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base" + CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base" + CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base" - CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44" - NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" + CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44" + NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0" - BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" - BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" - BUILD_LIST_CLANG="Serial,Pthread,OpenMP" + BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP" + BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread" + BUILD_LIST_CLANG="Serial,Pthread,OpenMP" if [ "$SPOT_CHECK" = "True" ]; then # Format: (compiler module-list build-list exe-name warning-flag) @@ -297,16 +300,16 @@ elif [ "$MACHINE" = "apollo" ]; then ) fi - if [ -z "$ARCH_FLAG" ]; then - ARCH_FLAG="--arch=SNB,Kepler35" - fi - NUM_JOBS_TO_RUN_IN_PARALLEL=2 -else - echo "Unhandled machine $MACHINE" >&2 - exit 1 -fi + if [ -z "$ARCH_FLAG" ]; then + ARCH_FLAG="--arch=SNB,Kepler35" + fi + NUM_JOBS_TO_RUN_IN_PARALLEL=2 +else + echo "Unhandled machine $MACHINE" >&2 + exit 1 +fi export OMP_NUM_THREADS=4 @@ -315,119 +318,149 @@ declare -i NUM_RESULTS_TO_KEEP=7 RESULT_ROOT_PREFIX=TestAll if [ "$PRINT_HELP" = "True" ]; then -echo "test_all_sandia <ARGS> <OPTIONS>:" -echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" -echo " Defaults to root repo containing this script" -echo "--debug: Run tests in debug. 
Defaults to False" -echo "--test-script: Test this script, not Kokkos" -echo "--skip-hwloc: Do not do hwloc tests" -echo "--num=N: Number of jobs to run in parallel" -echo "--spot-check: Minimal test set to issue pull request" -echo "--dry-run: Just print what would be executed" -echo "--build-only: Just do builds, don't run anything" -echo "--opt-flag=FLAG: Optimization flag (default: -O3)" -echo "--arch=ARCHITECTURE: overwrite architecture flags" -echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" -echo "--build-list=BUILD,BUILD,BUILD..." -echo " Provide a comma-separated list of builds instead of running all builds" -echo " Valid items:" -echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" -echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" -echo "" - -echo "ARGS: list of expressions matching compilers to test" -echo " supported compilers sems" -for COMPILER_DATA in "${COMPILERS[@]}"; do + echo "test_all_sandia <ARGS> <OPTIONS>:" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" + echo " Defaults to root repo containing this script" + echo "--debug: Run tests in debug. Defaults to False" + echo "--test-script: Test this script, not Kokkos" + echo "--skip-hwloc: Do not do hwloc tests" + echo "--num=N: Number of jobs to run in parallel" + echo "--spot-check: Minimal test set to issue pull request" + echo "--dry-run: Just print what would be executed" + echo "--build-only: Just do builds, don't run anything" + echo "--opt-flag=FLAG: Optimization flag (default: -O3)" + echo "--arch=ARCHITECTURE: overwrite architecture flags" + echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS" + echo "--build-list=BUILD,BUILD,BUILD..." + echo " Provide a comma-separated list of builds instead of running all builds" + echo " Valid items:" + echo " OpenMP, Pthread, Qthreads, Serial, OpenMP_Serial, Pthread_Serial" + echo " Qthreads_Serial, Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" + echo "" + + echo "ARGS: list of expressions matching compilers to test" + echo " supported compilers sems" + for COMPILER_DATA in "${COMPILERS[@]}"; do ARR=($COMPILER_DATA) COMPILER=${ARR[0]} echo " $COMPILER" -done -echo "" - -echo "Examples:" -echo " Run all tests" -echo " % test_all_sandia" -echo "" -echo " Run all gcc tests" -echo " % test_all_sandia gcc" -echo "" -echo " Run all gcc/4.7.2 and all intel tests" -echo " % test_all_sandia gcc/4.7.2 intel" -echo "" -echo " Run all tests in debug" -echo " % test_all_sandia --debug" -echo "" -echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" -echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" -echo "" -echo "If you want to kill the tests, do:" -echo " hit ctrl-z" -echo " % kill -9 %1" -echo -exit 0 + done + echo "" + + echo "Examples:" + echo " Run all tests" + echo " % test_all_sandia" + echo "" + echo " Run all gcc tests" + echo " % test_all_sandia gcc" + echo "" + echo " Run all gcc/4.7.2 and all intel tests" + echo " % test_all_sandia gcc/4.7.2 intel" + echo "" + echo " Run all tests in debug" + echo " % test_all_sandia --debug" + echo "" + echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" + echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" + echo "" + echo "If you want to kill the tests, do:" + echo " hit ctrl-z" + echo " % kill -9 %1" + echo + exit 0 fi -# set build type +# Set build type. 
if [ "$DEBUG" = "True" ]; then - BUILD_TYPE=debug + BUILD_TYPE=debug else - BUILD_TYPE=release + BUILD_TYPE=release fi -# If no args provided, do all compilers +# If no args provided, do all compilers. if [ -z "$ARGS" ]; then - ARGS='?' + ARGS='?' fi -# Process args to figure out which compilers to test +# Process args to figure out which compilers to test. COMPILERS_TO_TEST="" + for ARG in $ARGS; do - for COMPILER_DATA in "${COMPILERS[@]}"; do - ARR=($COMPILER_DATA) - COMPILER=${ARR[0]} - if [[ "$COMPILER" = $ARG* ]]; then - if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then - COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" - else - echo "Tried to add $COMPILER twice" - fi - fi - done + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + + if [[ "$COMPILER" = $ARG* ]]; then + if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then + COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" + else + echo "Tried to add $COMPILER twice" + fi + fi + done done +# Check if Qthreads build requested. +HAVE_QTHREADS_BUILD="False" +if [ -n "$CUSTOM_BUILD_LIST" ]; then + if [[ "$CUSTOM_BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi +else + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + BUILD_LIST=${ARR[2]} + if [[ "$BUILD_LIST" = *Qthreads* ]]; then + HAVE_QTHREADS_BUILD="True" + fi + done +fi + +# Ensure Qthreads path is set if Qthreads build is requested. +if [ "$HAVE_QTHREADS_BUILD" = "True" ]; then + if [ -z "$QTHREADS_PATH" ]; then + echo "Need to supply Qthreads path (--qthreads-path) when testing Qthreads backend." >&2 + exit 1 + else + # Strip trailing slashes from path. + QTHREADS_PATH=$(echo $QTHREADS_PATH | sed 's/\/*$//') + fi +fi + # -# Functions +# Functions. # # get_compiler_name <COMPILER> get_compiler_name() { - echo $1 | cut -d/ -f1 + echo $1 | cut -d/ -f1 } # get_compiler_version <COMPILER> get_compiler_version() { - echo $1 | cut -d/ -f2 + echo $1 | cut -d/ -f2 } -# Do not call directly +# Do not call directly. get_compiler_data() { - local compiler=$1 - local item=$2 - local compiler_name=$(get_compiler_name $compiler) - local compiler_vers=$(get_compiler_version $compiler) - - local compiler_data - for compiler_data in "${COMPILERS[@]}" ; do - local arr=($compiler_data) - if [ "$compiler" = "${arr[0]}" ]; then - echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g" - return 0 - fi - done - - # Not found - echo "Unreconized compiler $compiler" >&2 - exit 1 + local compiler=$1 + local item=$2 + local compiler_name=$(get_compiler_name $compiler) + local compiler_vers=$(get_compiler_version $compiler) + + local compiler_data + for compiler_data in "${COMPILERS[@]}" ; do + local arr=($compiler_data) + + if [ "$compiler" = "${arr[0]}" ]; then + echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g" + return 0 + fi + done + + # Not found. 
+ echo "Unreconized compiler $compiler" >&2 + exit 1 } # @@ -435,227 +468,232 @@ get_compiler_data() { # get_compiler_modules() { - get_compiler_data $1 1 + get_compiler_data $1 1 } get_compiler_build_list() { - get_compiler_data $1 2 + get_compiler_data $1 2 } get_compiler_exe_name() { - get_compiler_data $1 3 + get_compiler_data $1 3 } get_compiler_warning_flags() { - get_compiler_data $1 4 + get_compiler_data $1 4 } run_cmd() { - echo "RUNNING: $*" - if [ "$DRYRUN" != "True" ]; then - eval "$* 2>&1" - fi + echo "RUNNING: $*" + if [ "$DRYRUN" != "True" ]; then + eval "$* 2>&1" + fi } # report_and_log_test_results <SUCCESS> <DESC> <COMMENT> report_and_log_test_result() { - # Use sane var names - local success=$1; local desc=$2; local comment=$3; + # Use sane var names. + local success=$1; local desc=$2; local comment=$3; - if [ "$success" = "0" ]; then - echo " PASSED $desc" - echo $comment > $PASSED_DIR/$desc - else - # For failures, comment should be the name of the phase that failed - echo " FAILED $desc" >&2 - echo $comment > $FAILED_DIR/$desc - cat ${desc}.${comment}.log - fi + if [ "$success" = "0" ]; then + echo " PASSED $desc" + echo $comment > $PASSED_DIR/$desc + else + # For failures, comment should be the name of the phase that failed. + echo " FAILED $desc" >&2 + echo $comment > $FAILED_DIR/$desc + cat ${desc}.${comment}.log + fi } setup_env() { - local compiler=$1 - local compiler_modules=$(get_compiler_modules $compiler) - - module purge - - local mod - for mod in $compiler_modules; do - echo "Loading module $mod" - module load $mod 2>&1 - # It is ridiculously hard to check for the success of a loaded - # module. Module does not return error codes and piping to grep - # causes module to run in a subshell. - module list 2>&1 | grep "$mod" >& /dev/null || return 1 - done - - return 0 + local compiler=$1 + local compiler_modules=$(get_compiler_modules $compiler) + + module purge + + local mod + for mod in $compiler_modules; do + echo "Loading module $mod" + module load $mod 2>&1 + # It is ridiculously hard to check for the success of a loaded + # module. Module does not return error codes and piping to grep + # causes module to run in a subshell. + module list 2>&1 | grep "$mod" >& /dev/null || return 1 + done + + return 0 } # single_build_and_test <COMPILER> <BUILD> <BUILD_TYPE> single_build_and_test() { - # Use sane var names - local compiler=$1; local build=$2; local build_type=$3; + # Use sane var names. + local compiler=$1; local build=$2; local build_type=$3; + + # Set up env. + mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" + cd $ROOT_DIR/$compiler/"${build}-$build_type" + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') + setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } - # set up env - mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" - cd $ROOT_DIR/$compiler/"${build}-$build_type" - local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') - setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + # Set up flags. 
+ local compiler_warning_flags=$(get_compiler_warning_flags $compiler) + local compiler_exe=$(get_compiler_exe_name $compiler) - # Set up flags - local compiler_warning_flags=$(get_compiler_warning_flags $compiler) - local compiler_exe=$(get_compiler_exe_name $compiler) + if [[ "$build_type" = hwloc* ]]; then + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + fi + if [[ "$build" = *Qthreads* ]]; then if [[ "$build_type" = hwloc* ]]; then - local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + local extra_args="$extra_args --qthreads-path=${QTHREADS_PATH}_hwloc" + else + local extra_args="$extra_args --qthreads-path=$QTHREADS_PATH" fi + fi - if [[ "$OPT_FLAG" = "" ]]; then - OPT_FLAG="-O3" - fi + if [[ "$OPT_FLAG" = "" ]]; then + OPT_FLAG="-O3" + fi - if [[ "$build_type" = *debug* ]]; then - local extra_args="$extra_args --debug" - local cxxflags="-g $compiler_warning_flags" - else - local cxxflags="$OPT_FLAG $compiler_warning_flags" - fi + if [[ "$build_type" = *debug* ]]; then + local extra_args="$extra_args --debug" + local cxxflags="-g $compiler_warning_flags" + else + local cxxflags="$OPT_FLAG $compiler_warning_flags" + fi - if [[ "$compiler" == cuda* ]]; then - cxxflags="--keep --keep-dir=$(pwd) $cxxflags" - export TMPDIR=$(pwd) - fi + if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then + local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" + fi - if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then - local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS" - fi + echo " Starting job $desc" - echo " Starting job $desc" + local comment="no_comment" - local comment="no_comment" + if [ "$TEST_SCRIPT" = "True" ]; then + local rand=$[ 1 + $[ RANDOM % 10 ]] + sleep $rand - if [ "$TEST_SCRIPT" = "True" ]; then - local rand=$[ 1 + $[ RANDOM % 10 ]] - sleep $rand - if [ $rand -gt 5 ]; then - run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } - fi - else - run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } - local -i build_start_time=$(date +%s) - run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } - local -i build_end_time=$(date +%s) - comment="build_time=$(($build_end_time-$build_start_time))" - if [[ "$BUILD_ONLY" == False ]]; then - run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } - local -i run_end_time=$(date +%s) - comment="$comment run_time=$(($run_end_time-$build_end_time))" - fi + if [ $rand -gt 5 ]; then + run_cmd ls fake_problem >& ${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } fi + else + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local -i build_start_time=$(date +%s) + run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + local -i build_end_time=$(date +%s) + comment="build_time=$(($build_end_time-$build_start_time))" + + if [[ "$BUILD_ONLY" == False ]]; then + run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + local -i run_end_time=$(date +%s) + 
comment="$comment run_time=$(($run_end_time-$build_end_time))" + fi + fi - report_and_log_test_result 0 $desc "$comment" + report_and_log_test_result 0 $desc "$comment" - return 0 + return 0 } # wait_for_jobs <NUM-JOBS> wait_for_jobs() { - local -i max_jobs=$1 - local -i num_active_jobs=$(jobs | wc -l) - while [ $num_active_jobs -ge $max_jobs ] - do - sleep 1 - num_active_jobs=$(jobs | wc -l) - jobs >& /dev/null - done + local -i max_jobs=$1 + local -i num_active_jobs=$(jobs | wc -l) + while [ $num_active_jobs -ge $max_jobs ] + do + sleep 1 + num_active_jobs=$(jobs | wc -l) + jobs >& /dev/null + done } # run_in_background <COMPILER> <BUILD> <BUILD_TYPE> run_in_background() { - local compiler=$1 - - local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL - # don't override command line input - # if [[ "$BUILD_ONLY" == True ]]; then - # num_jobs=8 - # else - if [[ "$compiler" == cuda* ]]; then - num_jobs=1 - fi - # fi - wait_for_jobs $num_jobs - - single_build_and_test $* & + local compiler=$1 + + local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL + # Don't override command line input. + # if [[ "$BUILD_ONLY" == True ]]; then + # num_jobs=8 + # else + if [[ "$compiler" == cuda* ]]; then + num_jobs=1 + fi + # fi + wait_for_jobs $num_jobs + + single_build_and_test $* & } # build_and_test_all <COMPILER> build_and_test_all() { - # Get compiler data - local compiler=$1 - if [ -z "$CUSTOM_BUILD_LIST" ]; then - local compiler_build_list=$(get_compiler_build_list $compiler) - else - local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') - fi + # Get compiler data. + local compiler=$1 + if [ -z "$CUSTOM_BUILD_LIST" ]; then + local compiler_build_list=$(get_compiler_build_list $compiler) + else + local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + fi - # do builds - local build - for build in $compiler_build_list - do - run_in_background $compiler $build $BUILD_TYPE + # Do builds. + local build + for build in $compiler_build_list + do + run_in_background $compiler $build $BUILD_TYPE - # If not cuda, do a hwloc test too - if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then - run_in_background $compiler $build "hwloc-$BUILD_TYPE" - fi - done + # If not cuda, do a hwloc test too. + if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then + run_in_background $compiler $build "hwloc-$BUILD_TYPE" + fi + done - return 0 + return 0 } get_test_root_dir() { - local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) - local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) - local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} + local existing_results=$(find . 
-maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) + local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) + local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} - if [ $num_to_delete -gt 0 ]; then - /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) - fi + if [ $num_to_delete -gt 0 ]; then + /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) + fi - echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") + echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") } wait_summarize_and_exit() { - wait_for_jobs 1 - - echo "#######################################################" - echo "PASSED TESTS" - echo "#######################################################" - - local passed_test - for passed_test in $(\ls -1 $PASSED_DIR | sort) - do - echo $passed_test $(cat $PASSED_DIR/$passed_test) - done - - echo "#######################################################" - echo "FAILED TESTS" - echo "#######################################################" - - local failed_test - local -i rv=0 - for failed_test in $(\ls -1 $FAILED_DIR | sort) - do - echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" - rv=$rv+1 - done - - exit $rv + wait_for_jobs 1 + + echo "#######################################################" + echo "PASSED TESTS" + echo "#######################################################" + + local passed_test + for passed_test in $(\ls -1 $PASSED_DIR | sort) + do + echo $passed_test $(cat $PASSED_DIR/$passed_test) + done + + echo "#######################################################" + echo "FAILED TESTS" + echo "#######################################################" + + local failed_test + local -i rv=0 + for failed_test in $(\ls -1 $FAILED_DIR | sort) + do + echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" + rv=$rv+1 + done + + exit $rv } # -# Main +# Main. # ROOT_DIR=$(get_test_root_dir) @@ -669,8 +707,8 @@ mkdir -p $FAILED_DIR echo "Going to test compilers: " $COMPILERS_TO_TEST for COMPILER in $COMPILERS_TO_TEST; do - echo "Testing compiler $COMPILER" - build_and_test_all $COMPILER + echo "Testing compiler $COMPILER" + build_and_test_all $COMPILER done wait_summarize_and_exit diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp index 3277c007d0845485a57ed7aabfa35202f1b22d1b..53e0eab693afeca7bbe0c164666612dc5ccc36d9 100644 --- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp @@ -60,7 +60,7 @@ class DynamicView : public Kokkos::ViewTraits< DataType , P ... > { public: - typedef ViewTraits< DataType , P ... > traits ; + typedef Kokkos::ViewTraits< DataType , P ... > traits ; private: @@ -123,30 +123,41 @@ public: enum { Rank = 1 }; - KOKKOS_INLINE_FUNCTION constexpr size_t size() const + KOKKOS_INLINE_FUNCTION + size_t size() const noexcept { - return - Kokkos::Impl::MemorySpaceAccess - < Kokkos::Impl::ActiveExecutionMemorySpace - , typename traits::memory_space - >::accessible - ? 
// Runtime size is at the end of the chunk pointer array - (*reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max )) - << m_chunk_shift - : 0 ; + uintptr_t n = 0 ; + + if ( Kokkos::Impl::MemorySpaceAccess + < Kokkos::Impl::ActiveExecutionMemorySpace + , typename traits::memory_space + >::accessible ) { + n = *reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ); + } +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + else { + Kokkos::Impl::DeepCopy< Kokkos::HostSpace + , typename traits::memory_space + , Kokkos::HostSpace::execution_space > + ( & n + , reinterpret_cast<const uintptr_t*>( m_chunks + m_chunk_max ) + , sizeof(uintptr_t) ); + } +#endif + return n << m_chunk_shift ; } template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr + KOKKOS_INLINE_FUNCTION size_t extent( const iType & r ) const { return r == 0 ? size() : 1 ; } template< typename iType > - KOKKOS_INLINE_FUNCTION constexpr + KOKKOS_INLINE_FUNCTION size_t extent_int( const iType & r ) const { return r == 0 ? size() : 1 ; } - KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return size(); } + KOKKOS_INLINE_FUNCTION size_t dimension_0() const { return size(); } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return 1 ; } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return 1 ; } KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return 1 ; } @@ -270,10 +281,18 @@ public: } /** \brief Resizing in serial can grow or shrink the array size, */ + template< typename IntType > inline - void resize_serial( size_t n ) + typename std::enable_if + < std::is_integral<IntType>::value && + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace + , typename traits::memory_space + >::accessible + >::type + resize_serial( IntType const & n ) { - DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); + typedef typename traits::value_type value_type ; + typedef value_type * pointer_type ; const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; @@ -286,8 +305,8 @@ public: if ( *pc < NC ) { while ( *pc < NC ) { - m_chunks[*pc] = - m_pool.allocate( sizeof(traits::value_type) << m_chunk_shift ); + m_chunks[*pc] = reinterpret_cast<pointer_type> + ( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) ); ++*pc ; } } @@ -295,12 +314,90 @@ public: while ( NC + 1 <= *pc ) { --*pc ; m_pool.deallocate( m_chunks[*pc] - , sizeof(traits::value_type) << m_chunk_shift ); + , sizeof(value_type) << m_chunk_shift ); m_chunks[*pc] = 0 ; } } } + //---------------------------------------- + + struct ResizeSerial { + memory_pool m_pool ; + typename traits::value_type ** m_chunks ; + uintptr_t * m_pc ; + uintptr_t m_nc ; + unsigned m_chunk_shift ; + + KOKKOS_INLINE_FUNCTION + void operator()( int ) const + { + typedef typename traits::value_type value_type ; + typedef value_type * pointer_type ; + + if ( *m_pc < m_nc ) { + while ( *m_pc < m_nc ) { + m_chunks[*m_pc] = reinterpret_cast<pointer_type> + ( m_pool.allocate( sizeof(value_type) << m_chunk_shift ) ); + ++*m_pc ; + } + } + else { + while ( m_nc + 1 <= *m_pc ) { + --*m_pc ; + m_pool.deallocate( m_chunks[*m_pc] + , sizeof(value_type) << m_chunk_shift ); + m_chunks[*m_pc] = 0 ; + } + } + } + + ResizeSerial( memory_pool const & arg_pool + , typename traits::value_type ** arg_chunks + , uintptr_t * arg_pc + , uintptr_t arg_nc + , unsigned arg_chunk_shift + ) + : m_pool( arg_pool ) + , m_chunks( arg_chunks ) + , m_pc( arg_pc ) + , m_nc( arg_nc ) + , m_chunk_shift( arg_chunk_shift ) + {} + }; + + 
template< typename IntType > + inline + typename std::enable_if + < std::is_integral<IntType>::value && + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace + , typename traits::memory_space + >::accessible + >::type + resize_serial( IntType const & n ) + { + const uintptr_t NC = ( n + m_chunk_mask ) >> m_chunk_shift ; + + if ( m_chunk_max < NC ) { + Kokkos::abort("DynamicView::resize_serial exceeded maximum size"); + } + + // Must dispatch kernel + + typedef Kokkos::RangePolicy< typename traits::execution_space > Range ; + + uintptr_t * const pc = + reinterpret_cast<uintptr_t*>( m_chunks + m_chunk_max ); + + Kokkos::Impl::ParallelFor<ResizeSerial,Range> + closure( ResizeSerial( m_pool, m_chunks, pc, NC, m_chunk_shift ) + , Range(0,1) ); + + closure.execute(); + + traits::execution_space::fence(); + } + //---------------------------------------------------------------------- ~DynamicView() = default ; @@ -311,15 +408,17 @@ public: DynamicView & operator = ( const DynamicView & ) = default ; template< class RT , class ... RP > - KOKKOS_INLINE_FUNCTION DynamicView( const DynamicView<RT,RP...> & rhs ) : m_pool( rhs.m_pool ) , m_track( rhs.m_track ) - , m_chunks( rhs.m_chunks ) + , m_chunks( (typename traits::value_type **) rhs.m_chunks ) , m_chunk_shift( rhs.m_chunk_shift ) , m_chunk_mask( rhs.m_chunk_mask ) , m_chunk_max( rhs.m_chunk_max ) { + typedef typename DynamicView<RT,RP...>::traits SrcTraits ; + typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynamicView copy construction" ); } //---------------------------------------------------------------------- @@ -400,8 +499,6 @@ public: , m_chunk_mask( ( 1 << m_chunk_shift ) - 1 ) , m_chunk_max( ( arg_size_max + m_chunk_mask ) >> m_chunk_shift ) { - DynamicView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); - // A functor to deallocate all of the chunks upon final destruction typedef typename traits::memory_space memory_space ; diff --git a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp index 8646d277921aff5c71b70c48d768ee39944b3455..193f1bc334dd76177e3823f6decee9dbd71b137e 100644 --- a/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp +++ b/lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp @@ -230,16 +230,17 @@ public: typedef typename Impl::remove_const<declared_value_type>::type value_type; typedef typename Impl::add_const<value_type>::type const_value_type; - typedef Device execution_space; + typedef Device device_type; + typedef typename Device::execution_space execution_space; typedef Hasher hasher_type; typedef EqualTo equal_to_type; typedef uint32_t size_type; //map_types - typedef UnorderedMap<declared_key_type,declared_value_type,execution_space,hasher_type,equal_to_type> declared_map_type; - typedef UnorderedMap<key_type,value_type,execution_space,hasher_type,equal_to_type> insertable_map_type; - typedef UnorderedMap<const_key_type,value_type,execution_space,hasher_type,equal_to_type> modifiable_map_type; - typedef UnorderedMap<const_key_type,const_value_type,execution_space,hasher_type,equal_to_type> const_map_type; + typedef UnorderedMap<declared_key_type,declared_value_type,device_type,hasher_type,equal_to_type> declared_map_type; + typedef UnorderedMap<key_type,value_type,device_type,hasher_type,equal_to_type> insertable_map_type; + typedef UnorderedMap<const_key_type,value_type,device_type,hasher_type,equal_to_type> modifiable_map_type; + typedef 
UnorderedMap<const_key_type,const_value_type,device_type,hasher_type,equal_to_type> const_map_type; static const bool is_set = std::is_same<void,value_type>::value; static const bool has_const_key = std::is_same<const_key_type,declared_key_type>::value; @@ -264,18 +265,18 @@ private: typedef typename Impl::if_c< is_set, int, declared_value_type>::type impl_value_type; typedef typename Impl::if_c< is_insertable_map - , View< key_type *, execution_space> - , View< const key_type *, execution_space, MemoryTraits<RandomAccess> > + , View< key_type *, device_type> + , View< const key_type *, device_type, MemoryTraits<RandomAccess> > >::type key_type_view; typedef typename Impl::if_c< is_insertable_map || is_modifiable_map - , View< impl_value_type *, execution_space> - , View< const impl_value_type *, execution_space, MemoryTraits<RandomAccess> > + , View< impl_value_type *, device_type> + , View< const impl_value_type *, device_type, MemoryTraits<RandomAccess> > >::type value_type_view; typedef typename Impl::if_c< is_insertable_map - , View< size_type *, execution_space> - , View< const size_type *, execution_space, MemoryTraits<RandomAccess> > + , View< size_type *, device_type> + , View< const size_type *, device_type, MemoryTraits<RandomAccess> > >::type size_type_view; typedef typename Impl::if_c< is_insertable_map @@ -285,7 +286,7 @@ private: enum { modified_idx = 0, erasable_idx = 1, failed_insert_idx = 2 }; enum { num_scalars = 3 }; - typedef View< int[num_scalars], LayoutLeft, execution_space> scalars_view; + typedef View< int[num_scalars], LayoutLeft, device_type> scalars_view; public: //! \name Public member functions @@ -757,7 +758,7 @@ public: Kokkos::deep_copy(tmp.m_available_indexes, src.m_available_indexes); - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, typename SDevice::memory_space > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, typename SDevice::memory_space > raw_deep_copy; raw_deep_copy(tmp.m_hash_lists.ptr_on_device(), src.m_hash_lists.ptr_on_device(), sizeof(size_type)*src.m_hash_lists.dimension_0()); raw_deep_copy(tmp.m_next_index.ptr_on_device(), src.m_next_index.ptr_on_device(), sizeof(size_type)*src.m_next_index.dimension_0()); @@ -781,21 +782,21 @@ private: // private member functions void set_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy; const int true_ = true; raw_deep_copy(m_scalars.ptr_on_device() + flag, &true_, sizeof(int)); } void reset_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< typename device_type::memory_space, Kokkos::HostSpace > raw_deep_copy; const int false_ = false; raw_deep_copy(m_scalars.ptr_on_device() + flag, &false_, sizeof(int)); } bool get_flag(int flag) const { - typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename execution_space::memory_space > raw_deep_copy; + typedef Kokkos::Impl::DeepCopy< Kokkos::HostSpace, typename device_type::memory_space > raw_deep_copy; int result = false; raw_deep_copy(&result, m_scalars.ptr_on_device() + flag, sizeof(int)); return result; diff --git a/lib/kokkos/containers/unit_tests/CMakeLists.txt b/lib/kokkos/containers/unit_tests/CMakeLists.txt index 
b9d860f32fd854a59e0258adabdc540a1ef0c512..0c59c616d620598b835525eb70410d0a26f6af6b 100644 --- a/lib/kokkos/containers/unit_tests/CMakeLists.txt +++ b/lib/kokkos/containers/unit_tests/CMakeLists.txt @@ -3,38 +3,49 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) -SET(SOURCES - UnitTestMain.cpp - TestCuda.cpp - ) - SET(LIBRARIES kokkoscore) IF(Kokkos_ENABLE_Pthread) - LIST( APPEND SOURCES - TestThreads.cpp +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Threads + SOURCES TestThreads.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ) ENDIF() IF(Kokkos_ENABLE_Serial) - LIST( APPEND SOURCES - TestSerial.cpp +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_Serial + SOURCES TestSerial.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ) ENDIF() IF(Kokkos_ENABLE_OpenMP) - LIST( APPEND SOURCES - TestOpenMP.cpp +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest_OpenMP + SOURCES TestOpenMP.cpp UnitTestMain.cpp + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest ) ENDIF() - +IF(Kokkos_ENABLE_Cuda) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest - SOURCES ${SOURCES} + UnitTest_Cuda + SOURCES TestCuda.cpp UnitTestMain.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ) - +ENDIF() + diff --git a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp index 7e3ca005f4b6401a088208fca120c097143afc49..beb07bd791cf162c31706b1eeaf31a4c25c91ba5 100644 --- a/lib/kokkos/containers/unit_tests/TestDynamicView.hpp +++ b/lib/kokkos/containers/unit_tests/TestDynamicView.hpp @@ -64,6 +64,7 @@ struct TestDynamicView typedef Kokkos::Experimental::MemoryPool<typename Space::device_type> memory_pool_type; typedef Kokkos::Experimental::DynamicView<Scalar*,Space> view_type; + typedef typename view_type::const_type const_view_type ; typedef typename Kokkos::TeamPolicy<execution_space>::member_type member_type ; typedef double value_type; @@ -136,6 +137,8 @@ struct TestDynamicView view_type da("A",pool,arg_total_size); + const_view_type ca(da); + // printf("TestDynamicView::run(%d) construct test functor\n",arg_total_size); TestDynamicView functor(da,arg_total_size); diff --git a/lib/kokkos/core/cmake/Dependencies.cmake b/lib/kokkos/core/cmake/Dependencies.cmake index ae9a20c50efeadec69ab22e3365cd3ec26a5e451..8d9872725e59655f256a9e62bf3f706a79e80e59 100644 --- a/lib/kokkos/core/cmake/Dependencies.cmake +++ b/lib/kokkos/core/cmake/Dependencies.cmake @@ -1,6 +1,6 @@ TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( - LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREAD DLlib + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC QTHREADS DLlib TEST_OPTIONAL_TPLS CUSPARSE ) -TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) \ No newline at end of file +TRIBITS_TPL_TENTATIVELY_ENABLE(DLlib) diff --git a/lib/kokkos/core/cmake/KokkosCore_config.h.in b/lib/kokkos/core/cmake/KokkosCore_config.h.in index 9359b5a32b71f06230ea8a2e878e0f457f8eee85..a71e60f20742edd8417365bb99c45f172dc5b218 100644 --- a/lib/kokkos/core/cmake/KokkosCore_config.h.in +++ b/lib/kokkos/core/cmake/KokkosCore_config.h.in @@ -30,7 +30,7 @@ #cmakedefine KOKKOS_HAVE_PTHREAD #cmakedefine KOKKOS_HAVE_SERIAL -#cmakedefine KOKKOS_HAVE_QTHREAD +#cmakedefine KOKKOS_HAVE_QTHREADS #cmakedefine 
KOKKOS_HAVE_Winthread #cmakedefine KOKKOS_HAVE_OPENMP #cmakedefine KOKKOS_HAVE_HWLOC diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile index 85f869971a33c349769bd318af28759f3e3eca12..3a0ad2d4c16a4e16d73e91eec131ee092bf9f47e 100644 --- a/lib/kokkos/core/perf_test/Makefile +++ b/lib/kokkos/core/perf_test/Makefile @@ -60,4 +60,3 @@ clean: kokkos-clean gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/core/perf_test/PerfTestCuda.cpp b/lib/kokkos/core/perf_test/PerfTestCuda.cpp index 7386ecef2032f32da8d4e672999e09021b5a673c..65ce61fb53b9e5d8025f1f6f59e8ecf194ec45f0 100644 --- a/lib/kokkos/core/perf_test/PerfTestCuda.cpp +++ b/lib/kokkos/core/perf_test/PerfTestCuda.cpp @@ -52,6 +52,8 @@ #include <impl/Kokkos_Timer.hpp> +#include <PerfTestMDRange.hpp> + #include <PerfTestHexGrad.hpp> #include <PerfTestBlasKernels.hpp> #include <PerfTestGramSchmidt.hpp> @@ -72,6 +74,14 @@ class cuda : public ::testing::Test { } }; +//TEST_F( cuda, mdrange_lr ) { +// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutRight>( 5, 8, "Kokkos::Cuda" )) ); +//} + +//TEST_F( cuda, mdrange_ll ) { +// EXPECT_NO_THROW( (run_test_mdrange<Kokkos::Cuda , Kokkos::LayoutLeft>( 5, 8, "Kokkos::Cuda" )) ); +//} + TEST_F( cuda, hexgrad ) { EXPECT_NO_THROW( run_test_hexgrad< Kokkos::Cuda >( 10 , 20, "Kokkos::Cuda" ) ); diff --git a/lib/kokkos/core/perf_test/PerfTestDriver.hpp b/lib/kokkos/core/perf_test/PerfTestDriver.hpp index 7b6cfc5b5ce96399dcff47e1976b630088650af2..4732c3275a7f92cf1b1fc8f4d457c059ceb0679e 100644 --- a/lib/kokkos/core/perf_test/PerfTestDriver.hpp +++ b/lib/kokkos/core/perf_test/PerfTestDriver.hpp @@ -60,6 +60,342 @@ namespace Test { enum { NUMBER_OF_TRIALS = 5 }; +template< class DeviceType , class LayoutType > +void run_test_mdrange( int exp_beg , int exp_end, const char deviceTypeName[], int range_offset = 0, int tile_offset = 0 ) +// exp_beg = 6 => 2^6 = 64 is starting range length +{ +#define MDRANGE_PERFORMANCE_OUTPUT_VERBOSE 0 + + std::string label_mdrange ; + label_mdrange.append( "\"MDRange< double , " ); + label_mdrange.append( deviceTypeName ); + label_mdrange.append( " >\"" ); + + std::string label_range_col2 ; + label_range_col2.append( "\"RangeColTwo< double , " ); + label_range_col2.append( deviceTypeName ); + label_range_col2.append( " >\"" ); + + std::string label_range_col_all ; + label_range_col_all.append( "\"RangeColAll< double , " ); + label_range_col_all.append( deviceTypeName ); + label_range_col_all.append( " >\"" ); + + if ( std::is_same<LayoutType, Kokkos::LayoutRight>::value) { + std::cout << "--------------------------------------------------------------\n" + << "Performance tests for MDRange Layout Right" + << "\n--------------------------------------------------------------" << std::endl; + } else { + std::cout << "--------------------------------------------------------------\n" + << "Performance tests for MDRange Layout Left" + << "\n--------------------------------------------------------------" << std::endl; + } + + + for (int i = exp_beg ; i < exp_end ; ++i) { + const int range_length = (1<<i) + range_offset; + + std::cout << "\n--------------------------------------------------------------\n" + << "--------------------------------------------------------------\n" + << "MDRange Test: range bounds: " << range_length << " , " << range_length << " , " << range_length + << 
"\n--------------------------------------------------------------\n" + << "--------------------------------------------------------------\n"; +// << std::endl; + + int t0_min = 0, t1_min = 0, t2_min = 0; + double seconds_min = 0.0; + + // Test 1: The MDRange in full + { + int t0 = 1, t1 = 1, t2 = 1; + int counter = 1; +#if !defined(KOKKOS_HAVE_CUDA) + int min_bnd = 8; + int tfast = range_length; +#else + int min_bnd = 2; + int tfast = 32; +#endif + while ( tfast >= min_bnd ) { + int tmid = min_bnd; + while ( tmid < tfast ) { + t0 = min_bnd; + t1 = tmid; + t2 = tfast; + int t2_rev = min_bnd; + int t1_rev = tmid; + int t0_rev = tfast; + +#if defined(KOKKOS_HAVE_CUDA) + //Note: Product of tile sizes must be < 1024 for Cuda + if ( t0*t1*t2 >= 1024 ) { + printf(" Exceeded Cuda tile limits; onto next range set\n\n"); + break; + } +#endif + + // Run 1 with tiles LayoutRight style + double seconds_1 = 0; + { seconds_1 = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0, t1, t2) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << label_mdrange + << " , " << t0 << " , " << t1 << " , " << t2 + << " , " << seconds_1 + << std::endl ; +#endif + + if ( counter == 1 ) { + seconds_min = seconds_1; + t0_min = t0; + t1_min = t1; + t2_min = t2; + } + else { + if ( seconds_1 < seconds_min ) + { + seconds_min = seconds_1; + t0_min = t0; + t1_min = t1; + t2_min = t2; + } + } + + // Run 2 with tiles LayoutLeft style - reverse order of tile dims + double seconds_1rev = 0; + { seconds_1rev = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, t0_rev, t1_rev, t2_rev) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << label_mdrange + << " , " << t0_rev << " , " << t1_rev << " , " << t2_rev + << " , " << seconds_1rev + << std::endl ; +#endif + + if ( seconds_1rev < seconds_min ) + { + seconds_min = seconds_1rev; + t0_min = t0_rev; + t1_min = t1_rev; + t2_min = t2_rev; + } + + ++counter; + tmid <<= 1; + } //end inner while + tfast >>=1; + } //end outer while + + std::cout << "\n" + << "--------------------------------------------------------------\n" + << label_mdrange + << "\n Min values " + << "\n Range length per dim (3D): " << range_length + << "\n TileDims: " << t0_min << " , " << t1_min << " , " << t2_min + << "\n Min time: " << seconds_min + << "\n---------------------------------------------------------------" + << std::endl ; + } //end scope + +#if !defined(KOKKOS_HAVE_CUDA) + double seconds_min_c = 0.0; + int t0c_min = 0, t1c_min = 0, t2c_min = 0; + int counter = 1; + { + int min_bnd = 8; + // Test 1_c: MDRange with 0 for 'inner' tile dim; this case will utilize the full span in that direction, should be similar to Collapse<2> + if ( std::is_same<LayoutType, Kokkos::LayoutRight>::value ) { + for ( unsigned int T0 = min_bnd; T0 < static_cast<unsigned int>(range_length); T0<<=1 ) { + for ( unsigned int T1 = min_bnd; T1 < static_cast<unsigned int>(range_length); T1<<=1 ) { + double seconds_c = 0; + { seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, T0, T1, 0) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << " MDRange LR with '0' tile - collapse-like \n" + << label_mdrange + << " , " << T0 << " , " << T1 << " , " << range_length + << " , " << seconds_c + << std::endl ; +#endif + + t2c_min = range_length; + if ( counter == 1 ) { + seconds_min_c = seconds_c; + t0c_min = T0; + 
t1c_min = T1; + } + else { + if ( seconds_c < seconds_min_c ) + { + seconds_min_c = seconds_c; + t0c_min = T0; + t1c_min = T1; + } + } + ++counter; + } + } + } + else { + for ( unsigned int T1 = min_bnd; T1 <= static_cast<unsigned int>(range_length); T1<<=1 ) { + for ( unsigned int T2 = min_bnd; T2 <= static_cast<unsigned int>(range_length); T2<<=1 ) { + double seconds_c = 0; + { seconds_c = MultiDimRangePerf3D< DeviceType , double , LayoutType >::test_multi_index(range_length,range_length,range_length, 0, T1, T2) ; } + +#if MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + std::cout << " MDRange LL with '0' tile - collapse-like \n" + << label_mdrange + << " , " <<range_length << " < " << T1 << " , " << T2 + << " , " << seconds_c + << std::endl ; +#endif + + + t0c_min = range_length; + if ( counter == 1 ) { + seconds_min_c = seconds_c; + t1c_min = T1; + t2c_min = T2; + } + else { + if ( seconds_c < seconds_min_c ) + { + seconds_min_c = seconds_c; + t1c_min = T1; + t2c_min = T2; + } + } + ++counter; + } + } + } + + std::cout +// << "--------------------------------------------------------------\n" + << label_mdrange + << " Collapse<2> style: " + << "\n Min values " + << "\n Range length per dim (3D): " << range_length + << "\n TileDims: " << t0c_min << " , " << t1c_min << " , " << t2c_min + << "\n Min time: " << seconds_min_c + << "\n---------------------------------------------------------------" + << std::endl ; + } //end scope test 2 +#endif + + + // Test 2: RangePolicy Collapse2 style + double seconds_2 = 0; + { seconds_2 = RangePolicyCollapseTwo< DeviceType , double , LayoutType >::test_index_collapse_two(range_length,range_length,range_length) ; } + std::cout << label_range_col2 + << " , " << range_length + << " , " << seconds_2 + << std::endl ; + + + // Test 3: RangePolicy Collapse all style - not necessary, always slow + /* + double seconds_3 = 0; + { seconds_3 = RangePolicyCollapseAll< DeviceType , double , LayoutType >::test_collapse_all(range_length,range_length,range_length) ; } + std::cout << label_range_col_all + << " , " << range_length + << " , " << seconds_3 + << "\n---------------------------------------------------------------" + << std::endl ; + */ + + // Compare fastest times... 
will never be collapse all so ignore it + // seconds_min = tiled MDRange + // seconds_min_c = collapse<2>-like MDRange (tiledim = span for fast dim) - only for non-Cuda, else tile too long + // seconds_2 = collapse<2>-style RangePolicy + // seconds_3 = collapse<3>-style RangePolicy + +#if !defined(KOKKOS_HAVE_CUDA) + if ( seconds_min < seconds_min_c ) { + if ( seconds_min < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange tiled\n" + << " Time: " << seconds_min + << " Difference: " << seconds_2 - seconds_min + << " Other times: \n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + } + else if ( seconds_min > seconds_min_c ) { + if ( seconds_min_c < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange collapse-like (tiledim = span on fast dim) type\n" + << " Time: " << seconds_min_c + << " Difference: " << seconds_2 - seconds_min_c + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min_c > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min_c - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << " MDrange collapse-like (tiledim = span on fast dim) type: " << seconds_min_c << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + } // end else if +#else + if ( seconds_min < seconds_2 ) { + std::cout << "--------------------------------------------------------------\n" + << " Fastest run: MDRange tiled\n" + << " Time: " << seconds_min + << " Difference: " << seconds_2 - seconds_min + << " Other times: \n" + << " Collapse2 Range Policy: " << seconds_2 << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } + else if ( seconds_min > seconds_2 ) { + std::cout << " Fastest run: Collapse2 RangePolicy\n" + << " Time: " << seconds_2 + << " Difference: " << seconds_min - seconds_2 + << " Other times: \n" + << " MDrange Tiled: " << seconds_min << "\n" + << "\n--------------------------------------------------------------" + << "\n--------------------------------------------------------------" + //<< "\n\n" + << std::endl; + } +#endif + + } //end for + 
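+ // Usage sketch (illustrative only): the device/layout combinations and exponent bounds below are
+ // assumptions that mirror the commented-out mdrange_lr / mdrange_ll GTest hooks this patch adds to
+ // PerfTestHost.cpp and PerfTestCuda.cpp; exp_beg = 5 and exp_end = 8 sweep range lengths of
+ // 32, 64, and 128 per dimension (with the default range_offset of 0).
+ //
+ //   run_test_mdrange< Kokkos::Serial , Kokkos::LayoutRight >( 5, 8, "Kokkos::Serial" );
+ //   run_test_mdrange< Kokkos::Cuda , Kokkos::LayoutLeft >( 5, 8, "Kokkos::Cuda" );
+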
+#undef MDRANGE_PERFORMANCE_OUTPUT_VERBOSE + +} template< class DeviceType > diff --git a/lib/kokkos/core/perf_test/PerfTestHost.cpp b/lib/kokkos/core/perf_test/PerfTestHost.cpp index 606177ca50effc8a6cf88ced253ce2e1ea9930a2..831d581109984319a4c8a61674a42a297ace443a 100644 --- a/lib/kokkos/core/perf_test/PerfTestHost.cpp +++ b/lib/kokkos/core/perf_test/PerfTestHost.cpp @@ -66,6 +66,8 @@ const char TestHostDeviceName[] = "Kokkos::Serial" ; #include <impl/Kokkos_Timer.hpp> +#include <PerfTestMDRange.hpp> + #include <PerfTestHexGrad.hpp> #include <PerfTestBlasKernels.hpp> #include <PerfTestGramSchmidt.hpp> @@ -102,6 +104,14 @@ protected: } }; +//TEST_F( host, mdrange_lr ) { +// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutRight> (5, 8, TestHostDeviceName) ) ); +//} + +//TEST_F( host, mdrange_ll ) { +// EXPECT_NO_THROW( (run_test_mdrange<TestHostDevice , Kokkos::LayoutLeft> (5, 8, TestHostDeviceName) ) ); +//} + TEST_F( host, hexgrad ) { EXPECT_NO_THROW(run_test_hexgrad< TestHostDevice>( 10, 20, TestHostDeviceName )); } diff --git a/lib/kokkos/core/perf_test/PerfTestMDRange.hpp b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d910b513c67f94eec4c1254fd4528ec4d74c62a5 --- /dev/null +++ b/lib/kokkos/core/perf_test/PerfTestMDRange.hpp @@ -0,0 +1,564 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +namespace Test { +template< class DeviceType + , typename ScalarType = double + , typename TestLayout = Kokkos::LayoutRight + > +struct MultiDimRangePerf3D +{ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + + using iterate_type = Kokkos::Experimental::Iterate; + + typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + MultiDimRangePerf3D(const view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long i, const long j, const long k) const + { + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + + + struct InitZeroTag {}; +// struct InitViewTag {}; + + struct Init + { + + Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long i, const long j, const long k) const + { + input(i,j,k) = 1.0; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const InitZeroTag&, const long i, const long j, const long k) const + { + input(i,j,k) = 0; + } + + view_type input; + const long irange; + const long jrange; + const long krange; + }; + + + static double test_multi_index(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const unsigned int Ti = 1, const unsigned int Tj = 1, const unsigned int Tk = 1, const long iter = 1) + { + //This test performs multidim range over all dims + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef MultiDimRangePerf3D<execution_space,ScalarType,TestLayout> FunctorType; + + double dt_min = 0; + + // LayoutRight + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) { + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}}); + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}}); + + typedef typename Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > MDRangeType; + using tile_type = typename MDRangeType::tile_type; + using point_type = typename MDRangeType::point_type; + + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Right, iterate_type::Right>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); + + Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); + execution_space::fence(); + Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, 
jcount, kcount) ); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + + //Correctness check - only the first run + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast<long>(icount); ++l ) { + for ( long j = 0; j < static_cast<long>(jcount); ++j ) { + for ( long k = 0; k < static_cast<long>(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n" + << " multi Ahost = " << Ahost(l,j,k) << " expected = " << check + << " multi Bhost(ijk) = " << Bhost(l,j,k) + << " multi Bhost(l+1jk) = " << Bhost(l+1,j,k) + << " multi Bhost(l+2jk) = " << Bhost(l+2,j,k) + << " multi Bhost(ij+1k) = " << Bhost(l,j+1,k) + << " multi Bhost(ij+2k) = " << Bhost(l,j+2,k) + << " multi Bhost(ijk+1) = " << Bhost(l,j,k+1) + << " multi Bhost(ijk+2) = " << Bhost(l,j,k+2) + << std::endl; + //exit(-1); + } + } } } + if ( numErrors != 0 ) { std::cout << "LR multi: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; } + //else { std::cout << " multi: No errors!" << std::endl; } + } + } //end for + + } + // LayoutLeft + else { + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initA({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}}); + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3,iterate_type::Left,iterate_type::Left>, execution_space > policy_initB({{0,0,0}},{{icount+2,jcount+2,kcount+2}},{{Ti,Tj,Tk}}); + + //typedef typename Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > MDRangeType; + //using tile_type = typename MDRangeType::tile_type; + //using point_type = typename MDRangeType::point_type; + //Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy(point_type{{0,0,0}},point_type{{icount,jcount,kcount}},tile_type{{Ti,Tj,Tk}} ); + Kokkos::Experimental::MDRangePolicy<Kokkos::Experimental::Rank<3, iterate_type::Left, iterate_type::Left>, execution_space > policy({{0,0,0}},{{icount,jcount,kcount}},{{Ti,Tj,Tk}} ); + + Kokkos::Experimental::md_parallel_for( policy_initA, Init(Atest, icount, jcount, kcount) ); + execution_space::fence(); + Kokkos::Experimental::md_parallel_for( policy_initB, Init(Btest, icount+2, jcount+2, kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::Experimental::md_parallel_for( policy, FunctorType(Atest, Btest, icount, jcount, kcount) ); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? 
dt : dt_min ; + + //Correctness check - only the first run + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast<long>(icount); ++l ) { + for ( long j = 0; j < static_cast<long>(jcount); ++j ) { + for ( long k = 0; k < static_cast<long>(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n" + << " multi Ahost = " << Ahost(l,j,k) << " expected = " << check + << " multi Bhost(ijk) = " << Bhost(l,j,k) + << " multi Bhost(l+1jk) = " << Bhost(l+1,j,k) + << " multi Bhost(l+2jk) = " << Bhost(l+2,j,k) + << " multi Bhost(ij+1k) = " << Bhost(l,j+1,k) + << " multi Bhost(ij+2k) = " << Bhost(l,j+2,k) + << " multi Bhost(ijk+1) = " << Bhost(l,j,k+1) + << " multi Bhost(ijk+2) = " << Bhost(l,j,k+2) + << std::endl; + //exit(-1); + } + } } } + if ( numErrors != 0 ) { std::cout << " LL multi run: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; } + //else { std::cout << " multi: No errors!" << std::endl; } + + } + } //end for + } + + return dt_min; + } + +}; + + +template< class DeviceType + , typename ScalarType = double + , typename TestLayout = Kokkos::LayoutRight + > +struct RangePolicyCollapseTwo +{ + // RangePolicy for 3D range, but will collapse only 2 dims => like Rank<2> for multi-dim; unroll 2 dims in one-dim + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + typedef TestLayout layout; + + using iterate_type = Kokkos::Experimental::Iterate; + + typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + RangePolicyCollapseTwo(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_) , irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) + { +//id(i,j,k) = k + j*Nk + i*Nk*Nj = k + Nk*(j + i*Nj) = k + Nk*r +//r = j + i*Nj + long i = int(r / jrange); + long j = int( r - i*jrange); + for (int k = 0; k < krange; ++k) { + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) + { +//id(i,j,k) = i + j*Ni + k*Ni*Nj = i + Ni*(j + k*Nj) = i + Ni*r +//r = j + k*Nj + long k = int(r / jrange); + long j = int( r - k*jrange); + for (int i = 0; i < irange; ++i) { + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + } + + + struct Init + { + view_type input; + const long irange; + const long jrange; + const long krange; + + Init(const view_type & input_, const long &irange_, const long &jrange_, const 
long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) + { + long i = int(r / jrange); + long j = int( r - i*jrange); + for (int k = 0; k < krange; ++k) { + input(i,j,k) = 1; + } + } + else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) + { + long k = int(r / jrange); + long j = int( r - k*jrange); + for (int i = 0; i < irange; ++i) { + input(i,j,k) = 1; + } + } + } + }; + + + static double test_index_collapse_two(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1) + { + // This test refers to collapsing two dims while using the RangePolicy + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef RangePolicyCollapseTwo<execution_space,ScalarType,TestLayout> FunctorType; + + long collapse_index_rangeA = 0; + long collapse_index_rangeB = 0; + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) { + collapse_index_rangeA = icount*jcount; + collapse_index_rangeB = (icount+2)*(jcount+2); +// std::cout << " LayoutRight " << std::endl; + } else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) { + collapse_index_rangeA = kcount*jcount; + collapse_index_rangeB = (kcount+2)*(jcount+2); +// std::cout << " LayoutLeft " << std::endl; + } else { + std::cout << " LayoutRight or LayoutLeft required - will pass 0 as range instead " << std::endl; + exit(-1); + } + + Kokkos::RangePolicy<execution_space> policy(0, (collapse_index_rangeA) ); + Kokkos::RangePolicy<execution_space> policy_initB(0, (collapse_index_rangeB) ); + + double dt_min = 0; + + Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); + execution_space::fence(); + Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? dt : dt_min ; + + //Correctness check - first iteration only + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast<long>(icount); ++l ) { + for ( long j = 0; j < static_cast<long>(jcount); ++j ) { + for ( long k = 0; k < static_cast<long>(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Correctness error at index: " << l << ","<<j<<","<<k<<"\n" + << " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl; + //exit(-1); + } + } } } + if ( numErrors != 0 ) { std::cout << " RP collapse2: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; } + //else { std::cout << " RP collapse2: Pass! 
" << std::endl; } + } + } + + return dt_min; + } + +}; + + +template< class DeviceType + , typename ScalarType = double + , typename TestLayout = Kokkos::LayoutRight + > +struct RangePolicyCollapseAll +{ + // RangePolicy for 3D range, but will collapse all dims + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + typedef TestLayout layout; + + typedef Kokkos::View<ScalarType***, TestLayout, DeviceType> view_type; + typedef typename view_type::HostMirror host_view_type; + + view_type A; + view_type B; + const long irange; + const long jrange; + const long krange; + + RangePolicyCollapseAll(view_type & A_, const view_type & B_, const long &irange_, const long &jrange_, const long &krange_) + : A(A_), B(B_), irange(irange_), jrange(jrange_), krange(krange_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) + { + long i = int(r / (jrange*krange)); + long j = int(( r - i*jrange*krange)/krange); + long k = int(r - i*jrange*krange - j*krange); + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) + { + long k = int(r / (irange*jrange)); + long j = int(( r - k*irange*jrange)/irange); + long i = int(r - k*irange*jrange - j*irange); + A(i,j,k) = 0.25*(ScalarType)( B(i+2,j,k) + B(i+1,j,k) + + B(i,j+2,k) + B(i,j+1,k) + + B(i,j,k+2) + B(i,j,k+1) + + B(i,j,k) ); + } + } + + + struct Init + { + view_type input; + const long irange; + const long jrange; + const long krange; + + Init(const view_type & input_, const long &irange_, const long &jrange_, const long &krange_) + : input(input_), irange(irange_), jrange(jrange_), krange(krange_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const long r) const + { + if ( std::is_same<TestLayout, Kokkos::LayoutRight>::value ) + { + long i = int(r / (jrange*krange)); + long j = int(( r - i*jrange*krange)/krange); + long k = int(r - i*jrange*krange - j*krange); + input(i,j,k) = 1; + } + else if ( std::is_same<TestLayout, Kokkos::LayoutLeft>::value ) + { + long k = int(r / (irange*jrange)); + long j = int(( r - k*irange*jrange)/irange); + long i = int(r - k*irange*jrange - j*irange); + input(i,j,k) = 1; + } + } + }; + + + static double test_collapse_all(const unsigned int icount, const unsigned int jcount, const unsigned int kcount, const long iter = 1) + { + //This test refers to collapsing all dims using the RangePolicy + view_type Atest("Atest", icount, jcount, kcount); + view_type Btest("Btest", icount+2, jcount+2, kcount+2); + typedef RangePolicyCollapseAll<execution_space,ScalarType,TestLayout> FunctorType; + + const long flat_index_range = icount*jcount*kcount; + Kokkos::RangePolicy<execution_space> policy(0, flat_index_range ); + Kokkos::RangePolicy<execution_space> policy_initB(0, (icount+2)*(jcount+2)*(kcount+2) ); + + double dt_min = 0; + + Kokkos::parallel_for( policy, Init(Atest,icount,jcount,kcount) ); + execution_space::fence(); + Kokkos::parallel_for( policy_initB, Init(Btest,icount+2,jcount+2,kcount+2) ); + execution_space::fence(); + + for (int i = 0; i < iter; ++i) + { + Kokkos::Timer timer; + Kokkos::parallel_for(policy, FunctorType(Atest, Btest, icount, jcount, kcount)); + execution_space::fence(); + const double dt = timer.seconds(); + if ( 0 == i ) dt_min = dt ; + else dt_min = dt < dt_min ? 
dt : dt_min ; + + //Correctness check - first iteration only + if ( 0 == i ) + { + long numErrors = 0; + host_view_type Ahost("Ahost", icount, jcount, kcount); + Kokkos::deep_copy(Ahost, Atest); + host_view_type Bhost("Bhost", icount+2, jcount+2, kcount+2); + Kokkos::deep_copy(Bhost, Btest); + + // On KNL, this may vectorize - add print statement to prevent + // Also, compare against epsilon, as vectorization can change bitwise answer + for ( long l = 0; l < static_cast<long>(icount); ++l ) { + for ( long j = 0; j < static_cast<long>(jcount); ++j ) { + for ( long k = 0; k < static_cast<long>(kcount); ++k ) { + ScalarType check = 0.25*(ScalarType)( Bhost(l+2,j,k) + Bhost(l+1,j,k) + + Bhost(l,j+2,k) + Bhost(l,j+1,k) + + Bhost(l,j,k+2) + Bhost(l,j,k+1) + + Bhost(l,j,k) ); + if ( Ahost(l,j,k) - check != 0 ) { + ++numErrors; + std::cout << " Collapse ALL Correctness error at index: " << l << ","<<j<<","<<k<<"\n" + << " flat Ahost = " << Ahost(l,j,k) << " expected = " << check << std::endl; + //exit(-1); + } + } } } + if ( numErrors != 0 ) { std::cout << " RP collapse all: errors " << numErrors << " range product " << icount*jcount*kcount << " LL " << jcount*kcount << " LR " << icount*jcount << std::endl; } + //else { std::cout << " RP collapse all: Pass! " << std::endl; } + } + } + + return dt_min; + } + +}; + +} //end namespace Test diff --git a/lib/kokkos/core/src/CMakeLists.txt b/lib/kokkos/core/src/CMakeLists.txt index 807a01ed01b128c531b87df0c27e1d406525b603..492470d05d07ee5684a04bff54fc103e82708ba9 100644 --- a/lib/kokkos/core/src/CMakeLists.txt +++ b/lib/kokkos/core/src/CMakeLists.txt @@ -92,13 +92,13 @@ LIST(APPEND SOURCES ${SOURCES_CUDA} ) INSTALL(FILES ${HEADERS_CUDA} DESTINATION ${TRILINOS_INCDIR}/Cuda/) #----------------------------------------------------------------------------- -FILE(GLOB HEADERS_QTHREAD Qthread/*.hpp) -FILE(GLOB SOURCES_QTHREAD Qthread/*.cpp) +FILE(GLOB HEADERS_QTHREADS Qthreads/*.hpp) +FILE(GLOB SOURCES_QTHREADS Qthreads/*.cpp) -LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREAD} ) -LIST(APPEND SOURCES ${SOURCES_QTHREAD} ) +LIST(APPEND HEADERS_PRIVATE ${HEADERS_QTHREADS} ) +LIST(APPEND SOURCES ${SOURCES_QTHREADS} ) -INSTALL(FILES ${HEADERS_QTHREAD} DESTINATION ${TRILINOS_INCDIR}/Qthread/) +INSTALL(FILES ${HEADERS_QTHREADS} DESTINATION ${TRILINOS_INCDIR}/Qthreads/) #----------------------------------------------------------------------------- @@ -109,5 +109,3 @@ TRIBITS_ADD_LIBRARY( SOURCES ${SOURCES} DEPLIBS ) - - diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e0eadb25a005f09e1c9d37400bd76a611cc4eb3b --- /dev/null +++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile.hpp @@ -0,0 +1,1300 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_CUDA_EXP_ITERATE_TILE_HPP +#define KOKKOS_CUDA_EXP_ITERATE_TILE_HPP + +#include <iostream> +#include <algorithm> +#include <stdio.h> + +#include <Kokkos_Macros.hpp> + +/* only compile this file if CUDA is enabled for Kokkos */ +#if defined( __CUDACC__ ) && defined( KOKKOS_HAVE_CUDA ) + +#include <utility> + +//#include<Cuda/Kokkos_CudaExec.hpp> +// Including the file above, leads to following type of errors: +// /home/ndellin/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp(84): error: incomplete type is not allowed +// As a result, recreate cuda_parallel_launch and associated code + +#if defined(KOKKOS_ENABLE_PROFILING) +#include <impl/Kokkos_Profiling_Interface.hpp> +#include <typeinfo> +#endif + +namespace Kokkos { namespace Experimental { namespace Impl { + +// ------------------------------------------------------------------ // + +template< class DriverType > +__global__ +static void cuda_parallel_launch( const DriverType driver ) +{ + driver(); +} + +template< class DriverType > +struct CudaLaunch +{ + inline + CudaLaunch( const DriverType & driver + , const dim3 & grid + , const dim3 & block + ) + { + cuda_parallel_launch< DriverType ><<< grid , block >>>(driver); + } + +}; + +// ------------------------------------------------------------------ // +template< int N , typename RP , typename Functor , typename Tag > +struct apply_impl; + +//Rank 2 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<2,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + /* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + m_func(i, j); + } } +*/ + for ( index_type tile_id1 = 
blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1); + } + } + } + } + } +// LR + else { +/* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + m_func(i, j); + } } +*/ + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + m_func(offset_0 , offset_1); + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; + +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<2,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + // Loop over size maxnumblocks until full range covered +/* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + m_func(Tag(), i, j); + } } +*/ + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1); + } + } + } + } + } + else { +/* + index_type offset_1 = blockIdx.y*m_rp.m_tile[1] + threadIdx.y; + index_type offset_0 = blockIdx.x*m_rp.m_tile[0] + threadIdx.x; + + for ( index_type i = offset_0; i < m_rp.m_upper[0], threadIdx.x < m_rp.m_tile[0]; i += (gridDim.x*m_rp.m_tile[0]) ) { + for ( index_type j = offset_1; j < m_rp.m_upper[1], threadIdx.y < m_rp.m_tile[1]; j += (gridDim.y*m_rp.m_tile[1]) ) { + m_func(Tag(), i, j); + } } +*/ + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < 
m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + m_func(Tag(), offset_0 , offset_1); + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 3 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<3,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } +// LR + else { + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + m_func(offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for void tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<3,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < 
m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + else { + for ( index_type tile_id0 = blockIdx.x; tile_id0 < m_rp.m_tile_end[0]; tile_id0 += gridDim.x ) { + const index_type offset_0 = tile_id0*m_rp.m_tile[0] + threadIdx.x; + if ( offset_0 < m_rp.m_upper[0] && threadIdx.x < m_rp.m_tile[0] ) { + + for ( index_type tile_id1 = blockIdx.y; tile_id1 < m_rp.m_tile_end[1]; tile_id1 += gridDim.y ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + threadIdx.y; + if ( offset_1 < m_rp.m_upper[1] && threadIdx.y < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.z; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.z ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.z; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.z < m_rp.m_tile[2] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2); + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 4 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<4,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } +// LR + else { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for void tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<4,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + inline __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { + if (RP::inner_direction == RP::Left) { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag(), offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + else { + const index_type temp0 = m_rp.m_tile_end[0]; + const index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = tile_id1*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type tile_id2 = blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) { + const index_type offset_2 = tile_id2*m_rp.m_tile[2] + threadIdx.y; + if ( offset_2 < m_rp.m_upper[2] && threadIdx.y < m_rp.m_tile[2] ) { + + for ( index_type tile_id3 = blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) { + const index_type offset_3 = tile_id3*m_rp.m_tile[3] + threadIdx.z; + if ( offset_3 < m_rp.m_upper[3] && threadIdx.z < m_rp.m_tile[3] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3); + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 5 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<5,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4); + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4); + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<5,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4); + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type tile_id4 = blockIdx.z; tile_id4 < m_rp.m_tile_end[4]; tile_id4 += gridDim.z ) { + const index_type offset_4 = tile_id4*m_rp.m_tile[4] + threadIdx.z; + if ( offset_4 < m_rp.m_upper[4] && threadIdx.z < m_rp.m_tile[4] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4); + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + + +//Rank 6 +// Specializations for void tag type +template< typename RP , typename Functor > +struct apply_impl<6,RP,Functor,void > +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl4 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl5 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl4 ) : + ( temp1 <= max_blocks ? 
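// [Editorial aside] The rank-5 and rank-6 specializations extend the same folding:
// tile-index pairs (0,1), (2,3) and (4,5) are packed into blockIdx.x, blockIdx.y and
// blockIdx.z respectively (a lone trailing dimension uses its grid coordinate
// directly), and the matching intra-tile index pairs share threadIdx.x/.y/.z, again
// decoded with mod/div for the Left layout and div/mod for the Right layout.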
temp1 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z % numbl4; + const index_type tile_id5 = blockIdx.z / numbl4; + const index_type thr_id4 = threadIdx.z % m_rp.m_tile[4]; + const index_type thr_id5 = threadIdx.z / m_rp.m_tile[4]; + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl5 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl4 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl5 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z / numbl5; + const index_type tile_id5 = blockIdx.z % numbl5; + const index_type thr_id4 = threadIdx.z / m_rp.m_tile[5]; + const index_type thr_id5 = threadIdx.z % m_rp.m_tile[5]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + m_func(offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// Specializations for tag type +template< typename RP , typename Functor , typename Tag > +struct apply_impl<6,RP,Functor,Tag> +{ + using index_type = typename RP::index_type; + + __device__ + apply_impl( const RP & rp_ , const Functor & f_ ) + : m_rp(rp_) + , m_func(f_) + {} + + static constexpr index_type max_blocks = 65535; + + inline __device__ + void exec_range() const + { +// LL + if (RP::inner_direction == RP::Left) { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x % numbl0; + const index_type tile_id1 = blockIdx.x / numbl0; + const index_type thr_id0 = threadIdx.x % m_rp.m_tile[0]; + const index_type thr_id1 = threadIdx.x / m_rp.m_tile[0]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl2 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl3 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl2 ) : + ( temp1 <= max_blocks ? temp1 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y % numbl2; + const index_type tile_id3 = blockIdx.y / numbl2; + const index_type thr_id2 = threadIdx.y % m_rp.m_tile[2]; + const index_type thr_id3 = threadIdx.y / m_rp.m_tile[2]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl4 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ; + const index_type numbl5 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl4 ) : + ( temp1 <= max_blocks ? 
temp1 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z % numbl4; + const index_type tile_id5 = blockIdx.z / numbl4; + const index_type thr_id4 = threadIdx.z % m_rp.m_tile[4]; + const index_type thr_id5 = threadIdx.z / m_rp.m_tile[4]; + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type j = tile_id1 ; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type i = tile_id0 ; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3, offset_4, offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } +// LR + else { + index_type temp0 = m_rp.m_tile_end[0]; + index_type temp1 = m_rp.m_tile_end[1]; + const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id0 = blockIdx.x / numbl1; + const index_type tile_id1 = blockIdx.x % numbl1; + const index_type thr_id0 = threadIdx.x / m_rp.m_tile[1]; + const index_type thr_id1 = threadIdx.x % m_rp.m_tile[1]; + + temp0 = m_rp.m_tile_end[2]; + temp1 = m_rp.m_tile_end[3]; + const index_type numbl3 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl2 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl3 ) : + ( temp0 <= max_blocks ? temp0 : max_blocks ) ); + + const index_type tile_id2 = blockIdx.y / numbl3; + const index_type tile_id3 = blockIdx.y % numbl3; + const index_type thr_id2 = threadIdx.y / m_rp.m_tile[3]; + const index_type thr_id3 = threadIdx.y % m_rp.m_tile[3]; + + temp0 = m_rp.m_tile_end[4]; + temp1 = m_rp.m_tile_end[5]; + const index_type numbl5 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ; + const index_type numbl4 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl5 ) : + ( temp0 <= max_blocks ? 
temp0 : max_blocks ) ); + + const index_type tile_id4 = blockIdx.z / numbl5; + const index_type tile_id5 = blockIdx.z % numbl5; + const index_type thr_id4 = threadIdx.z / m_rp.m_tile[5]; + const index_type thr_id5 = threadIdx.z % m_rp.m_tile[5]; + + for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) { + const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0; + if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) { + + for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) { + const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1; + if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) { + + for ( index_type k = tile_id2; k < m_rp.m_tile_end[2]; k += numbl2 ) { + const index_type offset_2 = k*m_rp.m_tile[2] + thr_id2; + if ( offset_2 < m_rp.m_upper[2] && thr_id2 < m_rp.m_tile[2] ) { + + for ( index_type l = tile_id3; l < m_rp.m_tile_end[3]; l += numbl3 ) { + const index_type offset_3 = l*m_rp.m_tile[3] + thr_id3; + if ( offset_3 < m_rp.m_upper[3] && thr_id3 < m_rp.m_tile[3] ) { + + for ( index_type m = tile_id4; m < m_rp.m_tile_end[4]; m += numbl4 ) { + const index_type offset_4 = m*m_rp.m_tile[4] + thr_id4; + if ( offset_4 < m_rp.m_upper[4] && thr_id4 < m_rp.m_tile[4] ) { + + for ( index_type n = tile_id5; n < m_rp.m_tile_end[5]; n += numbl5 ) { + const index_type offset_5 = n*m_rp.m_tile[5] + thr_id5; + if ( offset_5 < m_rp.m_upper[5] && thr_id5 < m_rp.m_tile[5] ) { + m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3 , offset_4 , offset_5); + } + } + } + } + } + } + } + } + } + } + } + } + } + + } //end exec_range + +private: + const RP & m_rp; + const Functor & m_func; +}; + +// ---------------------------------------------------------------------------------- + +template < typename RP + , typename Functor + , typename Tag + > +struct DeviceIterateTile +{ + using index_type = typename RP::index_type; + using array_index_type = typename RP::array_index_type; + using point_type = typename RP::point_type; + + struct VoidDummy {}; + typedef typename std::conditional< std::is_same<Tag, void>::value, VoidDummy, Tag>::type usable_tag; + + DeviceIterateTile( const RP & rp, const Functor & func ) + : m_rp{rp} + , m_func{func} + {} + +private: + inline __device__ + void apply() const + { + apply_impl<RP::rank,RP,Functor,Tag>(m_rp,m_func).exec_range(); + } //end apply + +public: + + inline + __device__ + void operator()(void) const + { + this-> apply(); + } + + inline + void execute() const + { + const array_index_type maxblocks = 65535; //not true for blockIdx.x for newer archs + if ( RP::rank == 2 ) + { + const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , 1); + const dim3 grid( + std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks ) + , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) + , 1 + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 3 ) + { + const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , m_rp.m_tile[2] ); + const dim3 grid( + std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks ) + , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks ) + , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 4 ) + { + // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , 
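// [Editorial aside - a sketch of the launch-shape arithmetic with made-up extents,
//  mirroring the rank-3 branch above; not part of the patch.]
//   extents {200, 300, 50}, tiles {4, 4, 4}:
//     block = dim3( 4, 4, 4 );
//     grid  = dim3( min((200+3)/4, 65535),   // 50 tiles in dimension 0
//                   min((300+3)/4, 65535),   // 75 tiles in dimension 1
//                   min(( 50+3)/4, 65535) ); // 13 tiles in dimension 2
// For rank >= 4 the tile extents are multiplied pairwise into block.x/.y/.z and the
// grid dimensions become the (capped) products of the corresponding tile counts.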
m_rp.m_tile[2] , m_rp.m_tile[3] ); + const dim3 grid( + std::min( static_cast<index_type>( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast<index_type>(maxblocks) ) + , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1 ) / block.y , maxblocks ) + , std::min( ( m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 5 ) + { + // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4] ); + const dim3 grid( + std::min( static_cast<index_type>( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast<index_type>(maxblocks) ) + , std::min( static_cast<index_type>( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] ) + , static_cast<index_type>(maxblocks) ) + , std::min( ( m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1 ) / block.z , maxblocks ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else if ( RP::rank == 6 ) + { + // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to threadIdx.z + const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4]*m_rp.m_tile[5] ); + const dim3 grid( + std::min( static_cast<index_type>( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] ) + , static_cast<index_type>(maxblocks) ) + , std::min( static_cast<index_type>( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] ) + , static_cast<index_type>(maxblocks) ) + , std::min( static_cast<index_type>( m_rp.m_tile_end[4] * m_rp.m_tile_end[5] ) + , static_cast<index_type>(maxblocks) ) + ); + CudaLaunch< DeviceIterateTile >( *this , grid , block ); + } + else + { + printf("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n"); + Kokkos::abort("Aborting"); + } + + } //end execute + +protected: + const RP m_rp; + const Functor m_func; +}; + +} } } //end namespace Kokkos::Experimental::Impl + +#endif +#endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp index 0a0f41686bab1232f0bebe9e66dc4f6b08c76d6b..a273db998ba808726f4d9b5bc17bfc10347952ed 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp @@ -131,6 +131,7 @@ namespace Impl { int* atomic; int* scratch; int* threadid; + int n; }; } } @@ -250,6 +251,7 @@ struct CudaParallelLaunch< DriverType , true > { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif @@ -292,6 +294,7 @@ struct CudaParallelLaunch< DriverType , false > { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp index 91a3c921381709fc0ade5776b03ef48a2abcfe67..303b3fa4f699f0e56c7d44682197bd050b2ac7ca 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // 
************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -59,7 +59,7 @@ #include <Cuda/Kokkos_Cuda_Internal.hpp> #include <impl/Kokkos_Error.hpp> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #endif @@ -184,7 +184,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const enum { max_uvm_allocations = 65536 }; - if ( arg_alloc_size > 0 ) + if ( arg_alloc_size > 0 ) { Kokkos::Impl::num_uvm_allocations++; @@ -193,7 +193,7 @@ void * CudaUVMSpace::allocate( const size_t arg_alloc_size ) const } CUDA_SAFE_CALL( cudaMallocManaged( &ptr, arg_alloc_size , cudaMemAttachGlobal ) ); - } + } return ptr ; } @@ -375,7 +375,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::CudaSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { SharedAllocationHeader header ; @@ -395,7 +395,7 @@ SharedAllocationRecord< Kokkos::CudaSpace , void >:: SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::fence(); //Make sure I can access the label ... 
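// [Editorial aside on the `#if (KOKKOS_ENABLE_PROFILING)` -> `#if defined(...)`
//  changes in this file: if the macro is defined but empty, as generated
//  configuration headers commonly make it, the old form expands to `#if ()`, which
//  is a preprocessing error, and if it is undefined it silently evaluates to 0.
//  Testing with defined() only asks whether the macro exists:
//    #define KOKKOS_ENABLE_PROFILING
//    #if (KOKKOS_ENABLE_PROFILING)          // error: expected value in expression
//    #if defined(KOKKOS_ENABLE_PROFILING)   // true
//  ]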
Kokkos::Profiling::deallocateData( @@ -412,7 +412,7 @@ SharedAllocationRecord< Kokkos::CudaUVMSpace , void >:: SharedAllocationRecord< Kokkos::CudaHostPinnedSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::CudaHostPinnedSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -442,7 +442,7 @@ SharedAllocationRecord( const Kokkos::CudaSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -479,7 +479,7 @@ SharedAllocationRecord( const Kokkos::CudaUVMSpace & arg_space , m_tex_obj( 0 ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -510,7 +510,7 @@ SharedAllocationRecord( const Kokkos::CudaHostPinnedSpace & arg_space ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -745,14 +745,14 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail //Formatting dependent on sizeof(uintptr_t) const char * format_string; - if (sizeof(uintptr_t) == sizeof(unsigned long)) { + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "Cuda addr( 0x%.12lx ) list( 0x%.12lx 0x%.12lx ) extent[ 0x%.12lx + %.8ld ] count(%d) dealloc(0x%.12lx) %s\n"; } - else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { format_string = "Cuda addr( 0x%.12llx ) list( 0x%.12llx 0x%.12llx ) extent[ 0x%.12llx + %.8ld ] count(%d) dealloc(0x%.12llx) %s\n"; } - snprintf( buffer , 256 + snprintf( buffer , 256 , format_string , reinterpret_cast<uintptr_t>( r ) , reinterpret_cast<uintptr_t>( r->m_prev ) @@ -776,14 +776,14 @@ print_records( std::ostream & s , const Kokkos::CudaSpace & space , bool detail //Formatting dependent on sizeof(uintptr_t) const char * format_string; - if (sizeof(uintptr_t) == sizeof(unsigned long)) { + if (sizeof(uintptr_t) == sizeof(unsigned long)) { format_string = "Cuda [ 0x%.12lx + %ld ] %s\n"; } - else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { + else if (sizeof(uintptr_t) == sizeof(unsigned long long)) { format_string = "Cuda [ 0x%.12llx + %ld ] %s\n"; } - snprintf( buffer , 256 + snprintf( buffer , 256 , format_string , reinterpret_cast< uintptr_t >( r->data() ) , r->size() @@ -883,6 +883,7 @@ void init_lock_arrays_cuda_space() { locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK+255)/256,256>>>(); init_lock_array_kernel_scratch_threadid<<<(Kokkos::Cuda::concurrency()+255)/256,256>>>(Kokkos::Cuda::concurrency()); diff --git 
a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp index eeea97049fa3e8ba949fb9aed7841b4639bea928..44d908d1023197c5a8d0232a3d13ff49d06ef8d9 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -505,18 +505,18 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cout << " without setting CUDA_LAUNCH_BLOCKING=1." << std::endl; std::cout << " The code must call Cuda::fence() after each kernel" << std::endl; - std::cout << " or will likely crash when accessing data on the host." << std::endl; + std::cout << " or will likely crash when accessing data on the host." << std::endl; } const char * env_force_device_alloc = getenv("CUDA_MANAGED_FORCE_DEVICE_ALLOC"); bool force_device_alloc; if (env_force_device_alloc == 0) force_device_alloc=false; else force_device_alloc=atoi(env_force_device_alloc)!=0; - + const char * env_visible_devices = getenv("CUDA_VISIBLE_DEVICES"); bool visible_devices_one=true; if (env_visible_devices == 0) visible_devices_one=false; - + if(!visible_devices_one && !force_device_alloc) { std::cout << "Kokkos::Cuda::initialize WARNING: Cuda is allocating into UVMSpace by default" << std::endl; std::cout << " without setting CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 or " << std::endl; @@ -536,6 +536,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count ) locks.atomic = atomic_lock_array_cuda_space_ptr(false); locks.scratch = scratch_lock_array_cuda_space_ptr(false); locks.threadid = threadid_lock_array_cuda_space_ptr(false); + locks.n = Kokkos::Cuda::concurrency(); cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) ); #endif } @@ -620,9 +621,9 @@ void CudaInternal::finalize() was_finalized = 1; if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) { - atomic_lock_array_cuda_space_ptr(false); - scratch_lock_array_cuda_space_ptr(false); - threadid_lock_array_cuda_space_ptr(false); + atomic_lock_array_cuda_space_ptr(true); + scratch_lock_array_cuda_space_ptr(true); + threadid_lock_array_cuda_space_ptr(true); if ( m_stream ) { for ( size_type i = 1 ; i < m_streamCount ; ++i ) { @@ -700,7 +701,7 @@ void Cuda::initialize( const Cuda::SelectDevice config , size_t num_instances ) { Impl::CudaInternal::singleton().initialize( config.cuda_device_id , num_instances ); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -739,7 +740,7 @@ void Cuda::finalize() { Impl::CudaInternal::singleton().finalize(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif } diff --git 
a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp index fa29d732f473d727b5ac8beb81c8602d0e715914..56e6a3c1e34123d8fc58dbfffea0574acea31047 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp @@ -61,7 +61,7 @@ #include <Cuda/Kokkos_Cuda_Internal.hpp> #include <Kokkos_Vectorization.hpp> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #include <typeinfo> #endif @@ -586,13 +586,35 @@ public: void operator()(void) const { // Iterate this block through the league + int threadid = 0; + if ( m_scratch_size[1]>0 ) { + __shared__ int base_thread_id; + if (threadIdx.x==0 && threadIdx.y==0 ) { + threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % kokkos_impl_cuda_lock_arrays.n; + threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1)); + if(!done) { + threadid += blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + } + + for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) { this-> template exec_team< WorkTag >( typename Policy::member_type( kokkos_impl_cuda_shared_memory<void>() , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) ); @@ -946,11 +968,32 @@ public: __device__ inline void operator() () const { - run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0) ); + int threadid = 0; + if ( m_scratch_size[1]>0 ) { + __shared__ int base_thread_id; + if (threadIdx.x==0 && threadIdx.y==0 ) { + threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % kokkos_impl_cuda_lock_arrays.n; + threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y; + int done = 0; + while (!done) { + done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1)); + if(!done) { + threadid += blockDim.x * blockDim.y; + if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0; + } + } + base_thread_id = threadid; + } + __syncthreads(); + threadid = base_thread_id; + } + + run(Kokkos::Impl::if_c<UseShflReduction, DummyShflReductionType, DummySHMEMReductionType>::select(1,1.0), threadid ); } __device__ inline - void run(const DummySHMEMReductionType&) const + void run(const DummySHMEMReductionType&, const int& threadid) const { const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) > word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) ); @@ -964,7 +1007,7 @@ public: ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) @@ -992,7 +1035,7 @@ public: } __device__ 
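// [Editorial aside - what the threadid blocks added above do; a sketch, not part of
//  the patch.] When level-1 (team) scratch is requested, each block must grab its
// own partition of the preallocated scratch buffer. Thread (0,0) of the block claims
// a slot in the global lock array with atomicCAS and shares it via __shared__ memory:
//   while ( atomicCAS( &kokkos_impl_cuda_lock_arrays.atomic[threadid], 0, 1 ) != 0 )
//     threadid = next candidate slot (wrapping at kokkos_impl_cuda_lock_arrays.n);
// The claimed index, divided by the block's thread count, then selects the block's
// slice of m_scratch_ptr[1] (in units of m_scratch_size[1]), so concurrently running
// blocks never write into the same scratch partition.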
inline - void run(const DummyShflReductionType&) const + void run(const DummyShflReductionType&, const int& threadid) const { value_type value; ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value); @@ -1003,7 +1046,7 @@ public: ( Member( kokkos_impl_cuda_shared_memory<char>() + m_team_begin , m_shmem_begin , m_shmem_size - , m_scratch_ptr[1] + , (void*) ( ((char*)m_scratch_ptr[1]) + threadid/(blockDim.x*blockDim.y) * m_scratch_size[1]) , m_scratch_size[1] , league_rank , m_league_size ) @@ -1128,9 +1171,9 @@ public: Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too much L0 scratch memory")); } - if ( m_team_size > - Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > - ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length()) { + if ( unsigned(m_team_size) > + unsigned(Kokkos::Impl::cuda_get_max_block_size< ParallelReduce > + ( arg_functor , arg_policy.vector_length(), arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) / arg_policy.vector_length())) { Kokkos::Impl::throw_runtime_exception(std::string("Kokkos::Impl::ParallelReduce< Cuda > requested too large team size.")); } @@ -1621,14 +1664,25 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Cuda #endif } -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. +/** \brief Intra-thread vector parallel_reduce. * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ + * Calls lambda(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed using += + * and output into result. + * + * The identity value for the += operator is assumed to be the default + * constructed value. + */ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& - loop_boundaries, const Lambda & lambda, ValueType& result) { +void parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > + const & loop_boundaries + , Lambda const & lambda + , ValueType & result ) +{ #ifdef __CUDA_ARCH__ result = ValueType(); @@ -1636,52 +1690,42 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::C lambda(i,result); } - if (loop_boundaries.increment > 1) - result += shfl_down(result, 1,loop_boundaries.increment); - if (loop_boundaries.increment > 2) - result += shfl_down(result, 2,loop_boundaries.increment); - if (loop_boundaries.increment > 4) - result += shfl_down(result, 4,loop_boundaries.increment); - if (loop_boundaries.increment > 8) - result += shfl_down(result, 8,loop_boundaries.increment); - if (loop_boundaries.increment > 16) - result += shfl_down(result, 16,loop_boundaries.increment); - - result = shfl(result,0,loop_boundaries.increment); + Impl::cuda_intra_warp_vector_reduce( + Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & result ) ); + #endif } -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. +/** \brief Intra-thread vector parallel_reduce. 
* - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ + * Calls lambda(iType i, ValueType & val) for each i=[0..N). + * + * The range [0..N) is mapped to all vector lanes of + * the calling thread and a reduction of val is performed + * using JoinType::operator()(ValueType& val, const ValueType& update) + * and output into result. + * + * The input value of result must be the identity value for the + * reduction operation; e.g., ( 0 , += ) or ( 1 , *= ). + */ template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember >& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - +void parallel_reduce + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::CudaTeamMember > + const & loop_boundaries + , Lambda const & lambda + , JoinType const & join + , ValueType & result ) +{ #ifdef __CUDA_ARCH__ - ValueType result = init_result; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { lambda(i,result); } - if (loop_boundaries.increment > 1) - join( result, shfl_down(result, 1,loop_boundaries.increment)); - if (loop_boundaries.increment > 2) - join( result, shfl_down(result, 2,loop_boundaries.increment)); - if (loop_boundaries.increment > 4) - join( result, shfl_down(result, 4,loop_boundaries.increment)); - if (loop_boundaries.increment > 8) - join( result, shfl_down(result, 8,loop_boundaries.increment)); - if (loop_boundaries.increment > 16) - join( result, shfl_down(result, 16,loop_boundaries.increment)); - - init_result = shfl(result,0,loop_boundaries.increment); + Impl::cuda_intra_warp_vector_reduce( + Impl::Reducer< ValueType , JoinType >( join , & result ) ); + #endif } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp index ad9cca26ce2463df58820da78a3fb2e16c2a351c..79b3867ba24a87e787faac051c21abf6a99795de 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp @@ -55,15 +55,163 @@ #include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Error.hpp> #include <Cuda/Kokkos_Cuda_Vectorization.hpp> + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { namespace Impl { +//---------------------------------------------------------------------------- + +template< typename T > +__device__ inline +void cuda_shfl( T & out , T const & in , int lane , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast<int*>(&out) = + __shfl( *reinterpret_cast<int const *>(&in) , lane , width ); +} + +template< typename T > +__device__ inline +void cuda_shfl( T & out , T const & in , int lane , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int >::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; 
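// [Editorial aside] The __shfl* intrinsics move 32-bit words, so these overloads
// shuffle wider value types int-word by int-word through the reinterpret_casts; a
// double, for example, travels as sizeof(double)/sizeof(int) == 2 shuffles. The
// enable_if guards select the single-word overload when sizeof(T) == sizeof(int).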
++i ) { + reinterpret_cast<int*>(&out)[i] = + __shfl( reinterpret_cast<int const *>(&in)[i] , lane , width ); + } +} + +//---------------------------------------------------------------------------- + +template< typename T > +__device__ inline +void cuda_shfl_down( T & out , T const & in , int delta , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast<int*>(&out) = + __shfl_down( *reinterpret_cast<int const *>(&in) , delta , width ); +} + +template< typename T > +__device__ inline +void cuda_shfl_down( T & out , T const & in , int delta , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int >::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; ++i ) { + reinterpret_cast<int*>(&out)[i] = + __shfl_down( reinterpret_cast<int const *>(&in)[i] , delta , width ); + } +} +//---------------------------------------------------------------------------- -//Shfl based reductions +template< typename T > +__device__ inline +void cuda_shfl_up( T & out , T const & in , int delta , + typename std::enable_if< sizeof(int) == sizeof(T) , int >::type width ) +{ + *reinterpret_cast<int*>(&out) = + __shfl_up( *reinterpret_cast<int const *>(&in) , delta , width ); +} + +template< typename T > +__device__ inline +void cuda_shfl_up( T & out , T const & in , int delta , + typename std::enable_if + < ( sizeof(int) < sizeof(T) ) && ( 0 == ( sizeof(T) % sizeof(int) ) ) + , int >::type width ) +{ + enum : int { N = sizeof(T) / sizeof(int) }; + + for ( int i = 0 ; i < N ; ++i ) { + reinterpret_cast<int*>(&out)[i] = + __shfl_up( reinterpret_cast<int const *>(&in)[i] , delta , width ); + } +} + +//---------------------------------------------------------------------------- +/** \brief Reduce within a warp over blockDim.x, the "vector" dimension. + * + * This will be called within a nested, intra-team parallel operation. + * Use shuffle operations to avoid conflicts with shared memory usage. + * + * Requires: + * blockDim.x is power of 2 + * blockDim.x <= 32 (one warp) + * + * Cannot use "butterfly" pattern because floating point + * addition is non-associative. Therefore, must broadcast + * the final result. + */ +template< class Reducer > +__device__ inline +void cuda_intra_warp_vector_reduce( Reducer const & reducer ) +{ + static_assert( + std::is_reference< typename Reducer::reference_type >::value , "" ); + + if ( 1 < blockDim.x ) { + + typename Reducer::value_type tmp ; + + for ( int i = blockDim.x ; ( i >>= 1 ) ; ) { + + cuda_shfl_down( tmp , reducer.reference() , i , blockDim.x ); + + if ( threadIdx.x < i ) { reducer.join( reducer.data() , & tmp ); } + } + + // Broadcast from root "lane" to all other "lanes" + + cuda_shfl( reducer.reference() , reducer.reference() , 0 , blockDim.x ); + } +} + +/** \brief Inclusive scan over blockDim.x, the "vector" dimension. + * + * This will be called within a nested, intra-team parallel operation. + * Use shuffle operations to avoid conflicts with shared memory usage. + * + * Algorithm is concurrent bottom-up reductions in triangular pattern + * where each CUDA thread is the root of a reduction tree from the + * zeroth CUDA thread to itself. 
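 * [Editorial illustration, not part of the patch:] with blockDim.x == 4 and every
 * lane starting from local == 1, the i == 1 step gives {1,2,2,2} and the i == 2 step
 * gives {1,2,3,4}, i.e. lane t ends holding the inclusive sum of lanes 0..t. The
 * warp reduction above uses the same shuffles top-down (shfl_down halving the span)
 * and then broadcasts lane 0's result, since a butterfly exchange would apply the
 * non-associative floating-point additions in a different order on each lane.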
+ * + * Requires: + * blockDim.x is power of 2 + * blockDim.x <= 32 (one warp) + */ +template< typename ValueType > +__device__ inline +void cuda_intra_warp_vector_inclusive_scan( ValueType & local ) +{ + ValueType tmp ; + + // Bottom up: + // [t] += [t-1] if t >= 1 + // [t] += [t-2] if t >= 2 + // [t] += [t-4] if t >= 4 + // ... + + for ( int i = 1 ; i < blockDim.x ; i <<= 1 ) { + + cuda_shfl_up( tmp , local , i , blockDim.x ); + + if ( i <= threadIdx.x ) { local += tmp ; } + } +} + +//---------------------------------------------------------------------------- /* * Algorithmic constraints: * (a) threads with same threadIdx.y have same value @@ -98,7 +246,10 @@ inline void cuda_inter_warp_reduction( ValueType& value, const int max_active_thread = blockDim.y) { #define STEP_WIDTH 4 - __shared__ char sh_result[sizeof(ValueType)*STEP_WIDTH]; + // Depending on the ValueType _shared__ memory must be aligned up to 8byte boundaries + // The reason not to use ValueType directly is that for types with constructors it + // could lead to race conditions + __shared__ double sh_result[(sizeof(ValueType)+7)/8*STEP_WIDTH]; ValueType* result = (ValueType*) & sh_result; const unsigned step = 32 / blockDim.x; unsigned shift = STEP_WIDTH; diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp index c96b8b7d40666830032ee560840cddcc9e52fe04..cf3e55d50cf416cbb6a268c85602e7c7dd8fa4e2 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp @@ -91,7 +91,7 @@ void TaskQueueSpecialization< Kokkos::Cuda >::driver // Loop by priority and then type for ( int i = 0 ; i < Queue::NumQueue && end == task.ptr ; ++i ) { for ( int j = 0 ; j < 2 && end == task.ptr ; ++j ) { - task.ptr = Queue::pop_task( & queue->m_ready[i][j] ); + task.ptr = Queue::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp index 479294f3078a4e0d055610cb38b599415bbac921..a13e37837d8005867f1087b827a4d7e59ebd3209 100644 --- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp +++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp @@ -61,6 +61,8 @@ void set_cuda_task_base_apply_function_pointer } +template< class > class TaskExec ; + template<> class TaskQueueSpecialization< Kokkos::Cuda > { @@ -69,6 +71,7 @@ public: using execution_space = Kokkos::Cuda ; using memory_space = Kokkos::CudaUVMSpace ; using queue_type = TaskQueue< execution_space > ; + using member_type = TaskExec< Kokkos::Cuda > ; static void iff_single_thread_recursive_execute( queue_type * const ) {} @@ -79,13 +82,15 @@ public: static void execute( queue_type * const ); - template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( TaskBase<execution_space,void,void>::function_type * ptr ) + typename TaskType::function_type + get_function_pointer() { - using TaskType = TaskBase< execution_space - , typename FunctorType::value_type - , FunctorType > ; + using function_type = typename TaskType::function_type ; + + function_type * const ptr = + (function_type*) cuda_internal_scratch_unified( sizeof(function_type) ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); @@ -93,6 +98,8 @@ public: CUDA_SAFE_CALL( cudaGetLastError() ); CUDA_SAFE_CALL( cudaDeviceSynchronize() ); + + return *ptr ; } }; @@ -435,18 +442,26 @@ void parallel_reduce // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename ValueType, typename 
iType, class Lambda > +template< typename iType, class Closure > KOKKOS_INLINE_FUNCTION void parallel_scan (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, - const Lambda & lambda) { + const Closure & closure ) +{ + // Extract value_type from closure - ValueType accum = 0 ; - ValueType val, y, local_total; + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + value_type val, y, local_total; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { val = 0; - lambda(i,val,false); + closure(i,val,false); // intra-blockDim.y exclusive scan on 'val' // accum = accumulated, sum in total for this iteration @@ -458,7 +473,7 @@ void parallel_scan } // pass accum to all threads - local_total = shfl_warp_broadcast<ValueType>(val, + local_total = shfl_warp_broadcast<value_type>(val, threadIdx.x+Impl::CudaTraits::WarpSize-blockDim.x, Impl::CudaTraits::WarpSize); @@ -467,7 +482,7 @@ void parallel_scan if ( threadIdx.y == 0 ) { val = 0 ; } val += accum; - lambda(i,val,true); + closure(i,val,true); accum += local_total; } } @@ -478,18 +493,26 @@ void parallel_scan // blockDim.y == team_size // threadIdx.x == position in vec // threadIdx.y == member number -template< typename iType, class Lambda, typename ValueType > +template< typename iType, class Closure > KOKKOS_INLINE_FUNCTION void parallel_scan (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Cuda > >& loop_boundaries, - const Lambda & lambda) + const Closure & closure ) { - ValueType accum = 0 ; - ValueType val, y, local_total; + // Extract value_type from closure + + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + value_type val, y, local_total; for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { val = 0; - lambda(i,val,false); + closure(i,val,false); // intra-blockDim.x exclusive scan on 'val' // accum = accumulated, sum in total for this iteration @@ -501,14 +524,14 @@ void parallel_scan } // pass accum to all threads - local_total = shfl_warp_broadcast<ValueType>(val, blockDim.x-1, blockDim.x); + local_total = shfl_warp_broadcast<value_type>(val, blockDim.x-1, blockDim.x); // make EXCLUSIVE scan by shifting values over one val = Kokkos::shfl_up(val, 1, blockDim.x); if ( threadIdx.x == 0 ) { val = 0 ; } val += accum; - lambda(i,val,true); + closure(i,val,true); accum += local_total; } } diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp index 4e1ce855c5efc9f8ecb414096b87ea14728967f9..a450ca36ae1bb0049c2abd142e20733edcaf2f7c 100644 --- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp +++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp @@ -44,36 +44,47 @@ #ifndef KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP #define KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP +#include <initializer_list> + +#include<impl/KokkosExp_Host_IterateTile.hpp> #include <Kokkos_ExecPolicy.hpp> #include <Kokkos_Parallel.hpp> -#include <initializer_list> -#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_ENABLE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) -#define KOKKOS_IMPL_MDRANGE_IVDEP +#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) +#include<Cuda/KokkosExp_Cuda_IterateTile.hpp> #endif namespace 
Kokkos { namespace Experimental { +// ------------------------------------------------------------------ // + enum class Iterate { Default, // Default for the device Left, // Left indices stride fastest Right, // Right indices stride fastest - Flat, // Do not tile, only valid for inner direction }; template <typename ExecSpace> struct default_outer_direction { using type = Iterate; + #if defined( KOKKOS_ENABLE_CUDA) + static constexpr Iterate value = Iterate::Left; + #else static constexpr Iterate value = Iterate::Right; + #endif }; template <typename ExecSpace> struct default_inner_direction { using type = Iterate; + #if defined( KOKKOS_ENABLE_CUDA) + static constexpr Iterate value = Iterate::Left; + #else static constexpr Iterate value = Iterate::Right; + #endif }; @@ -86,7 +97,7 @@ struct Rank { static_assert( N != 0u, "Kokkos Error: rank 0 undefined"); static_assert( N != 1u, "Kokkos Error: rank 1 is not a multi-dimensional range"); - static_assert( N < 4u, "Kokkos Error: Unsupported rank..."); + static_assert( N < 7u, "Kokkos Error: Unsupported rank..."); using iteration_pattern = Rank<N, OuterDir, InnerDir>; @@ -96,515 +107,370 @@ struct Rank }; - // multi-dimensional iteration pattern template <typename... Properties> struct MDRangePolicy + : public Kokkos::Impl::PolicyTraits<Properties ...> { + using traits = Kokkos::Impl::PolicyTraits<Properties ...>; using range_policy = RangePolicy<Properties...>; - static_assert( !std::is_same<range_policy,void>::value + using impl_range_policy = RangePolicy< typename traits::execution_space + , typename traits::schedule_type + , typename traits::index_type + > ; + + static_assert( !std::is_same<typename traits::iteration_pattern,void>::value , "Kokkos Error: MD iteration pattern not defined" ); - using iteration_pattern = typename range_policy::iteration_pattern; - using work_tag = typename range_policy::work_tag; + using iteration_pattern = typename traits::iteration_pattern; + using work_tag = typename traits::work_tag; static constexpr int rank = iteration_pattern::rank; static constexpr int outer_direction = static_cast<int> ( - (iteration_pattern::outer_direction != Iterate::Default && iteration_pattern::outer_direction != Iterate::Flat) + (iteration_pattern::outer_direction != Iterate::Default) ? iteration_pattern::outer_direction - : default_outer_direction< typename range_policy::execution_space>::value ); + : default_outer_direction< typename traits::execution_space>::value ); static constexpr int inner_direction = static_cast<int> ( iteration_pattern::inner_direction != Iterate::Default ? 
iteration_pattern::inner_direction - : default_inner_direction< typename range_policy::execution_space>::value ) ; + : default_inner_direction< typename traits::execution_space>::value ) ; // Ugly ugly workaround intel 14 not handling scoped enum correctly - static constexpr int Flat = static_cast<int>( Iterate::Flat ); static constexpr int Right = static_cast<int>( Iterate::Right ); - - - using size_type = typename range_policy::index_type; - using index_type = typename std::make_signed<size_type>::type; - - - template <typename I> - MDRangePolicy( std::initializer_list<I> upper_corner ) + static constexpr int Left = static_cast<int>( Iterate::Left ); + + using index_type = typename traits::index_type; + using array_index_type = long; + using point_type = Kokkos::Array<array_index_type,rank>; //was index_type + using tile_type = Kokkos::Array<array_index_type,rank>; + // If point_type or tile_type is not templated on a signed integral type (if it is unsigned), + // then if user passes in intializer_list of runtime-determined values of + // signed integral type that are not const will receive a compiler error due + // to an invalid case for implicit conversion - + // "conversion from integer or unscoped enumeration type to integer type that cannot represent all values of the original, except where source is a constant expression whose value can be stored exactly in the target type" + // This would require the user to either pass a matching index_type parameter + // as template parameter to the MDRangePolicy or static_cast the individual values + + MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} ) + : m_lower(lower) + , m_upper(upper) + , m_tile(tile) + , m_num_tiles(1) { - static_assert( std::is_integral<I>::value, "Kokkos Error: corner defined with non-integral type" ); - - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - - //static_assert( upper_corner.size() == rank, "Kokkos Error: upper_corner has incorrect rank" ); - - const auto u = upper_corner.begin(); - - m_num_tiles = 1; - for (int i=0; i<rank; ++i) { - m_offset[i] = static_cast<index_type>(0); - m_dim[i] = static_cast<index_type>(u[i]); - if (inner_direction != Flat) { - // default tile size to 4 - m_tile[i] = 4; - } else { - m_tile[i] = 1; + // Host + if ( true + #if defined(KOKKOS_ENABLE_CUDA) + && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value + #endif + ) + { + index_type span; + for (int i=0; i<rank; ++i) { + span = upper[i] - lower[i]; + if ( m_tile[i] <= 0 ) { + if ( (inner_direction == Right && (i < rank-1)) + || (inner_direction == Left && (i > 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = span; + } + } + m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; } - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; } - } - - template <typename IA, typename IB> - MDRangePolicy( std::initializer_list<IA> corner_a - , std::initializer_list<IB> corner_b - ) - { - static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" ); - static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" ); - - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); - //static_assert( corner_b.size() 
== rank, "Kokkos Error: corner_b has incorrect rank" ); - - - using A = typename std::make_signed<IA>::type; - using B = typename std::make_signed<IB>::type; - - const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); }; - const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); }; - - m_num_tiles = 1; - for (int i=0; i<rank; ++i) { - m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i)); - m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i)); - if (inner_direction != Flat) { - // default tile size to 4 - m_tile[i] = 4; - } else { - m_tile[i] = 1; + #if defined(KOKKOS_ENABLE_CUDA) + else // Cuda + { + index_type span; + for (int i=0; i<rank; ++i) { + span = upper[i] - lower[i]; + if ( m_tile[i] <= 0 ) { + // TODO: determine what is a good default tile size for cuda + // may be rank dependent + if ( (inner_direction == Right && (i < rank-1)) + || (inner_direction == Left && (i > 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = 16; + } + } + m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } + index_type total_tile_size_check = 1; + for (int i=0; i<rank; ++i) { + total_tile_size_check *= m_tile[i]; + } + if ( total_tile_size_check >= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product num_threads < 1024; more restrictions pending register limit + printf(" Tile dimensions exceed Cuda limits\n"); + Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); } - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; - } - } - - template <typename IA, typename IB, typename T> - MDRangePolicy( std::initializer_list<IA> corner_a - , std::initializer_list<IB> corner_b - , std::initializer_list<T> tile - ) - { - static_assert( std::is_integral<IA>::value, "Kokkos Error: corner A defined with non-integral type" ); - static_assert( std::is_integral<IB>::value, "Kokkos Error: corner B defined with non-integral type" ); - static_assert( std::is_integral<T>::value, "Kokkos Error: tile defined with non-integral type" ); - static_assert( inner_direction != Flat, "Kokkos Error: tiling not support with flat iteration" ); - - // TODO check size of lists equal to rank - // static_asserts on initializer_list.size() require c++14 - //static_assert( corner_a.size() == rank, "Kokkos Error: corner_a has incorrect rank" ); - //static_assert( corner_b.size() == rank, "Kokkos Error: corner_b has incorrect rank" ); - //static_assert( tile.size() == rank, "Kokkos Error: tile has incorrect rank" ); - - using A = typename std::make_signed<IA>::type; - using B = typename std::make_signed<IB>::type; - - const auto a = [=](int i) { return static_cast<A>(corner_a.begin()[i]); }; - const auto b = [=](int i) { return static_cast<B>(corner_b.begin()[i]); }; - const auto t = tile.begin(); - - m_num_tiles = 1; - for (int i=0; i<rank; ++i) { - m_offset[i] = static_cast<index_type>(a(i) <= b(i) ? a(i) : b(i)); - m_dim[i] = static_cast<index_type>(a(i) <= b(i) ? b(i) - a(i) : a(i) - b(i)); - m_tile[i] = static_cast<int>(t[i] > (T)0 ? 
t[i] : (T)1 ); - m_tile_dim[i] = (m_dim[i] + (m_tile[i] - 1)) / m_tile[i]; - m_num_tiles *= m_tile_dim[i]; } + #endif } - index_type m_offset[rank]; - index_type m_dim[rank]; - int m_tile[rank]; - index_type m_tile_dim[rank]; - size_type m_num_tiles; // product of tile dims -}; - -namespace Impl { -// Serial, Threads, OpenMP -// use enable_if to overload for Cuda -template < typename MDRange, typename Functor, typename Enable = void > -struct MDForFunctor -{ - using work_tag = typename MDRange::work_tag; - using index_type = typename MDRange::index_type; - using size_type = typename MDRange::size_type; - - MDRange m_range; - Functor m_func; - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange const& range, Functor const& f ) - : m_range(range) - , m_func( f ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange const& range, Functor && f ) - : m_range(range) - , m_func( std::forward<Functor>(f) ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange && range, Functor const& f ) - : m_range( std::forward<MDRange>(range) ) - , m_func( f ) - {} - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDRange && range, Functor && f ) - : m_range( std::forward<MDRange>(range) ) - , m_func( std::forward<Functor>(f) ) - {} - - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDForFunctor const& ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor& operator=( MDForFunctor const& ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor( MDForFunctor && ) = default; - - KOKKOS_INLINE_FUNCTION - MDForFunctor& operator=( MDForFunctor && ) = default; - - // Rank-2, Flat, No Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && std::is_same<void, work_tag>::value - && MDRange::rank == 2 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const + template < typename LT , typename UT , typename TT = array_index_type > + MDRangePolicy( std::initializer_list<LT> const& lower, std::initializer_list<UT> const& upper, std::initializer_list<TT> const& tile = {} ) { - if ( MDRange::outer_direction == MDRange::Right ) { - m_func( m_range.m_offset[0] + ( t / m_range.m_dim[1] ) - , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) ); - } else { - m_func( m_range.m_offset[0] + ( t % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); +#if 0 + // This should work, less duplicated code but not yet extensively tested + point_type lower_tmp, upper_tmp; + tile_type tile_tmp; + for ( auto i = 0; i < rank; ++i ) { + lower_tmp[i] = static_cast<array_index_type>(lower.begin()[i]); + upper_tmp[i] = static_cast<array_index_type>(upper.begin()[i]); + tile_tmp[i] = static_cast<array_index_type>(tile.begin()[i]); } - } - // Rank-2, Flat, Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && !std::is_same<void, work_tag>::value - && MDRange::rank == 2 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - m_func( work_tag{}, m_range.m_offset[0] + ( t / m_range.m_dim[1] ) - , m_range.m_offset[1] + ( t % m_range.m_dim[1] ) ); - } else { - m_func( work_tag{}, m_range.m_offset[0] + ( t % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( t / m_range.m_dim[0] ) ); - } - } + MDRangePolicy( lower_tmp, upper_tmp, tile_tmp ); - // Rank-2, Not Flat, No Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && std::is_same<void, 
work_tag>::value - && MDRange::rank == 2 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - index_type t0, t1; - if ( MDRange::outer_direction == MDRange::Right ) { - t0 = t / m_range.m_tile_dim[1]; - t1 = t % m_range.m_tile_dim[1]; - } else { - t0 = t % m_range.m_tile_dim[0]; - t1 = t / m_range.m_tile_dim[0]; - } +#else + if(m_lower.size()!=rank || m_upper.size() != rank) + Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size"); - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0<e0; ++i0) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i1=b1; i1<e1; ++i1) { - m_func( i0, i1 ); - }} - } else { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i0=b0; i0<e0; ++i0) { - m_func( i0, i1 ); - }} + for ( auto i = 0; i < rank; ++i ) { + m_lower[i] = static_cast<array_index_type>(lower.begin()[i]); + m_upper[i] = static_cast<array_index_type>(upper.begin()[i]); + if(tile.size()==rank) + m_tile[i] = static_cast<array_index_type>(tile.begin()[i]); + else + m_tile[i] = 0; } - } - // Rank-2, Not Flat, Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && !std::is_same<void, work_tag>::value - && MDRange::rank == 2 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - work_tag tag; - - index_type t0, t1; - if ( MDRange::outer_direction == MDRange::Right ) { - t0 = t / m_range.m_tile_dim[1]; - t1 = t % m_range.m_tile_dim[1]; - } else { - t0 = t % m_range.m_tile_dim[0]; - t1 = t / m_range.m_tile_dim[0]; - } + m_num_tiles = 1; - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? 
b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0<e0; ++i0) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i1=b1; i1<e1; ++i1) { - m_func( tag, i0, i1 ); - }} - } else { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i0=b0; i0<e0; ++i0) { - m_func( tag, i0, i1 ); - }} - } - } - //--------------------------------------------------------------------------- - - // Rank-3, Flat, No Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && std::is_same<void, work_tag>::value - && MDRange::rank == 3 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; - m_func( m_range.m_offset[0] + ( t / tmp_prod ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) - , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) - ); - } else { - const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; - m_func( m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) - , m_range.m_offset[2] + ( t / tmp_prod ) - ); + // Host + if ( true + #if defined(KOKKOS_ENABLE_CUDA) + && !std::is_same< typename traits::execution_space, Kokkos::Cuda >::value + #endif + ) + { + index_type span; + for (int i=0; i<rank; ++i) { + span = m_upper[i] - m_lower[i]; + if ( m_tile[i] <= 0 ) { + if ( (inner_direction == Right && (i < rank-1)) + || (inner_direction == Left && (i > 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = span; + } + } + m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } } - } - - // Rank-3, Flat, Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && !std::is_same<void, work_tag>::value - && MDRange::rank == 3 - && MDRange::inner_direction == MDRange::Flat - )>::type - operator()(Idx t) const - { - if ( MDRange::outer_direction == MDRange::Right ) { - const int64_t tmp_prod = m_range.m_dim[1]*m_range.m_dim[2]; - m_func( work_tag{} - , m_range.m_offset[0] + ( t / tmp_prod ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[2] ) - , m_range.m_offset[2] + ( (t % tmp_prod) % m_range.m_dim[2] ) - ); - } else { - const int64_t tmp_prod = m_range.m_dim[0]*m_range.m_dim[1]; - m_func( work_tag{} - , m_range.m_offset[0] + ( (t % tmp_prod) % m_range.m_dim[0] ) - , m_range.m_offset[1] + ( (t % tmp_prod) / m_range.m_dim[0] ) - , m_range.m_offset[2] + ( t / tmp_prod ) - ); + #if defined(KOKKOS_ENABLE_CUDA) + else // Cuda + { + index_type span; + for (int i=0; i<rank; ++i) { + span = m_upper[i] - m_lower[i]; + if ( m_tile[i] <= 0 ) { + // TODO: determine what is a good default tile size for cuda + // may be rank dependent + if ( (inner_direction == Right && (i < rank-1)) + || (inner_direction == Left && (i > 0)) ) + { + m_tile[i] = 2; + } + else { + m_tile[i] = 16; + } + } + m_tile_end[i] = static_cast<index_type>((span + m_tile[i] - 1) / m_tile[i]); + m_num_tiles *= m_tile_end[i]; + } + index_type total_tile_size_check = 1; + for (int i=0; i<rank; ++i) { + total_tile_size_check *= m_tile[i]; + } + if ( total_tile_size_check >= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product 
num_threads < 1024; more restrictions pending register limit + printf(" Tile dimensions exceed Cuda limits\n"); + Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims"); + } } + #endif +#endif } - // Rank-3, Not Flat, No Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && std::is_same<void, work_tag>::value - && MDRange::rank == 3 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - index_type t0, t1, t2; - if ( MDRange::outer_direction == MDRange::Right ) { - const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); - t0 = t / tmp_prod; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; - t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; - } else { - const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); - t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; - t2 = t / tmp_prod; - } - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0<e0; ++i0) { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i2=b2; i2<e2; ++i2) { - m_func( i0, i1, i2 ); - }}} - } else { - for (int i2=b2; i2<e2; ++i2) { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i0=b0; i0<e0; ++i0) { - m_func( i0, i1, i2 ); - }}} - } - } + point_type m_lower; + point_type m_upper; + tile_type m_tile; + point_type m_tile_end; + index_type m_num_tiles; +}; +// ------------------------------------------------------------------ // - // Rank-3, Not Flat, Tag - template <typename Idx> - KOKKOS_FORCEINLINE_FUNCTION - typename std::enable_if<( std::is_integral<Idx>::value - && !std::is_same<void, work_tag>::value - && MDRange::rank == 3 - && MDRange::inner_direction != MDRange::Flat - )>::type - operator()(Idx t) const - { - work_tag tag; - - index_type t0, t1, t2; - if ( MDRange::outer_direction == MDRange::Right ) { - const index_type tmp_prod = ( m_range.m_tile_dim[1]*m_range.m_tile_dim[2]); - t0 = t / tmp_prod; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[2]; - t2 = ( t % tmp_prod ) % m_range.m_tile_dim[2]; - } else { - const index_type tmp_prod = ( m_range.m_tile_dim[0]*m_range.m_tile_dim[1]); - t0 = ( t % tmp_prod ) % m_range.m_tile_dim[0]; - t1 = ( t % tmp_prod ) / m_range.m_tile_dim[0]; - t2 = t / tmp_prod; - } +// ------------------------------------------------------------------ // +//md_parallel_for +// ------------------------------------------------------------------ // +template <typename MDRange, typename Functor, typename Enable = void> +void md_parallel_for( MDRange const& range + , Functor const& f + , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::MDFunctor<MDRange, Functor, void> g(range, f); - const index_type b0 = t0 * m_range.m_tile[0] + m_range.m_offset[0]; - const index_type b1 = t1 * m_range.m_tile[1] + m_range.m_offset[1]; - const index_type b2 = t2 * m_range.m_tile[2] + m_range.m_offset[2]; - - const index_type e0 = b0 + m_range.m_tile[0] <= (m_range.m_dim[0] + m_range.m_offset[0] ) ? b0 + m_range.m_tile[0] : ( m_range.m_dim[0] + m_range.m_offset[0] ); - const index_type e1 = b1 + m_range.m_tile[1] <= (m_range.m_dim[1] + m_range.m_offset[1] ) ? b1 + m_range.m_tile[1] : ( m_range.m_dim[1] + m_range.m_offset[1] ); - const index_type e2 = b2 + m_range.m_tile[2] <= (m_range.m_dim[2] + m_range.m_offset[2] ) ? 
b2 + m_range.m_tile[2] : ( m_range.m_dim[2] + m_range.m_offset[2] ); - - if ( MDRange::inner_direction == MDRange::Right ) { - for (int i0=b0; i0<e0; ++i0) { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i2=b2; i2<e2; ++i2) { - m_func( tag, i0, i1, i2 ); - }}} - } else { - for (int i2=b2; i2<e2; ++i2) { - for (int i1=b1; i1<e1; ++i1) { - #if defined(KOKKOS_IMPL_MDRANGE_IVDEP) - #pragma ivdep - #endif - for (int i0=b0; i0<e0; ++i0) { - m_func( tag, i0, i1, i2 ); - }}} - } - } -}; + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; + + Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); +} +template <typename MDRange, typename Functor> +void md_parallel_for( const std::string& str + , MDRange const& range + , Functor const& f + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::MDFunctor<MDRange, Functor, void> g(range, f); + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; -} // namespace Impl + Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); +} +// Cuda specialization +#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA ) +template <typename MDRange, typename Functor> +void md_parallel_for( const std::string& str + , MDRange const& range + , Functor const& f + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f); + closure.execute(); +} template <typename MDRange, typename Functor> void md_parallel_for( MDRange const& range , Functor const& f , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 ) { - Impl::MDForFunctor<MDRange, Functor> g(range, f); + Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f); + closure.execute(); +} +#endif +// ------------------------------------------------------------------ // - using range_policy = typename MDRange::range_policy; +// ------------------------------------------------------------------ // +//md_parallel_reduce +// ------------------------------------------------------------------ // +template <typename MDRange, typename Functor, typename ValueType> +void md_parallel_reduce( MDRange const& range + , Functor const& f + , ValueType & v + , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::MDFunctor<MDRange, Functor, ValueType> g(range, f, v); - Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; + Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v ); } -template <typename MDRange, typename Functor> -void md_parallel_for( const std::string& str +template 
<typename MDRange, typename Functor, typename ValueType> +void md_parallel_reduce( const std::string& str , MDRange const& range , Functor const& f + , ValueType & v + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && !std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 ) { - Impl::MDForFunctor<MDRange, Functor> g(range, f); + Impl::MDFunctor<MDRange, Functor, ValueType> g(range, f, v); - using range_policy = typename MDRange::range_policy; + //using range_policy = typename MDRange::range_policy; + using range_policy = typename MDRange::impl_range_policy; - Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str ); + Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v ); } +// Cuda - parallel_reduce not implemented yet +/* +template <typename MDRange, typename Functor, typename ValueType> +void md_parallel_reduce( MDRange const& range + , Functor const& f + , ValueType & v + , const std::string& str = "" + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f, v); + closure.execute(); +} + +template <typename MDRange, typename Functor, typename ValueType> +void md_parallel_reduce( const std::string& str + , MDRange const& range + , Functor const& f + , ValueType & v + , typename std::enable_if<( true + #if defined( KOKKOS_ENABLE_CUDA) + && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value + #endif + ) >::type* = 0 + ) +{ + Impl::DeviceIterateTile<MDRange, Functor, typename MDRange::work_tag> closure(range, f, v); + closure.execute(); +} +*/ + }} // namespace Kokkos::Experimental #endif //KOKKOS_CORE_EXP_MD_RANGE_POLICY_HPP diff --git a/lib/kokkos/core/src/Kokkos_Array.hpp b/lib/kokkos/core/src/Kokkos_Array.hpp index 8deb5142c4352021c4305b422508b21f8524e108..abb263b7ccd7d6f82f469d06fadbc2326fe21438 100644 --- a/lib/kokkos/core/src/Kokkos_Array.hpp +++ b/lib/kokkos/core/src/Kokkos_Array.hpp @@ -59,8 +59,14 @@ template< class T = void , class Proxy = void > struct Array { -private: - T m_elem[N]; +public: + /** + * The elements of this C array shall not be accessed directly. The data + * member has to be declared public to enable aggregate initialization as for + * std::array. We mark it as private in the documentation. 
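// --- Illustrative usage sketch (not part of the patch) ---------------------
// A minimal example of the reworked MDRangePolicy / md_parallel_for interface
// introduced in KokkosExp_MDRangePolicy.hpp above. The view, extents, tile
// sizes, and functor body below are hypothetical; only the policy/dispatch
// calls come from the patch.
#include <Kokkos_Core.hpp>

void scale_2d( Kokkos::View< double** > a,
               const int n0, const int n1, const double s )
{
  using policy_t =
    Kokkos::Experimental::MDRangePolicy< Kokkos::Experimental::Rank<2> >;

  // Lower corner, upper corner, and optional tile sizes as initializer lists,
  // matching the new constructor added in this patch.
  policy_t policy( { 0, 0 }, { n0, n1 }, { 4, 4 } );

  Kokkos::Experimental::md_parallel_for( policy,
    KOKKOS_LAMBDA( const int i, const int j ) { a( i, j ) *= s; } );
}
// ---------------------------------------------------------------------------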
+ * @private + */ + T m_internal_implementation_private_member_data[N]; public: typedef T & reference ; @@ -78,25 +84,32 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); - return m_elem[i]; + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integral argument" ); + return m_internal_implementation_private_member_data[i]; } template< typename iType > KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); - return m_elem[i]; + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integral argument" ); + return m_internal_implementation_private_member_data[i]; } - KOKKOS_INLINE_FUNCTION pointer data() { return & m_elem[0] ; } - KOKKOS_INLINE_FUNCTION const_pointer data() const { return & m_elem[0] ; } + KOKKOS_INLINE_FUNCTION pointer data() + { + return & m_internal_implementation_private_member_data[0]; + } + KOKKOS_INLINE_FUNCTION const_pointer data() const + { + return & m_internal_implementation_private_member_data[0]; + } - ~Array() = default ; - Array() = default ; - Array( const Array & ) = default ; - Array & operator = ( const Array & ) = default ; + // Do not default unless move and move-assignment are also defined + // ~Array() = default ; + // Array() = default ; + // Array( const Array & ) = default ; + // Array & operator = ( const Array & ) = default ; // Some supported compilers are not sufficiently C++11 compliant // for default move constructor and move assignment operator. @@ -124,7 +137,7 @@ public: KOKKOS_INLINE_FUNCTION value_type operator[]( const iType & ) { - static_assert( std::is_integral<iType>::value , "Must be integer argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integer argument" ); return value_type(); } @@ -132,7 +145,7 @@ public: KOKKOS_INLINE_FUNCTION value_type operator[]( const iType & ) const { - static_assert( std::is_integral<iType>::value , "Must be integer argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integer argument" ); return value_type(); } @@ -181,7 +194,7 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integral argument" ); return m_elem[i]; } @@ -189,7 +202,7 @@ public: KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integral argument" ); return m_elem[i]; } @@ -250,7 +263,7 @@ public: KOKKOS_INLINE_FUNCTION reference operator[]( const iType & i ) { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be integral argument" ); return m_elem[i*m_stride]; } @@ -258,7 +271,7 @@ public: KOKKOS_INLINE_FUNCTION const_reference operator[]( const iType & i ) const { - static_assert( std::is_integral<iType>::value , "Must be integral argument" ); + static_assert( ( std::is_integral<iType>::value || std::is_enum<iType>::value ) , "Must be 
integral argument" ); return m_elem[i*m_stride]; } diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp index 3f9bdea40da551332852448b3b7fb68952bd1875..cfcdabf95e3e085cf388f14e99fb6b4db3d8c654 100644 --- a/lib/kokkos/core/src/Kokkos_Concepts.hpp +++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp @@ -102,6 +102,7 @@ KOKKOS_IMPL_IS_CONCEPT( memory_traits ) KOKKOS_IMPL_IS_CONCEPT( execution_space ) KOKKOS_IMPL_IS_CONCEPT( execution_policy ) KOKKOS_IMPL_IS_CONCEPT( array_layout ) +KOKKOS_IMPL_IS_CONCEPT( reducer ) namespace Impl { diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp index 6d92f4bf616a057bb83cc34d38ab872e77281608..16c1bce902d47f38a1cd455df8f8900d3e73c0a5 100644 --- a/lib/kokkos/core/src/Kokkos_Core.hpp +++ b/lib/kokkos/core/src/Kokkos_Core.hpp @@ -57,6 +57,10 @@ #include <Kokkos_OpenMP.hpp> #endif +#if defined( KOKKOS_ENABLE_QTHREADS ) +#include <Kokkos_Qthreads.hpp> +#endif + #if defined( KOKKOS_ENABLE_PTHREAD ) #include <Kokkos_Threads.hpp> #endif @@ -76,6 +80,7 @@ #include <Kokkos_Complex.hpp> +#include <iosfwd> //---------------------------------------------------------------------------- @@ -105,6 +110,9 @@ void finalize_all(); void fence(); +/** \brief Print "Bill of Materials" */ +void print_configuration( std::ostream & , const bool detail = false ); + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -159,4 +167,3 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size ) //---------------------------------------------------------------------------- #endif - diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp index e7e6a49d379045b2da38c7b53fdde589a989adec..4029bf599c6b564a8bc6bb2b6d20f9472fe19be5 100644 --- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp +++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp @@ -63,7 +63,7 @@ namespace Kokkos { struct AUTO_t { KOKKOS_INLINE_FUNCTION - constexpr const AUTO_t & operator()() const { return *this ; } + constexpr const AUTO_t & operator()() const { return *this; } }; namespace { @@ -73,46 +73,49 @@ constexpr AUTO_t AUTO = Kokkos::AUTO_t(); struct InvalidType {}; -} +} // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Forward declarations for class inter-relationships namespace Kokkos { -class HostSpace ; ///< Memory space for main process and CPU execution spaces +class HostSpace; ///< Memory space for main process and CPU execution spaces #ifdef KOKKOS_ENABLE_HBWSPACE namespace Experimental { -class HBWSpace ; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor) +class HBWSpace; /// Memory space for hbw_malloc from memkind (e.g. for KNL processor) } #endif #if defined( KOKKOS_ENABLE_SERIAL ) -class Serial ; ///< Execution space main process on CPU -#endif // defined( KOKKOS_ENABLE_SERIAL ) +class Serial; ///< Execution space main process on CPU. +#endif + +#if defined( KOKKOS_ENABLE_QTHREADS ) +class Qthreads; ///< Execution space with Qthreads back-end. +#endif #if defined( KOKKOS_ENABLE_PTHREAD ) -class Threads ; ///< Execution space with pthreads back-end +class Threads; ///< Execution space with pthreads back-end. #endif #if defined( KOKKOS_ENABLE_OPENMP ) -class OpenMP ; ///< OpenMP execution space +class OpenMP; ///< OpenMP execution space. 
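// --- Illustrative usage sketch (not part of the patch) ---------------------
// The relaxed static_asserts in Kokkos_Array.hpp above also admit enum index
// types. A small hedged example; Axis and norm_squared are illustrative names.
#include <Kokkos_Core.hpp>

enum Axis { X = 0, Y = 1, Z = 2 };

KOKKOS_INLINE_FUNCTION
double norm_squared( const Kokkos::Array< double, 3 > & v )
{
  // Unscoped enum values now pass the std::is_enum branch of operator[].
  return v[X] * v[X] + v[Y] * v[Y] + v[Z] * v[Z];
}
// ---------------------------------------------------------------------------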
#endif #if defined( KOKKOS_ENABLE_CUDA ) -class CudaSpace ; ///< Memory space on Cuda GPU -class CudaUVMSpace ; ///< Memory space on Cuda GPU with UVM -class CudaHostPinnedSpace ; ///< Memory space on Host accessible to Cuda GPU -class Cuda ; ///< Execution space for Cuda GPU +class CudaSpace; ///< Memory space on Cuda GPU +class CudaUVMSpace; ///< Memory space on Cuda GPU with UVM +class CudaHostPinnedSpace; ///< Memory space on Host accessible to Cuda GPU +class Cuda; ///< Execution space for Cuda GPU #endif template<class ExecutionSpace, class MemorySpace> struct Device; + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Set the default execution space. @@ -122,60 +125,66 @@ struct Device; namespace Kokkos { -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) - typedef Cuda DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef OpenMP DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Threads DefaultExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) - typedef Serial DefaultExecutionSpace ; +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) + typedef Cuda DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultExecutionSpace; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Qthreads DefaultExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultExecutionSpace; #else -# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::Cuda, Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." 
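// --- Illustrative usage sketch (not part of the patch) ---------------------
// Code that does not name a space explicitly picks up the DefaultExecutionSpace
// selected above (and the host-side default selected in the block that follows),
// e.g. for views and their host mirrors. Names and sizes are illustrative.
#include <Kokkos_Core.hpp>

void fill_and_copy( const int n )
{
  Kokkos::View< double* > a( "a", n );          // lives in DefaultExecutionSpace's memory
  auto h = Kokkos::create_mirror_view( a );     // host-side mirror
  for ( int i = 0; i < n; ++i ) h( i ) = double( i );
  Kokkos::deep_copy( a, h );
}
// ---------------------------------------------------------------------------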
#endif -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef OpenMP DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Threads DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) - typedef Serial DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_OPENMP ) - typedef OpenMP DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_PTHREAD ) - typedef Threads DefaultHostExecutionSpace ; -#elif defined ( KOKKOS_ENABLE_SERIAL ) - typedef Serial DefaultHostExecutionSpace ; +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) + typedef Threads DefaultHostExecutionSpace; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) + typedef Serial DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_OPENMP ) + typedef OpenMP DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_PTHREAD ) + typedef Threads DefaultHostExecutionSpace; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Qthreads DefaultHostExecutionSpace; +#elif defined( KOKKOS_ENABLE_SERIAL ) + typedef Serial DefaultHostExecutionSpace; #else -# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads." +# error "At least one of the following execution spaces must be defined in order to use Kokkos: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial." #endif } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // Detect the active execution space and define its memory space. // This is used to verify whether a running kernel can access // a given memory space. 
namespace Kokkos { + namespace Impl { -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined (KOKKOS_ENABLE_CUDA) -typedef Kokkos::CudaSpace ActiveExecutionMemorySpace ; +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined( KOKKOS_ENABLE_CUDA ) +typedef Kokkos::CudaSpace ActiveExecutionMemorySpace; #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -typedef Kokkos::HostSpace ActiveExecutionMemorySpace ; +typedef Kokkos::HostSpace ActiveExecutionMemorySpace; #else -typedef void ActiveExecutionMemorySpace ; +typedef void ActiveExecutionMemorySpace; #endif -template< class ActiveSpace , class MemorySpace > +template< class ActiveSpace, class MemorySpace > struct VerifyExecutionCanAccessMemorySpace { enum {value = 0}; }; template< class Space > -struct VerifyExecutionCanAccessMemorySpace< Space , Space > +struct VerifyExecutionCanAccessMemorySpace< Space, Space > { enum {value = 1}; KOKKOS_INLINE_FUNCTION static void verify(void) {} @@ -183,33 +192,33 @@ struct VerifyExecutionCanAccessMemorySpace< Space , Space > }; } // namespace Impl + } // namespace Kokkos -#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE , DATA_PTR ) \ +#define KOKKOS_RESTRICT_EXECUTION_TO_DATA( DATA_SPACE, DATA_PTR ) \ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify( DATA_PTR ) + Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE >::verify( DATA_PTR ) #define KOKKOS_RESTRICT_EXECUTION_TO_( DATA_SPACE ) \ Kokkos::Impl::VerifyExecutionCanAccessMemorySpace< \ - Kokkos::Impl::ActiveExecutionMemorySpace , DATA_SPACE >::verify() + Kokkos::Impl::ActiveExecutionMemorySpace, DATA_SPACE >::verify() //---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- namespace Kokkos { void fence(); } -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template< class Functor , class Policy , class EnableFunctor = void - , class EnablePolicy = void + , class EnablePolicy = void > struct FunctorPolicyExecutionSpace; @@ -220,18 +229,18 @@ struct FunctorPolicyExecutionSpace; /// /// This is an implementation detail of parallel_for. Users should /// skip this and go directly to the nonmember function parallel_for. -template< class FunctorType , class ExecPolicy , class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelFor ; +template< class FunctorType, class ExecPolicy, class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelFor; /// \class ParallelReduce /// \brief Implementation detail of parallel_reduce. /// /// This is an implementation detail of parallel_reduce. Users should /// skip this and go directly to the nonmember function parallel_reduce. 
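// --- Illustrative usage sketch (not part of the patch) ---------------------
// As the comment above says, users call the nonmember parallel_reduce rather
// than the ParallelReduce implementation class. A minimal hedged example; the
// label and functor body are illustrative.
#include <Kokkos_Core.hpp>

double sum_of_squares( const int n )
{
  double result = 0.0;
  Kokkos::parallel_reduce( "sum_of_squares", n,
    KOKKOS_LAMBDA( const int i, double & update )
      { update += double( i ) * double( i ); },
    result );
  return result;
}
// ---------------------------------------------------------------------------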
-template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType, class ExecutionSpace = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelReduce ; +template< class FunctorType, class ExecPolicy, class ReducerType = InvalidType, class ExecutionSpace = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelReduce; /// \class ParallelScan /// \brief Implementation detail of parallel_scan. @@ -239,10 +248,12 @@ template< class FunctorType , class ExecPolicy , class ReducerType = InvalidType /// This is an implementation detail of parallel_scan. Users should /// skip this and go directly to the documentation of the nonmember /// template function Kokkos::parallel_scan. -template< class FunctorType , class ExecPolicy , class ExecutionSapce = - typename Impl::FunctorPolicyExecutionSpace< FunctorType , ExecPolicy >::execution_space - > class ParallelScan ; +template< class FunctorType, class ExecPolicy, class ExecutionSapce = + typename Impl::FunctorPolicyExecutionSpace< FunctorType, ExecPolicy >::execution_space + > class ParallelScan; -}} -#endif /* #ifndef KOKKOS_CORE_FWD_HPP */ +} // namespace Impl + +} // namespace Kokkos +#endif /* #ifndef KOKKOS_CORE_FWD_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index afccdb6c5246b8a9778346d2db9065eb68ab7db0..433cac5e518cfbb40a413e1b5984994d54bfacbd 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -62,7 +62,6 @@ #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_Tags.hpp> -#include <KokkosExp_MDRangePolicy.hpp> /*--------------------------------------------------------------------------*/ @@ -295,6 +294,7 @@ struct VerifyExecutionCanAccessMemorySpace #include <Cuda/Kokkos_Cuda_Parallel.hpp> #include <Cuda/Kokkos_Cuda_Task.hpp> +#include <KokkosExp_MDRangePolicy.hpp> //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index d6bf8dcdf4520224fe238ec7eb3cc90754bd3838..fc39ce0e5bc04c4a9f2c6ee91580dbc43a45d8ef 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,14 +44,16 @@ #ifndef KOKKOS_HBWSPACE_HPP #define KOKKOS_HBWSPACE_HPP - #include <Kokkos_HostSpace.hpp> /*--------------------------------------------------------------------------*/ + #ifdef KOKKOS_ENABLE_HBWSPACE namespace Kokkos { + namespace Experimental { + namespace Impl { /// \brief Initialize lock array for arbitrary size atomics. 
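// --- Illustrative usage sketch (not part of the patch) ---------------------
// Hedged sketch for the HBWSpace changes that begin above: when Kokkos is built
// with memkind support, high-bandwidth memory can be requested explicitly as a
// View's memory space. Names and sizes are illustrative.
#include <Kokkos_Core.hpp>

#ifdef KOKKOS_ENABLE_HBWSPACE
void allocate_in_hbm( const int n )
{
  Kokkos::View< double*, Kokkos::Experimental::HBWSpace > v( "hbm_view", n );
  Kokkos::deep_copy( v, 1.0 );  // executes in HBWSpace's host execution space
}
#endif
// ---------------------------------------------------------------------------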
@@ -67,7 +69,7 @@ void init_lock_array_hbw_space(); /// This function tries to aquire the lock for the hash value derived /// from the provided ptr. If the lock is successfully aquired the /// function returns true. Otherwise it returns false. -bool lock_address_hbw_space(void* ptr); +bool lock_address_hbw_space( void* ptr ); /// \brief Release lock for the address /// @@ -75,13 +77,16 @@ bool lock_address_hbw_space(void* ptr); /// from the provided ptr. This function should only be called /// after previously successfully aquiring a lock with /// lock_address. -void unlock_address_hbw_space(void* ptr); +void unlock_address_hbw_space( void* ptr ); } // namespace Impl -} // neamspace Experimental + +} // namespace Experimental + } // namespace Kokkos namespace Kokkos { + namespace Experimental { /// \class HBWSpace @@ -91,10 +96,9 @@ namespace Experimental { /// memory means the usual CPU-accessible memory. class HBWSpace { public: - //! Tag this class as a kokkos memory space - typedef HBWSpace memory_space ; - typedef size_t size_type ; + typedef HBWSpace memory_space; + typedef size_t size_type; /// \typedef execution_space /// \brief Default execution space for this memory space. @@ -103,21 +107,25 @@ public: /// useful for things like initializing a View (which happens in /// parallel using the View's default execution space). #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_PTHREAD ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) - typedef Kokkos::Serial execution_space ; + typedef Kokkos::Serial execution_space; #else -# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." +# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qhreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! 
This memory space preferred device_type - typedef Kokkos::Device<execution_space,memory_space> device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; /*--------------------------------*/ /* Functions unique to the HBWSpace */ @@ -129,72 +137,73 @@ public: /**\brief Default memory space instance */ HBWSpace(); - HBWSpace( const HBWSpace & rhs ) = default ; - HBWSpace & operator = ( const HBWSpace & ) = default ; - ~HBWSpace() = default ; + HBWSpace( const HBWSpace & rhs ) = default; + HBWSpace & operator = ( const HBWSpace & ) = default; + ~HBWSpace() = default; /**\brief Non-default memory space instance to choose allocation mechansim, if available */ - enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC }; + enum AllocationMechanism { STD_MALLOC, POSIX_MEMALIGN, POSIX_MMAP, INTEL_MM_ALLOC }; explicit HBWSpace( const AllocationMechanism & ); /**\brief Allocate untracked memory in the space */ - void * allocate( const size_t arg_alloc_size ) const ; + void * allocate( const size_t arg_alloc_size ) const; /**\brief Deallocate untracked memory in the space */ - void deallocate( void * const arg_alloc_ptr - , const size_t arg_alloc_size ) const ; + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ static constexpr const char* name(); private: - AllocationMechanism m_alloc_mech ; + AllocationMechanism m_alloc_mech; static constexpr const char* m_name = "HBW"; - friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > ; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void >; }; } // namespace Experimental + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template<> -class SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void > - : public SharedAllocationRecord< void , void > +class SharedAllocationRecord< Kokkos::Experimental::HBWSpace, void > + : public SharedAllocationRecord< void, void > { private: - friend Kokkos::Experimental::HBWSpace ; + friend Kokkos::Experimental::HBWSpace; - typedef SharedAllocationRecord< void , void > RecordBase ; + typedef SharedAllocationRecord< void, void > RecordBase; - SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; - SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord( const SharedAllocationRecord & ) = delete; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete; static void deallocate( RecordBase * ); /**\brief Root record for tracked allocations from this HBWSpace instance */ - static RecordBase s_root_record ; + static RecordBase s_root_record; - const Kokkos::Experimental::HBWSpace m_space ; + const Kokkos::Experimental::HBWSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() = default ; + SharedAllocationRecord() = default; - SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size - , const RecordBase::function_type arg_dealloc = & deallocate + SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size + , const RecordBase::function_type arg_dealloc = & deallocate ); public: @@ 
-206,23 +215,23 @@ public: } KOKKOS_INLINE_FUNCTION static - SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size + SharedAllocationRecord * allocate( const Kokkos::Experimental::HBWSpace & arg_space + , const std::string & arg_label + , const size_t arg_alloc_size ) { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); + return new SharedAllocationRecord( arg_space, arg_label, arg_alloc_size ); #else - return (SharedAllocationRecord *) 0 ; + return (SharedAllocationRecord *) 0; #endif } /**\brief Allocate tracked memory in the space */ static void * allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_label - , const size_t arg_alloc_size ); + , const std::string & arg_label + , const size_t arg_alloc_size ); /**\brief Reallocate tracked memory in the space */ static @@ -233,88 +242,93 @@ public: static void deallocate_tracked( void * const arg_alloc_ptr ); - static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); - static void print_records( std::ostream & , const Kokkos::Experimental::HBWSpace & , bool detail = false ); + static void print_records( std::ostream &, const Kokkos::Experimental::HBWSpace &, bool detail = false ); }; } // namespace Impl -} // namespace Kokkos +} // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::Experimental::HBWSpace >::assignable , "" ); +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::HBWSpace, Kokkos::Experimental::HBWSpace >::assignable, "" ); template<> -struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > { +struct MemorySpaceAccess< Kokkos::HostSpace, Kokkos::Experimental::HBWSpace > { enum { assignable = true }; enum { accessible = true }; enum { deepcopy = true }; }; template<> -struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace> { +struct MemorySpaceAccess< Kokkos::Experimental::HBWSpace, Kokkos::HostSpace > { enum { assignable = false }; enum { accessible = true }; enum { deepcopy = true }; }; -}} +} // namespace Impl + +} // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Impl { +namespace Impl { -template<class ExecutionSpace> -struct DeepCopy<Experimental::HBWSpace,Experimental::HBWSpace,ExecutionSpace> { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< Experimental::HBWSpace, Experimental::HBWSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; -template<class ExecutionSpace> -struct DeepCopy<HostSpace,Experimental::HBWSpace,ExecutionSpace> { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > 
+struct DeepCopy< HostSpace, Experimental::HBWSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; -template<class ExecutionSpace> -struct DeepCopy<Experimental::HBWSpace,HostSpace,ExecutionSpace> { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , src , n ); +template< class ExecutionSpace > +struct DeepCopy< Experimental::HBWSpace, HostSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; } // namespace Impl + } // namespace Kokkos namespace Kokkos { + namespace Impl { template<> -struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::HBWSpace > +struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace, Kokkos::Experimental::HBWSpace > { enum { value = true }; inline static void verify( void ) { } @@ -322,7 +336,7 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experime }; template<> -struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kokkos::HostSpace > +struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace, Kokkos::HostSpace > { enum { value = true }; inline static void verify( void ) { } @@ -330,8 +344,9 @@ struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::HBWSpace , Kok }; } // namespace Impl + } // namespace Kokkos #endif -#endif /* #define KOKKOS_HBWSPACE_HPP */ +#endif // #define KOKKOS_HBWSPACE_HPP diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index e79de462bfe354fe5f7eb77100cdcc4e7aca2aef..82006665ce0a6a4ba37ae88ad8e7456d4c75101a 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -60,6 +60,7 @@ /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { /// \brief Initialize lock array for arbitrary size atomics. @@ -83,9 +84,10 @@ bool lock_address_host_space(void* ptr); /// from the provided ptr. This function should only be called /// after previously successfully aquiring a lock with /// lock_address. 
-void unlock_address_host_space(void* ptr); +void unlock_address_host_space( void* ptr ); } // namespace Impl + } // namespace Kokkos namespace Kokkos { @@ -97,10 +99,9 @@ namespace Kokkos { /// memory means the usual CPU-accessible memory. class HostSpace { public: - //! Tag this class as a kokkos memory space - typedef HostSpace memory_space ; - typedef size_t size_type ; + typedef HostSpace memory_space; + typedef size_t size_type; /// \typedef execution_space /// \brief Default execution space for this memory space. @@ -109,21 +110,25 @@ public: /// useful for things like initializing a View (which happens in /// parallel using the View's default execution space). #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_OPENMP ) - typedef Kokkos::OpenMP execution_space ; + typedef Kokkos::OpenMP execution_space; #elif defined( KOKKOS_ENABLE_PTHREAD ) - typedef Kokkos::Threads execution_space ; + typedef Kokkos::Threads execution_space; +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// typedef Kokkos::Qthreads execution_space; #elif defined( KOKKOS_ENABLE_SERIAL ) - typedef Kokkos::Serial execution_space ; + typedef Kokkos::Serial execution_space; #else -# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Serial, or Kokkos::Threads. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." +# error "At least one of the following host execution spaces must be defined: Kokkos::OpenMP, Kokkos::Threads, Kokkos::Qthreads, or Kokkos::Serial. You might be seeing this message if you disabled the Kokkos::Serial device explicitly using the Kokkos_ENABLE_Serial:BOOL=OFF CMake option, but did not enable any of the other host execution space devices." #endif //! 
This memory space preferred device_type - typedef Kokkos::Device<execution_space,memory_space> device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; /*--------------------------------*/ /* Functions unique to the HostSpace */ @@ -135,61 +140,57 @@ public: /**\brief Default memory space instance */ HostSpace(); - HostSpace( HostSpace && rhs ) = default ; - HostSpace( const HostSpace & rhs ) = default ; - HostSpace & operator = ( HostSpace && ) = default ; - HostSpace & operator = ( const HostSpace & ) = default ; - ~HostSpace() = default ; + HostSpace( HostSpace && rhs ) = default; + HostSpace( const HostSpace & rhs ) = default; + HostSpace & operator = ( HostSpace && ) = default; + HostSpace & operator = ( const HostSpace & ) = default; + ~HostSpace() = default; /**\brief Non-default memory space instance to choose allocation mechansim, if available */ - enum AllocationMechanism { STD_MALLOC , POSIX_MEMALIGN , POSIX_MMAP , INTEL_MM_ALLOC }; + enum AllocationMechanism { STD_MALLOC, POSIX_MEMALIGN, POSIX_MMAP, INTEL_MM_ALLOC }; explicit HostSpace( const AllocationMechanism & ); /**\brief Allocate untracked memory in the space */ - void * allocate( const size_t arg_alloc_size ) const ; + void * allocate( const size_t arg_alloc_size ) const; /**\brief Deallocate untracked memory in the space */ - void deallocate( void * const arg_alloc_ptr - , const size_t arg_alloc_size ) const ; + void deallocate( void * const arg_alloc_ptr + , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ static constexpr const char* name(); private: - - AllocationMechanism m_alloc_mech ; + AllocationMechanism m_alloc_mech; static constexpr const char* m_name = "Host"; - friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > ; + friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::HostSpace, void >; }; } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Impl { -static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); +namespace Impl { +static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::HostSpace >::assignable, "" ); template< typename S > struct HostMirror { private: - // If input execution space can access HostSpace then keep it. // Example: Kokkos::OpenMP can access, Kokkos::Cuda cannot enum { keep_exe = Kokkos::Impl::MemorySpaceAccess - < typename S::execution_space::memory_space , Kokkos::HostSpace > - ::accessible }; + < typename S::execution_space::memory_space, Kokkos::HostSpace >::accessible }; // If HostSpace can access memory space then keep it. 
// Example: Cannot access Kokkos::CudaSpace, can access Kokkos::CudaUVMSpace enum { keep_mem = Kokkos::Impl::MemorySpaceAccess - < Kokkos::HostSpace , typename S::memory_space >::accessible }; + < Kokkos::HostSpace, typename S::memory_space >::accessible }; public: @@ -202,42 +203,41 @@ public: , typename S::memory_space > , Kokkos::HostSpace >::type - >::type Space ; + >::type Space; }; } // namespace Impl + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { template<> -class SharedAllocationRecord< Kokkos::HostSpace , void > - : public SharedAllocationRecord< void , void > +class SharedAllocationRecord< Kokkos::HostSpace, void > + : public SharedAllocationRecord< void, void > { private: + friend Kokkos::HostSpace; - friend Kokkos::HostSpace ; - - typedef SharedAllocationRecord< void , void > RecordBase ; + typedef SharedAllocationRecord< void, void > RecordBase; - SharedAllocationRecord( const SharedAllocationRecord & ) = delete ; - SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ; + SharedAllocationRecord( const SharedAllocationRecord & ) = delete; + SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete; static void deallocate( RecordBase * ); /**\brief Root record for tracked allocations from this HostSpace instance */ - static RecordBase s_root_record ; + static RecordBase s_root_record; - const Kokkos::HostSpace m_space ; + const Kokkos::HostSpace m_space; protected: - ~SharedAllocationRecord(); - SharedAllocationRecord() = default ; + SharedAllocationRecord() = default; SharedAllocationRecord( const Kokkos::HostSpace & arg_space , const std::string & arg_label @@ -249,22 +249,23 @@ public: inline std::string get_label() const - { - return std::string( RecordBase::head()->m_label ); - } + { + return std::string( RecordBase::head()->m_label ); + } KOKKOS_INLINE_FUNCTION static SharedAllocationRecord * allocate( const Kokkos::HostSpace & arg_space , const std::string & arg_label , const size_t arg_alloc_size ) - { + { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return new SharedAllocationRecord( arg_space , arg_label , arg_alloc_size ); + return new SharedAllocationRecord( arg_space, arg_label, arg_alloc_size ); #else - return (SharedAllocationRecord *) 0 ; + return (SharedAllocationRecord *) 0; #endif - } + } + /**\brief Allocate tracked memory in the space */ static @@ -281,37 +282,37 @@ public: static void deallocate_tracked( void * const arg_alloc_ptr ); - static SharedAllocationRecord * get_record( void * arg_alloc_ptr ); - static void print_records( std::ostream & , const Kokkos::HostSpace & , bool detail = false ); + static void print_records( std::ostream &, const Kokkos::HostSpace &, bool detail = false ); }; } // namespace Impl + } // namespace Kokkos -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { + namespace Impl { -template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space> struct DeepCopy ; +template< class DstSpace, class SrcSpace, class ExecutionSpace = typename DstSpace::execution_space > struct DeepCopy; -template<class ExecutionSpace> -struct DeepCopy<HostSpace,HostSpace,ExecutionSpace> { - DeepCopy( void * dst , const void * src , size_t n ) { - memcpy( dst , 
src , n ); +template< class ExecutionSpace > +struct DeepCopy< HostSpace, HostSpace, ExecutionSpace > { + DeepCopy( void * dst, const void * src, size_t n ) { + memcpy( dst, src, n ); } - DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n ) { + + DeepCopy( const ExecutionSpace& exec, void * dst, const void * src, size_t n ) { exec.fence(); - memcpy( dst , src , n ); + memcpy( dst, src, n ); } }; } // namespace Impl -} // namespace Kokkos - -#endif /* #define KOKKOS_HOSTSPACE_HPP */ +} // namespace Kokkos +#endif // #define KOKKOS_HOSTSPACE_HPP diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 52845b9e093bcc6cd363b144ac59df0bda8bb124..c138b08c94a5a9f93e7faeb067283a221486cb4a 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -45,22 +45,20 @@ #define KOKKOS_MACROS_HPP //---------------------------------------------------------------------------- -/** Pick up configure/build options via #define macros: +/** Pick up configure / build options via #define macros: * * KOKKOS_ENABLE_CUDA Kokkos::Cuda execution and memory spaces * KOKKOS_ENABLE_PTHREAD Kokkos::Threads execution space - * KOKKOS_ENABLE_QTHREAD Kokkos::Qthread execution space - * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space - * KOKKOS_ENABLE_HWLOC HWLOC library is available - * KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK insert array bounds checks, is expensive! - * - * KOKKOS_ENABLE_MPI negotiate MPI/execution space interactions - * - * KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space + * KOKKOS_ENABLE_QTHREADS Kokkos::Qthreads execution space + * KOKKOS_ENABLE_OPENMP Kokkos::OpenMP execution space + * KOKKOS_ENABLE_HWLOC HWLOC library is available. + * KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK Insert array bounds checks, is expensive! + * KOKKOS_ENABLE_MPI Negotiate MPI/execution space interactions. + * KOKKOS_ENABLE_CUDA_UVM Use CUDA UVM for Cuda memory space. */ #ifndef KOKKOS_DONT_INCLUDE_CORE_CONFIG_H -#include <KokkosCore_config.h> + #include <KokkosCore_config.h> #endif #include <impl/Kokkos_OldMacros.hpp> @@ -86,7 +84,7 @@ * KOKKOS_ENABLE_INTEL_ATOMICS * KOKKOS_ENABLE_OPENMP_ATOMICS * - * A suite of 'KOKKOS_HAVE_PRAGMA_...' are defined for internal use. + * A suite of 'KOKKOS_ENABLE_PRAGMA_...' are defined for internal use. * * Macros for marking functions to run in an execution space: * @@ -98,64 +96,63 @@ //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) + // Compiling with a CUDA compiler. + // + // Include <cuda.h> to pick up the CUDA_VERSION macro defined as: + // CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 ) + // + // When generating device code the __CUDA_ARCH__ macro is defined as: + // __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 ) + + #include <cuda_runtime.h> + #include <cuda.h> + + #if !defined( CUDA_VERSION ) + #error "#include <cuda.h> did not define CUDA_VERSION." + #endif -/* Compiling with a CUDA compiler. - * - * Include <cuda.h> to pick up the CUDA_VERSION macro defined as: - * CUDA_VERSION = ( MAJOR_VERSION * 1000 ) + ( MINOR_VERSION * 10 ) - * - * When generating device code the __CUDA_ARCH__ macro is defined as: - * __CUDA_ARCH__ = ( MAJOR_CAPABILITY * 100 ) + ( MINOR_CAPABILITY * 10 ) - */ + #if ( CUDA_VERSION < 7000 ) + // CUDA supports C++11 in device code starting with version 7.0. + // This includes auto type and device code internal lambdas. 
+ #error "Cuda version 7.0 or greater required." + #endif -#include <cuda_runtime.h> -#include <cuda.h> + #if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 ) + // Compiling with CUDA compiler for device code. + #error "Cuda device capability >= 3.0 is required." + #endif -#if ! defined( CUDA_VERSION ) -#error "#include <cuda.h> did not define CUDA_VERSION" -#endif + #ifdef KOKKOS_ENABLE_CUDA_LAMBDA + #if ( CUDA_VERSION < 7050 ) + // CUDA supports C++11 lambdas generated in host code to be given + // to the device starting with version 7.5. But the release candidate (7.5.6) + // still identifies as 7.0. + #error "Cuda version 7.5 or greater required for host-to-device Lambda support." + #endif -#if ( CUDA_VERSION < 7000 ) -// CUDA supports C++11 in device code starting with -// version 7.0. This includes auto type and device code internal -// lambdas. -#error "Cuda version 7.0 or greater required" -#endif + #if ( CUDA_VERSION < 8000 ) && defined( __NVCC__ ) + #define KOKKOS_LAMBDA [=]__device__ + #else + #define KOKKOS_LAMBDA [=]__host__ __device__ -#if defined( __CUDA_ARCH__ ) && ( __CUDA_ARCH__ < 300 ) -/* Compiling with CUDA compiler for device code. */ -#error "Cuda device capability >= 3.0 is required" -#endif + #if defined( KOKKOS_ENABLE_CXX1Z ) + #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ + #endif + #endif -#ifdef KOKKOS_ENABLE_CUDA_LAMBDA -#if ( CUDA_VERSION < 7050 ) - // CUDA supports C++11 lambdas generated in host code to be given - // to the device starting with version 7.5. But the release candidate (7.5.6) - // still identifies as 7.0 - #error "Cuda version 7.5 or greater required for host-to-device Lambda support" -#endif -#if ( CUDA_VERSION < 8000 ) && defined(__NVCC__) - #define KOKKOS_LAMBDA [=]__device__ -#else - #define KOKKOS_LAMBDA [=]__host__ __device__ - #if defined( KOKKOS_ENABLE_CXX1Z ) - #define KOKKOS_CLASS_LAMBDA [=,*this] __host__ __device__ + #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 #endif -#endif -#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 -#endif -#endif /* #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) */ +#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ ) - -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) // Cuda version 8.0 still needs the functor wrapper - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA /* && (CUDA_VERSION < 8000) */ ) && defined(__NVCC__) + #if /* ( CUDA_VERSION < 8000 ) && */ defined( __NVCC__ ) #define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER #endif #endif -/*--------------------------------------------------------------------------*/ -/* Language info: C++, CUDA, OPENMP */ +//---------------------------------------------------------------------------- +// Language info: C++, CUDA, OPENMP #if defined( KOKKOS_ENABLE_CUDA ) // Compiling Cuda code to 'ptx' @@ -163,20 +160,17 @@ #define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__ #define KOKKOS_INLINE_FUNCTION __device__ __host__ inline #define KOKKOS_FUNCTION __device__ __host__ -#endif /* #if defined( __CUDA_ARCH__ ) */ +#endif // #if defined( __CUDA_ARCH__ ) #if defined( _OPENMP ) + // Compiling with OpenMP. + // The value of _OPENMP is an integer value YYYYMM + // where YYYY and MM are the year and month designation + // of the supported OpenMP API version. +#endif // #if defined( _OPENMP ) - /* Compiling with OpenMP. - * The value of _OPENMP is an integer value YYYYMM - * where YYYY and MM are the year and month designation - * of the supported OpenMP API version. 
- */ - -#endif /* #if defined( _OPENMP ) */ - -/*--------------------------------------------------------------------------*/ -/* Mapping compiler built-ins to KOKKOS_COMPILER_*** macros */ +//---------------------------------------------------------------------------- +// Mapping compiler built-ins to KOKKOS_COMPILER_*** macros #if defined( __NVCC__ ) // NVIDIA compiler is being used. @@ -184,29 +178,28 @@ // Host code is compiled again with another compiler. // Device code is compile to 'ptx'. #define KOKKOS_COMPILER_NVCC __NVCC__ - #else -#if ! defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) - #if !defined (KOKKOS_ENABLE_CUDA) // Compiling with clang for Cuda does not work with LAMBDAs either - // CUDA (including version 6.5) does not support giving lambdas as - // arguments to global functions. Thus its not currently possible - // to dispatch lambdas from the host. - #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 + #if !defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) + #if !defined( KOKKOS_ENABLE_CUDA ) // Compiling with clang for Cuda does not work with LAMBDAs either + // CUDA (including version 6.5) does not support giving lambdas as + // arguments to global functions. Thus its not currently possible + // to dispatch lambdas from the host. + #define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1 #endif #endif -#endif /* #if defined( __NVCC__ ) */ +#endif // #if defined( __NVCC__ ) -#if !defined (KOKKOS_LAMBDA) +#if !defined( KOKKOS_LAMBDA ) #define KOKKOS_LAMBDA [=] #endif -#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined (KOKKOS_CLASS_LAMBDA) +#if defined( KOKKOS_ENABLE_CXX1Z ) && !defined( KOKKOS_CLASS_LAMBDA ) #define KOKKOS_CLASS_LAMBDA [=,*this] #endif -//#if ! defined( __CUDA_ARCH__ ) /* Not compiling Cuda code to 'ptx'. */ +//#if !defined( __CUDA_ARCH__ ) // Not compiling Cuda code to 'ptx'. -/* Intel compiler for host code */ +// Intel compiler for host code. #if defined( __INTEL_COMPILER ) #define KOKKOS_COMPILER_INTEL __INTEL_COMPILER @@ -218,7 +211,7 @@ #define KOKKOS_COMPILER_INTEL __ECC #endif -/* CRAY compiler for host code */ +// CRAY compiler for host code #if defined( _CRAYC ) #define KOKKOS_COMPILER_CRAYC _CRAYC #endif @@ -234,50 +227,53 @@ #define KOKKOS_COMPILER_APPLECC __APPLE_CC__ #endif -#if defined (__clang__) && !defined (KOKKOS_COMPILER_INTEL) +#if defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL ) #define KOKKOS_COMPILER_CLANG __clang_major__*100+__clang_minor__*10+__clang_patchlevel__ #endif -#if ! defined( __clang__ ) && ! defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ ) +#if !defined( __clang__ ) && !defined( KOKKOS_COMPILER_INTEL ) &&defined( __GNUC__ ) #define KOKKOS_COMPILER_GNU __GNUC__*100+__GNUC_MINOR__*10+__GNUC_PATCHLEVEL__ + #if ( 472 > KOKKOS_COMPILER_GNU ) #error "Compiling with GCC version earlier than 4.7.2 is not supported." #endif #endif -#if defined( __PGIC__ ) && ! defined( __GNUC__ ) +#if defined( __PGIC__ ) && !defined( __GNUC__ ) #define KOKKOS_COMPILER_PGI __PGIC__*100+__PGIC_MINOR__*10+__PGIC_PATCHLEVEL__ + #if ( 1540 > KOKKOS_COMPILER_PGI ) #error "Compiling with PGI version earlier than 15.4 is not supported." #endif #endif -//#endif /* #if ! 
defined( __CUDA_ARCH__ ) */ +//#endif // #if !defined( __CUDA_ARCH__ ) -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ -/* Intel compiler macros */ +//---------------------------------------------------------------------------- +// Intel compiler macros #if defined( KOKKOS_COMPILER_INTEL ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 - #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 #define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 #define KOKKOS_ENABLE_PRAGMA_VECTOR 1 #define KOKKOS_ENABLE_PRAGMA_SIMD 1 + #if ( __INTEL_COMPILER > 1400 ) + #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 + #endif + #define KOKKOS_RESTRICT __restrict__ #ifndef KOKKOS_ALIGN - #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) + #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) #endif #ifndef KOKKOS_ALIGN_PTR - #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size))) + #define KOKKOS_ALIGN_PTR(size) __attribute__((align_value(size))) #endif #ifndef KOKKOS_ALIGN_SIZE - #define KOKKOS_ALIGN_SIZE 64 + #define KOKKOS_ALIGN_SIZE 64 #endif #if ( 1400 > KOKKOS_COMPILER_INTEL ) @@ -287,12 +283,13 @@ #warning "Compiling with Intel version 13.x probably works but is not officially supported. Official minimal version is 14.0." #endif #endif - #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( _WIN32 ) + + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( _WIN32 ) #define KOKKOS_ENABLE_ASM 1 #endif - #if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) - #if !defined (_WIN32) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( _WIN32 ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #else #define KOKKOS_FORCEINLINE_FUNCTION inline @@ -302,192 +299,170 @@ #if defined( __MIC__ ) // Compiling for Xeon Phi #endif - #endif -/*--------------------------------------------------------------------------*/ -/* Cray compiler macros */ +//---------------------------------------------------------------------------- +// Cray compiler macros #if defined( KOKKOS_COMPILER_CRAYC ) - - #endif -/*--------------------------------------------------------------------------*/ -/* IBM Compiler macros */ +//---------------------------------------------------------------------------- +// IBM Compiler macros #if defined( KOKKOS_COMPILER_IBM ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #endif -/*--------------------------------------------------------------------------*/ -/* CLANG compiler macros */ +//---------------------------------------------------------------------------- +// CLANG compiler macros #if defined( KOKKOS_COMPILER_CLANG ) - //#define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #if ! 
defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif - #endif -/*--------------------------------------------------------------------------*/ -/* GNU Compiler macros */ +//---------------------------------------------------------------------------- +// GNU Compiler macros #if defined( KOKKOS_COMPILER_GNU ) - //#define KOKKOS_ENABLE_PRAGMA_UNROLL 1 //#define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif - #if ! defined( KOKKOS_ENABLE_ASM ) && ! defined( __PGIC__ ) && \ - ( defined( __amd64 ) || \ - defined( __amd64__ ) || \ - defined( __x86_64 ) || \ - defined( __x86_64__ ) ) + #if !defined( KOKKOS_ENABLE_ASM ) && !defined( __PGIC__ ) && \ + ( defined( __amd64 ) || defined( __amd64__ ) || \ + defined( __x86_64 ) || defined( __x86_64__ ) ) #define KOKKOS_ENABLE_ASM 1 #endif - #endif -/*--------------------------------------------------------------------------*/ +//---------------------------------------------------------------------------- #if defined( KOKKOS_COMPILER_PGI ) - #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 #define KOKKOS_ENABLE_PRAGMA_IVDEP 1 //#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1 #define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 - #endif -/*--------------------------------------------------------------------------*/ +//---------------------------------------------------------------------------- #if defined( KOKKOS_COMPILER_NVCC ) - - #if defined(__CUDA_ARCH__ ) + #if defined( __CUDA_ARCH__ ) #define KOKKOS_ENABLE_PRAGMA_UNROLL 1 #endif - #endif //---------------------------------------------------------------------------- -/** Define function marking macros if compiler specific macros are undefined: */ +// Define function marking macros if compiler specific macros are undefined: -#if ! defined( KOKKOS_FORCEINLINE_FUNCTION ) -#define KOKKOS_FORCEINLINE_FUNCTION inline +#if !defined( KOKKOS_FORCEINLINE_FUNCTION ) + #define KOKKOS_FORCEINLINE_FUNCTION inline #endif -#if ! defined( KOKKOS_INLINE_FUNCTION ) -#define KOKKOS_INLINE_FUNCTION inline +#if !defined( KOKKOS_INLINE_FUNCTION ) + #define KOKKOS_INLINE_FUNCTION inline #endif -#if ! defined( KOKKOS_FUNCTION ) -#define KOKKOS_FUNCTION /**/ +#if !defined( KOKKOS_FUNCTION ) + #define KOKKOS_FUNCTION /**/ #endif - //---------------------------------------------------------------------------- -///** Define empty macro for restrict if necessary: */ +// Define empty macro for restrict if necessary: -#if ! defined(KOKKOS_RESTRICT) -#define KOKKOS_RESTRICT +#if !defined( KOKKOS_RESTRICT ) + #define KOKKOS_RESTRICT #endif //---------------------------------------------------------------------------- -/** Define Macro for alignment: */ -#if ! defined KOKKOS_ALIGN_SIZE -#define KOKKOS_ALIGN_SIZE 16 -#endif +// Define Macro for alignment: -#if ! defined(KOKKOS_ALIGN) -#define KOKKOS_ALIGN(size) __attribute__((aligned(size))) +#if !defined KOKKOS_ALIGN_SIZE + #define KOKKOS_ALIGN_SIZE 16 #endif -#if ! 
defined(KOKKOS_ALIGN_PTR) -#define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size))) +#if !defined( KOKKOS_ALIGN ) + #define KOKKOS_ALIGN(size) __attribute__((aligned(size))) #endif -//---------------------------------------------------------------------------- -/** Determine the default execution space for parallel dispatch. - * There is zero or one default execution space specified. - */ - -#if 1 < ( ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ - ( defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) ) - -#error "More than one KOKKOS_HAVE_DEFAULT_DEVICE_TYPE_* specified" ; - +#if !defined( KOKKOS_ALIGN_PTR ) + #define KOKKOS_ALIGN_PTR(size) __attribute__((aligned(size))) #endif -/** If default is not specified then chose from enabled execution spaces. - * Priority: CUDA, OPENMP, THREADS, SERIAL - */ -#if defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) -#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) -#elif defined ( KOKKOS_ENABLE_CUDA ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA -#elif defined ( KOKKOS_ENABLE_OPENMP ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP -#elif defined ( KOKKOS_ENABLE_PTHREAD ) -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS +//---------------------------------------------------------------------------- +// Determine the default execution space for parallel dispatch. +// There is zero or one default execution space specified. + +#if 1 < ( ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) ? 1 : 0 ) + \ + ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) ? 1 : 0 ) ) + #error "More than one KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_* specified." +#endif + +// If default is not specified then chose from enabled execution spaces. 
+// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) +//#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) +#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) +#elif defined( KOKKOS_ENABLE_CUDA ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA +#elif defined( KOKKOS_ENABLE_OPENMP ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP +#elif defined( KOKKOS_ENABLE_PTHREAD ) + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS +//#elif defined( KOKKOS_ENABLE_QTHREADS ) +// #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS #else -#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + #define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL #endif //---------------------------------------------------------------------------- -/** Determine for what space the code is being compiled: */ +// Determine for what space the code is being compiled: -#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined (KOKKOS_ENABLE_CUDA) -#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA +#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA ) + #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA #else -#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST + #define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST #endif -//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #if ( defined( _POSIX_C_SOURCE ) && _POSIX_C_SOURCE >= 200112L ) || \ ( defined( _XOPEN_SOURCE ) && _XOPEN_SOURCE >= 600 ) -#if defined(KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN) -#define KOKKOS_ENABLE_POSIX_MEMALIGN 1 -#endif + #if defined( KOKKOS_ENABLE_PERFORMANCE_POSIX_MEMALIGN ) + #define KOKKOS_ENABLE_POSIX_MEMALIGN 1 + #endif #endif //---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -/**Enable Profiling by default**/ +// Enable Profiling by default #ifndef KOKKOS_ENABLE_PROFILING -#define KOKKOS_ENABLE_PROFILING 1 + #define KOKKOS_ENABLE_PROFILING 1 #endif -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #ifndef KOKKOS_MACROS_HPP */ - +#endif // #ifndef KOKKOS_MACROS_HPP diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index 2d45926e762acd61ba7f308a80c2d7f922267ffe..eadad10b4991db1e98410f8eafcd77ad9bc87db0 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -1294,6 +1294,7 @@ public: KOKKOS_INLINE_FUNCTION size_t get_min_block_size() const { return MIN_BLOCK_SIZE; } + KOKKOS_INLINE_FUNCTION size_t get_mem_size() const { return m_data_size; } private: diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index a337d1a9d4a02fcdaae38a6f402301d1a6a9ec03..c0c43b92f4d72f4fb6ae5ba95dc5270887f1cd32 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -66,7 +66,6 @@ #include <Kokkos_Layout.hpp> #include <impl/Kokkos_Tags.hpp> -#include <KokkosExp_MDRangePolicy.hpp> /*--------------------------------------------------------------------------*/ namespace Kokkos { @@ -196,6 +195,7 @@ struct VerifyExecutionCanAccessMemorySpace #include 
<OpenMP/Kokkos_OpenMP_Parallel.hpp> #include <OpenMP/Kokkos_OpenMP_Task.hpp> +#include <KokkosExp_MDRangePolicy.hpp> /*--------------------------------------------------------------------------*/ #endif /* #if defined( KOKKOS_ENABLE_OPENMP ) && defined( _OPENMP ) */ diff --git a/lib/kokkos/core/src/Kokkos_Pair.hpp b/lib/kokkos/core/src/Kokkos_Pair.hpp index 83436826f4aded7131802662327d6b80c5b5c785..067767f2f83f1739fb3a40bd800300c2078c3b28 100644 --- a/lib/kokkos/core/src/Kokkos_Pair.hpp +++ b/lib/kokkos/core/src/Kokkos_Pair.hpp @@ -78,16 +78,14 @@ struct pair /// This calls the default constructors of T1 and T2. It won't /// compile if those default constructors are not defined and /// public. - KOKKOS_FORCEINLINE_FUNCTION - pair() - : first(), second() - {} + KOKKOS_FORCEINLINE_FUNCTION constexpr + pair() = default ; /// \brief Constructor that takes both elements of the pair. /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type const& f, second_type const& s) : first(f), second(s) {} @@ -97,7 +95,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair<U,V> &p) : first(p.first), second(p.second) {} @@ -107,7 +105,7 @@ struct pair /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const volatile pair<U,V> &p) : first(p.first), second(p.second) {} @@ -183,7 +181,7 @@ struct pair<T1&, T2&> /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type f, second_type s) : first(f), second(s) {} @@ -193,7 +191,7 @@ struct pair<T1&, T2&> /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair<U,V> &p) : first(p.first), second(p.second) {} @@ -247,7 +245,7 @@ struct pair<T1, T2&> /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type const& f, second_type s) : first(f), second(s) {} @@ -257,7 +255,7 @@ struct pair<T1, T2&> /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair<U,V> &p) : first(p.first), second(p.second) {} @@ -311,7 +309,7 @@ struct pair<T1&, T2> /// /// This calls the copy constructors of T1 and T2. It won't compile /// if those copy constructors are not defined and public. - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(first_type f, second_type const& s) : first(f), second(s) {} @@ -321,7 +319,7 @@ struct pair<T1&, T2> /// This calls the copy constructors of T1 and T2. 
It won't compile /// if those copy constructors are not defined and public. template <class U, class V> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair<U,V> &p) : first(p.first), second(p.second) {} @@ -366,31 +364,31 @@ bool operator== (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) //! Inequality operator for Kokkos::pair. template <class T1, class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) { return !(lhs==rhs); } //! Less-than operator for Kokkos::pair. template <class T1, class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator< (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) { return lhs.first<rhs.first || (!(rhs.first<lhs.first) && lhs.second<rhs.second); } //! Less-than-or-equal-to operator for Kokkos::pair. template <class T1, class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) { return !(rhs<lhs); } //! Greater-than operator for Kokkos::pair. template <class T1, class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator> (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) { return rhs<lhs; } //! Greater-than-or-equal-to operator for Kokkos::pair. template <class T1, class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) { return !(lhs<rhs); } @@ -399,7 +397,7 @@ bool operator>= (const pair<T1,T2>& lhs, const pair<T1,T2>& rhs) /// This is a "nonmember constructor" for Kokkos::pair. It works just /// like std::make_pair. template <class T1,class T2> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr pair<T1,T2> make_pair (T1 x, T2 y) { return ( pair<T1,T2>(x,y) ); } @@ -460,23 +458,21 @@ struct pair<T1,void> first_type first; enum { second = 0 }; - KOKKOS_FORCEINLINE_FUNCTION - pair() - : first() - {} + KOKKOS_FORCEINLINE_FUNCTION constexpr + pair() = default ; - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const first_type & f) : first(f) {} - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair(const first_type & f, int) : first(f) {} template <class U> - KOKKOS_FORCEINLINE_FUNCTION + KOKKOS_FORCEINLINE_FUNCTION constexpr pair( const pair<U,void> &p) : first(p.first) {} @@ -495,32 +491,32 @@ struct pair<T1,void> // template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator== (const pair<T1,void>& lhs, const pair<T1,void>& rhs) { return lhs.first==rhs.first; } template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator!= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) { return !(lhs==rhs); } template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator< (const pair<T1,void>& lhs, const pair<T1,void>& rhs) { return lhs.first<rhs.first; } template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator<= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) { return !(rhs<lhs); } template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator> (const pair<T1,void>& lhs, const pair<T1,void>& rhs) { return rhs<lhs; } template <class T1> -KOKKOS_FORCEINLINE_FUNCTION +KOKKOS_FORCEINLINE_FUNCTION constexpr bool operator>= (const pair<T1,void>& lhs, 
const pair<T1,void>& rhs) { return !(lhs<rhs); } @@ -528,3 +524,4 @@ bool operator>= (const pair<T1,void>& lhs, const pair<T1,void>& rhs) #endif //KOKKOS_PAIR_HPP + diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index 64b1502bcc1932338a16bfcb1604eb1887d85cce..e412e608b28ca52f7d7888ea5fc37af721c5b10c 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -52,13 +52,14 @@ #include <Kokkos_View.hpp> #include <Kokkos_ExecPolicy.hpp> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #include <typeinfo> #endif #include <impl/Kokkos_Tags.hpp> #include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_FunctorAnalysis.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> #ifdef KOKKOS_DEBUG @@ -175,7 +176,7 @@ void parallel_for( const ExecPolicy & policy , typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0 ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); @@ -185,10 +186,10 @@ void parallel_for( const ExecPolicy & policy Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); - + closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -207,20 +208,20 @@ void parallel_for( const size_t work_count execution_space ; typedef RangePolicy< execution_space > policy ; -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif - + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelFor(kpID); } @@ -417,7 +418,7 @@ void parallel_scan( const ExecutionPolicy & policy , typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0 ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelScan("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); @@ -430,7 +431,7 @@ void parallel_scan( const ExecutionPolicy & policy closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelScan(kpID); } @@ -450,20 +451,20 @@ void parallel_scan( const size_t work_count typedef Kokkos::RangePolicy< execution_space > policy ; -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelScan("" == str ? typeid(FunctorType).name() : str, 0, &kpID); } #endif - + Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) ); Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelScan(kpID); } diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index a3649b4422dc7f581b38f2866f2bacb63b93b631..900dce19fe52b538228fbb2a82cb649f5313ec43 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -1094,7 +1094,7 @@ namespace Impl { const PolicyType& policy, const FunctorType& functor, ReturnType& return_value) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::beginParallelReduce("" == label ? typeid(FunctorType).name() : label, 0, &kpID); @@ -1116,7 +1116,7 @@ namespace Impl { Kokkos::Impl::shared_allocation_tracking_release_and_enable(); closure.execute(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::endParallelReduce(kpID); } diff --git a/lib/kokkos/core/src/Kokkos_Qthread.hpp b/lib/kokkos/core/src/Kokkos_Qthreads.hpp similarity index 72% rename from lib/kokkos/core/src/Kokkos_Qthread.hpp rename to lib/kokkos/core/src/Kokkos_Qthreads.hpp index c58518b0654bb3267a12041a2ab7fef4e2375972..0507552c3f95e7fb63527603c7123a19daee2b14 100644 --- a/lib/kokkos/core/src/Kokkos_Qthread.hpp +++ b/lib/kokkos/core/src/Kokkos_Qthreads.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,57 +36,75 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#ifndef KOKKOS_QTHREAD_HPP -#define KOKKOS_QTHREAD_HPP +#ifndef KOKKOS_QTHREADS_HPP +#define KOKKOS_QTHREADS_HPP + +#include <Kokkos_Core_fwd.hpp> + +#ifdef KOKKOS_ENABLE_QTHREADS + +// Defines to enable experimental Qthreads functionality. 
+#define QTHREAD_LOCAL_PRIORITY +#define CLONED_TASKS + +#include <qthread.h> #include <cstddef> #include <iosfwd> -#include <Kokkos_Core.hpp> -#include <Kokkos_Layout.hpp> -#include <Kokkos_MemoryTraits.hpp> + #include <Kokkos_HostSpace.hpp> -#include <Kokkos_ExecPolicy.hpp> +#include <Kokkos_ScratchSpace.hpp> +#include <Kokkos_Parallel.hpp> +//#include <Kokkos_MemoryTraits.hpp> +//#include <Kokkos_ExecPolicy.hpp> +//#include <Kokkos_TaskScheduler.hpp> // Uncomment when Tasking working. +#include <Kokkos_Layout.hpp> #include <impl/Kokkos_Tags.hpp> +#include <KokkosExp_MDRangePolicy.hpp> /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { -class QthreadExec ; + +class QthreadsExec; + } // namespace Impl + } // namespace Kokkos /*--------------------------------------------------------------------------*/ namespace Kokkos { -/** \brief Execution space supported by Qthread */ -class Qthread { +/** \brief Execution space supported by Qthreads */ +class Qthreads { public: //! \name Type declarations that all Kokkos devices must provide. //@{ //! Tag this class as an execution space - typedef Qthread execution_space ; - typedef Kokkos::HostSpace memory_space ; + typedef Qthreads execution_space; + typedef Kokkos::HostSpace memory_space; //! This execution space preferred device_type - typedef Kokkos::Device<execution_space,memory_space> device_type; + typedef Kokkos::Device< execution_space, memory_space > device_type; - typedef Kokkos::LayoutRight array_layout ; - typedef memory_space::size_type size_type ; + typedef Kokkos::LayoutRight array_layout; + typedef memory_space::size_type size_type; - typedef ScratchMemorySpace< Qthread > scratch_memory_space ; + typedef ScratchMemorySpace< Qthreads > scratch_memory_space; //@} /*------------------------------------------------------------------------*/ /** \brief Initialization will construct one or more instances */ - static Qthread & instance( int = 0 ); + static Qthreads & instance( int = 0 ); /** \brief Set the execution space to a "sleep" state. * @@ -100,14 +118,14 @@ public: bool sleep(); /** \brief Wake from the sleep state. - * + * * \return True if enters or is in the "ready" state. * False if functions are currently executing. */ static bool wake(); /** \brief Wait until all dispatched functions to complete. - * + * * The parallel_for or parallel_reduce dispatch of a functor may * return asynchronously, before the functor completes. This * method does not return until all dispatched functors on this @@ -128,26 +146,24 @@ public: static void finalize(); /** \brief Print configuration information to the given output stream. 
*/ - static void print_configuration( std::ostream & , const bool detail = false ); + static void print_configuration( std::ostream &, const bool detail = false ); - int shepherd_size() const ; - int shepherd_worker_size() const ; + int shepherd_size() const; + int shepherd_worker_size() const; }; -/*--------------------------------------------------------------------------*/ - } // namespace Kokkos -/*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ namespace Kokkos { + namespace Impl { template<> -struct MemorySpaceAccess - < Kokkos::Qthread::memory_space - , Kokkos::Qthread::scratch_memory_space +struct MemorySpaceAccess + < Kokkos::Qthreads::memory_space + , Kokkos::Qthreads::scratch_memory_space > { enum { assignable = false }; @@ -157,27 +173,26 @@ struct MemorySpaceAccess template<> struct VerifyExecutionCanAccessMemorySpace - < Kokkos::Qthread::memory_space - , Kokkos::Qthread::scratch_memory_space + < Kokkos::Qthreads::memory_space + , Kokkos::Qthreads::scratch_memory_space > { enum { value = true }; - inline static void verify( void ) { } - inline static void verify( const void * ) { } + inline static void verify( void ) {} + inline static void verify( const void * ) {} }; } // namespace Impl + } // namespace Kokkos /*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - -#include <Kokkos_Parallel.hpp> -#include <Qthread/Kokkos_QthreadExec.hpp> -#include <Qthread/Kokkos_Qthread_Parallel.hpp> -#endif /* #define KOKKOS_QTHREAD_HPP */ +#include <Qthreads/Kokkos_QthreadsExec.hpp> +#include <Qthreads/Kokkos_Qthreads_Parallel.hpp> +//#include <Qthreads/Kokkos_Qthreads_Task.hpp> // Uncomment when Tasking working. +//#include <Qthreads/Kokkos_Qthreads_TaskQueue.hpp> // Uncomment when Tasking working. -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- +#endif // #define KOKKOS_ENABLE_QTHREADS +#endif // #define KOKKOS_QTHREADS_HPP diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index f26253591007774c6d1aeb70bce6210896fea56f..72710e81679863bfc3c5e680663cf0feda2b5868 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -56,6 +56,8 @@ #include <Kokkos_ScratchSpace.hpp> #include <Kokkos_MemoryTraits.hpp> #include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_HostThreadTeam.hpp> +#include <impl/Kokkos_FunctorAnalysis.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> #include <impl/Kokkos_Profiling_Interface.hpp> @@ -138,30 +140,15 @@ public: static void initialize( unsigned threads_count = 1 , unsigned use_numa_count = 0 , unsigned use_cores_per_numa = 0 , - bool allow_asynchronous_threadpool = false) { - (void) threads_count; - (void) use_numa_count; - (void) use_cores_per_numa; - (void) allow_asynchronous_threadpool; - - // Init the array of locks used for arbitrarily sized atomics - Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) - Kokkos::Profiling::initialize(); - #endif - } + bool allow_asynchronous_threadpool = false); - static int is_initialized() { return 1 ; } + static int is_initialized(); /** \brief Return the maximum amount of concurrency. */ static int concurrency() {return 1;}; //! Free any resources being consumed by the device. 
- static void finalize() { - #if (KOKKOS_ENABLE_PROFILING) - Kokkos::Profiling::finalize(); - #endif - } + static void finalize(); //! Print configuration information to the given output stream. static void print_configuration( std::ostream & , const bool /* detail */ = false ) {} @@ -177,10 +164,6 @@ public: inline static unsigned max_hardware_threads() { return thread_pool_size(0); } //-------------------------------------------------------------------------- - - static void * scratch_memory_resize( unsigned reduce_size , unsigned shared_size ); - - //-------------------------------------------------------------------------- }; } // namespace Kokkos @@ -192,7 +175,7 @@ namespace Kokkos { namespace Impl { template<> -struct MemorySpaceAccess +struct MemorySpaceAccess < Kokkos::Serial::memory_space , Kokkos::Serial::scratch_memory_space > @@ -213,22 +196,6 @@ struct VerifyExecutionCanAccessMemorySpace inline static void verify( const void * ) { } }; -namespace SerialImpl { - -struct Sentinel { - - void * m_scratch ; - unsigned m_reduce_end ; - unsigned m_shared_end ; - - Sentinel(); - ~Sentinel(); - static Sentinel & singleton(); -}; - -inline -unsigned align( unsigned n ); -} } // namespace Impl } // namespace Kokkos @@ -238,89 +205,26 @@ unsigned align( unsigned n ); namespace Kokkos { namespace Impl { -class SerialTeamMember { -private: - typedef Kokkos::ScratchMemorySpace< Kokkos::Serial > scratch_memory_space ; - const scratch_memory_space m_space ; - const int m_league_rank ; - const int m_league_size ; - - SerialTeamMember & operator = ( const SerialTeamMember & ); - -public: - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_shmem() const { return m_space ; } - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_scratch(int) const - { return m_space ; } - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & thread_scratch(int) const - { return m_space ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } +// Resize thread team data scratch memory +void serial_resize_thread_team_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ); - KOKKOS_INLINE_FUNCTION void team_barrier() const {} +HostThreadTeamData * serial_get_thread_team_data(); - template<class ValueType> - KOKKOS_INLINE_FUNCTION - void team_broadcast(const ValueType& , const int& ) const {} - - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION - ValueType team_reduce( const ValueType & value , const JoinOp & ) const - { - return value ; - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const - { - const Type tmp = global_accum ? 
*global_accum : Type(0) ; - if ( global_accum ) { *global_accum += value ; } - return tmp ; - } - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & ) const - { return Type(0); } - - //---------------------------------------- - // Execution space specific: +} /* namespace Impl */ +} /* namespace Kokkos */ - SerialTeamMember( int arg_league_rank - , int arg_league_size - , int arg_shared_size - ); -}; -} // namespace Impl +namespace Kokkos { +namespace Impl { /* * < Kokkos::Serial , WorkArgTag > * < WorkArgTag , Impl::enable_if< std::is_same< Kokkos::Serial , Kokkos::DefaultExecutionSpace >::value >::type > * */ -namespace Impl { template< class ... Properties > class TeamPolicyInternal< Kokkos::Serial , Properties ... >:public PolicyTraits<Properties...> { @@ -441,14 +345,11 @@ public: return p; }; - typedef Impl::SerialTeamMember member_type ; + typedef Impl::HostThreadTeamMember< Kokkos::Serial > member_type ; }; } /* namespace Impl */ } /* namespace Kokkos */ -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ - /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ /* Parallel patterns for Kokkos::Serial with RangePolicy */ @@ -521,11 +422,12 @@ private: typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -535,34 +437,25 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( i , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class TagType > inline typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( t , i , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } public: @@ -570,10 +463,29 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize - ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor , m_reducer) ); + const size_t team_reduce_size = 0 ; // Never shrinks + const size_t team_shared_size = 0 ; // Never shrinks + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); - this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr ); + pointer_type ptr = + m_result_ptr ? m_result_ptr : pointer_type(data.pool_reduce_local()); + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + this-> template exec< WorkTag >( update ); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class HostViewType > @@ -587,7 +499,7 @@ public: : m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) - , m_result_ptr( arg_result_view.ptr_on_device() ) + , m_result_ptr( arg_result_view.data() ) { static_assert( Kokkos::is_view< HostViewType >::value , "Kokkos::Serial reduce result must be a View" ); @@ -623,11 +535,13 @@ private: typedef Kokkos::RangePolicy< Traits ... > Policy ; typedef typename Policy::work_tag WorkTag ; - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , WorkTag > ValueTraits ; + + typedef FunctorAnalysis< FunctorPatternInterface::SCAN , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::FunctorValueInit< FunctorType , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -635,10 +549,8 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { - reference_type update = ValueInit::init( m_functor , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( i , update , true ); @@ -648,11 +560,9 @@ private: template< class TagType > inline typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( m_functor , ptr ); - const typename Policy::member_type e = m_policy.end(); for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { m_functor( t , i , update , true ); @@ -664,9 +574,22 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) - Kokkos::Serial::scratch_memory_resize( ValueTraits::value_size( m_functor ) , 0 ); - this-> template exec< WorkTag >( ptr ); + const size_t pool_reduce_size = Analysis::value_size( m_functor ); + const size_t team_reduce_size = 0 ; // Never shrinks + const size_t team_shared_size = 0 ; // Never shrinks + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + reference_type update = + ValueInit::init( m_functor , pointer_type(data.pool_reduce_local()) ); + + this-> template exec< WorkTag >( update ); } inline @@ -696,6 +619,8 @@ class ParallelFor< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef TeamPolicyInternal< Kokkos::Serial , Properties ...> Policy ; typedef typename Policy::member_type Member ; @@ -706,21 +631,21 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec() const + exec( HostThreadTeamData & data ) const { for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( Member(ileague,m_league,m_shared) ); + m_functor( Member(data,ileague,m_league) ); } } template< class TagType > inline typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec() const + exec( HostThreadTeamData & data ) const { const TagType t{} ; for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( t , Member(ileague,m_league,m_shared) ); + m_functor( t , Member(data,ileague,m_league) ); } } @@ -729,15 +654,28 @@ public: inline void execute() const { - Kokkos::Serial::scratch_memory_resize( 0 , m_shared ); - this-> template exec< typename Policy::work_tag >(); + const size_t pool_reduce_size = 0 ; // Never shrinks + const size_t team_reduce_size = TEAM_REDUCE_SIZE ; + const size_t team_shared_size = m_shared ; + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + this->template exec< typename Policy::work_tag >( data ); } ParallelFor( const FunctorType & arg_functor , const Policy & arg_policy ) : m_functor( arg_functor ) , m_league( arg_policy.league_size() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) { } }; @@ -752,18 +690,22 @@ class ParallelReduce< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef TeamPolicyInternal< Kokkos::Serial, Properties ... 
> Policy ; + + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const int m_league ; @@ -774,33 +716,23 @@ private: template< class TagType > inline typename std::enable_if< std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( HostThreadTeamData & data , reference_type update ) const { - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( Member(ileague,m_league,m_shared) , update ); + m_functor( Member(data,ileague,m_league) , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class TagType > inline typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec( pointer_type ptr ) const + exec( HostThreadTeamData & data , reference_type update ) const { const TagType t{} ; - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); - for ( int ileague = 0 ; ileague < m_league ; ++ileague ) { - m_functor( t , Member(ileague,m_league,m_shared) , update ); + m_functor( t , Member(data,ileague,m_league) , update ); } - - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , TagType >:: - final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } public: @@ -808,10 +740,31 @@ public: inline void execute() const { - pointer_type ptr = (pointer_type) Kokkos::Serial::scratch_memory_resize - ( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , m_shared ); + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); + + const size_t team_reduce_size = TEAM_REDUCE_SIZE ; + const size_t team_shared_size = m_shared ; + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + - this-> template exec< WorkTag >( m_result_ptr ? m_result_ptr : ptr ); + HostThreadTeamData & data = *serial_get_thread_team_data(); + + pointer_type ptr = + m_result_ptr ? 
m_result_ptr : pointer_type(data.pool_reduce_local()); + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + this-> template exec< WorkTag >( data , update ); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); } template< class ViewType > @@ -825,8 +778,10 @@ public: : m_functor( arg_functor ) , m_league( arg_policy.league_size() ) , m_reducer( InvalidType() ) - , m_result_ptr( arg_result.ptr_on_device() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) ) + , m_result_ptr( arg_result.data() ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( m_functor , 1 ) ) { static_assert( Kokkos::is_view< ViewType >::value , "Reduction result on Kokkos::Serial must be a Kokkos::View" ); @@ -838,13 +793,15 @@ public: inline ParallelReduce( const FunctorType & arg_functor - , Policy arg_policy - , const ReducerType& reducer ) - : m_functor( arg_functor ) - , m_league( arg_policy.league_size() ) - , m_reducer( reducer ) - , m_result_ptr( reducer.result_view().data() ) - , m_shared( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , Policy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_league( arg_policy.league_size() ) + , m_reducer( reducer ) + , m_result_ptr( reducer.result_view().data() ) + , m_shared( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType >::value( arg_functor , 1 ) ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value @@ -858,261 +815,6 @@ public: /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ -/* Nested parallel patterns for Kokkos::Serial with TeamPolicy */ - -namespace Kokkos { -namespace Impl { - -template<typename iType> -struct TeamThreadRangeBoundariesStruct<iType,SerialTeamMember> { - typedef iType index_type; - const iType begin ; - const iType end ; - enum {increment = 1}; - const SerialTeamMember& thread; - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_count) - : begin(0) - , end(arg_count) - , thread(arg_thread) - {} - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct (const SerialTeamMember& arg_thread, const iType& arg_begin, const iType & arg_end ) - : begin( arg_begin ) - , end( arg_end) - , thread( arg_thread ) - {} -}; - - template<typename iType> - struct ThreadVectorRangeBoundariesStruct<iType,SerialTeamMember> { - typedef iType index_type; - enum {start = 0}; - const iType end; - enum {increment = 1}; - - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct (const SerialTeamMember& thread, const iType& count): - end( count ) - {} - }; - -} // namespace Impl - -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember> -TeamThreadRange( const Impl::SerialTeamMember& thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, count ); -} - -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename 
std::common_type< iType1, iType2 >::type, - Impl::SerialTeamMember > -TeamThreadRange( const Impl::SerialTeamMember& thread, const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::SerialTeamMember >( thread, iType(begin), iType(end) ); -} - -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember > - ThreadVectorRange(const Impl::SerialTeamMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >(thread,count); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct<Impl::SerialTeamMember> PerTeam(const Impl::SerialTeamMember& thread) { - return Impl::ThreadSingleStruct<Impl::SerialTeamMember>(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct<Impl::SerialTeamMember> PerThread(const Impl::SerialTeamMember& thread) { - return Impl::VectorSingleStruct<Impl::SerialTeamMember>(thread); -} - -} // namespace Kokkos - -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, - const Lambda & lambda, ValueType& result) { - - result = ValueType(); - - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } - - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). 
This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::SerialTeamMember>& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.begin; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,Impl::JoinLambdaAdapter<ValueType,JoinType>(join)); -} - -} //namespace Kokkos - -namespace Kokkos { -/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& - loop_boundaries, const Lambda& lambda) { - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& - loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - init_result = result; -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) - * for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. 
- * Depending on the target execution space the operator might be called twice: once with final=false - * and once with final=true. When final==true val contains the prefix sum value. The contribution of this - * "i" needs to be added to val no matter whether final==true or not. In a serial execution - * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set - * to the final sum value over all vector lanes. - * This functionality requires C++11 support.*/ -template< typename iType, class FunctorType > -KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::SerialTeamMember >& - loop_boundaries, const FunctorType & lambda) { - - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; - typedef typename ValueTraits::value_type value_type ; - - value_type scan_val = value_type(); - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,scan_val,true); - } -} - -} // namespace Kokkos - -namespace Kokkos { - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) { - lambda(); -} - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda) { - lambda(); -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) { - lambda(val); -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::SerialTeamMember>& , const FunctorType& lambda, ValueType& val) { - lambda(val); -} -} - -//---------------------------------------------------------------------------- #include <impl/Kokkos_Serial_Task.hpp> diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index e4271aa18814160f58fde909b619c78cc25761fa..e25039d236d68544cecf3dc968f853179e94a52d 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -82,6 +82,15 @@ class Future ; template< typename Space > class TaskScheduler ; +template< typename Space > +void wait( TaskScheduler< Space > const & ); + +template< typename Space > +struct is_scheduler : public std::false_type {}; + +template< typename Space > +struct is_scheduler< TaskScheduler< Space > > : public std::true_type {}; + } // namespace Kokkos #include <impl/Kokkos_TaskQueue.hpp> @@ -109,9 +118,6 @@ namespace Impl { template< typename Space , typename ResultType , typename FunctorType > class TaskBase ; -template< typename Space > -class TaskExec ; - } // namespace Impl } // namespace Kokkos @@ -312,6 +318,19 @@ public: } }; +// Is a Future with the given execution space +template< typename , typename ExecSpace = void > +struct is_future : public std::false_type {}; + +template< typename Arg1 , typename Arg2 , typename ExecSpace > +struct is_future< Future<Arg1,Arg2> , ExecSpace > + : public std::integral_constant + < bool , + ( std::is_same< ExecSpace , void >::value || + std::is_same< ExecSpace + , typename Future<Arg1,Arg2>::execution_space >::value ) + > {}; + } // namespace Kokkos //---------------------------------------------------------------------------- @@ -319,18 +338,59 @@ public: namespace 
Kokkos { -enum TaskType { TaskTeam = Impl::TaskBase<void,void,void>::TaskTeam - , TaskSingle = Impl::TaskBase<void,void,void>::TaskSingle }; +enum class TaskPriority : int { High = 0 + , Regular = 1 + , Low = 2 }; -enum TaskPriority { TaskHighPriority = 0 - , TaskRegularPriority = 1 - , TaskLowPriority = 2 }; +} // namespace Kokkos -template< typename Space > -void wait( TaskScheduler< Space > const & ); +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< int TaskEnum , typename DepFutureType > +struct TaskPolicyData +{ + using execution_space = typename DepFutureType::execution_space ; + using scheduler_type = TaskScheduler< execution_space > ; + + enum : int { m_task_type = TaskEnum }; + + scheduler_type const * m_scheduler ; + DepFutureType const m_dependence ; + int m_priority ; + + TaskPolicyData() = delete ; + TaskPolicyData( TaskPolicyData && ) = default ; + TaskPolicyData( TaskPolicyData const & ) = default ; + TaskPolicyData & operator = ( TaskPolicyData && ) = default ; + TaskPolicyData & operator = ( TaskPolicyData const & ) = default ; + + KOKKOS_INLINE_FUNCTION + TaskPolicyData( DepFutureType && arg_future + , Kokkos::TaskPriority const & arg_priority ) + : m_scheduler( 0 ) + , m_dependence( arg_future ) + , m_priority( static_cast<int>( arg_priority ) ) + {} + + KOKKOS_INLINE_FUNCTION + TaskPolicyData( scheduler_type const & arg_scheduler + , Kokkos::TaskPriority const & arg_priority ) + : m_scheduler( & arg_scheduler ) + , m_dependence() + , m_priority( static_cast<int>( arg_priority ) ) + {} +}; +} // namespace Impl } // namespace Kokkos +//---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { @@ -348,52 +408,13 @@ private: queue_type * m_queue ; //---------------------------------------- - // Process optional arguments to spawn and respawn functions - - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const ) {} - - // TaskTeam or TaskSingle - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskType const & arg - , Options const & ... opts ) - { - task->m_task_type = arg ; - assign( task , opts ... ); - } - - // TaskHighPriority or TaskRegularPriority or TaskLowPriority - template< typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , TaskPriority const & arg - , Options const & ... opts ) - { - task->m_priority = arg ; - assign( task , opts ... ); - } - - // Future for a dependence - template< typename A1 , typename A2 , typename ... Options > - KOKKOS_INLINE_FUNCTION static - void assign( task_base * const task - , Future< A1 , A2 > const & arg - , Options const & ... opts ) - { - task->add_dependence( arg.m_task ); - assign( task , opts ... 
); - } - - //---------------------------------------- public: - using execution_policy = TaskScheduler ; using execution_space = ExecSpace ; using memory_space = typename queue_type::memory_space ; - using member_type = Kokkos::Impl::TaskExec< ExecSpace > ; + using member_type = + typename Kokkos::Impl::TaskQueueSpecialization< ExecSpace >::member_type ; KOKKOS_INLINE_FUNCTION TaskScheduler() : m_track(), m_queue(0) {} @@ -460,18 +481,13 @@ public: //---------------------------------------- - /**\brief A task spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... Options > - KOKKOS_FUNCTION - Future< typename FunctorType::value_type , ExecSpace > - task_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const + template< int TaskEnum , typename DepFutureType , typename FunctorType > + KOKKOS_FUNCTION static + Kokkos::Future< typename FunctorType::value_type , execution_space > + spawn( Impl::TaskPolicyData<TaskEnum,DepFutureType> const & arg_policy + , typename task_base::function_type arg_function + , FunctorType && arg_functor + ) { using value_type = typename FunctorType::value_type ; using future_type = Future< value_type , execution_space > ; @@ -479,11 +495,21 @@ public: , value_type , FunctorType > ; + queue_type * const queue = + arg_policy.m_scheduler ? arg_policy.m_scheduler->m_queue : ( + arg_policy.m_dependence.m_task + ? arg_policy.m_dependence.m_task->m_queue + : (queue_type*) 0 ); + + if ( 0 == queue ) { + Kokkos::abort("Kokkos spawn given null Future" ); + } + //---------------------------------------- // Give single-thread back-ends an opportunity to clear // queue of ready tasks before allocating a new task - m_queue->iff_single_thread_recursive_execute(); + queue->iff_single_thread_recursive_execute(); //---------------------------------------- @@ -491,176 +517,129 @@ public: // Allocate task from memory pool f.m_task = - reinterpret_cast< task_type * >(m_queue->allocate(sizeof(task_type))); + reinterpret_cast< task_type * >(queue->allocate(sizeof(task_type))); if ( f.m_task ) { // Placement new construction - new ( f.m_task ) task_type( arg_functor ); - - // Reference count starts at two - // +1 for matching decrement when task is complete - // +1 for future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); - - assign( f.m_task , arg_options... ); - - // Spawning from within the execution space so the - // apply function pointer is guaranteed to be valid - f.m_task->m_apply = task_type::apply ; - - m_queue->schedule( f.m_task ); - // this task may be updated or executed at any moment + // Reference count starts at two: + // +1 for the matching decrement when task is complete + // +1 for the future + new ( f.m_task ) + task_type( arg_function + , queue + , arg_policy.m_dependence.m_task /* dependence */ + , 2 /* reference count */ + , int(sizeof(task_type)) /* allocation size */ + , int(arg_policy.m_task_type) + , int(arg_policy.m_priority) + , std::move(arg_functor) ); + + // The dependence (if any) is processed immediately + // within the schedule function, as such the dependence's + // reference count does not need to be incremented for + // the assignment. + + queue->schedule_runnable( f.m_task ); + // This task may be updated or executed at any moment, + // even during the call to 'schedule'. 
} return f ; } - /**\brief The host process spawns a task with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - * 3) Team or Serial - */ - template< typename FunctorType , typename ... Options > - inline - Future< typename FunctorType::value_type , ExecSpace > - host_spawn( FunctorType const & arg_functor - , Options const & ... arg_options - ) const + template< typename FunctorType , typename A1 , typename A2 > + KOKKOS_FUNCTION static + void + respawn( FunctorType * arg_self + , Future<A1,A2> const & arg_dependence + , TaskPriority const & arg_priority + ) { + // Precondition: task is in Executing state + using value_type = typename FunctorType::value_type ; - using future_type = Future< value_type , execution_space > ; using task_type = Impl::TaskBase< execution_space , value_type , FunctorType > ; - if ( m_queue == 0 ) { - Kokkos::abort("Kokkos::TaskScheduler not initialized"); - } + task_type * const task = static_cast< task_type * >( arg_self ); - future_type f ; + task->m_priority = static_cast<int>(arg_priority); - // Allocate task from memory pool - f.m_task = - reinterpret_cast<task_type*>( m_queue->allocate(sizeof(task_type)) ); - - if ( f.m_task ) { - - // Placement new construction - new( f.m_task ) task_type( arg_functor ); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = sizeof(task_type); - - assign( f.m_task , arg_options... ); - - // Potentially spawning outside execution space so the - // apply function pointer must be obtained from execution space. - // Required for Cuda execution space function pointer. - m_queue->template proc_set_apply< FunctorType >( & f.m_task->m_apply ); + task->add_dependence( arg_dependence.m_task ); - m_queue->schedule( f.m_task ); - } - return f ; + // Postcondition: task is in Executing-Respawn state } + //---------------------------------------- /**\brief Return a future that is complete * when all input futures are complete. */ template< typename A1 , typename A2 > - KOKKOS_FUNCTION - Future< ExecSpace > - when_all( int narg , Future< A1 , A2 > const * const arg ) const + KOKKOS_FUNCTION static + Future< execution_space > + when_all( Future< A1 , A2 > const arg[] , int narg ) { - static_assert - ( std::is_same< execution_space - , typename Future< A1 , A2 >::execution_space - >::value - , "Future must have same execution space" ); - - using future_type = Future< ExecSpace > ; - using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; + using future_type = Future< execution_space > ; + using task_base = Kokkos::Impl::TaskBase< execution_space , void , void > ; future_type f ; - size_t const size = sizeof(task_base) + narg * sizeof(task_base*); - - f.m_task = - reinterpret_cast< task_base * >( m_queue->allocate( size ) ); + if ( narg ) { - if ( f.m_task ) { - - new( f.m_task ) task_base(); - - // Reference count starts at two: - // +1 to match decrement when task completes - // +1 for the future - f.m_task->m_queue = m_queue ; - f.m_task->m_ref_count = 2 ; - f.m_task->m_alloc_size = size ; - f.m_task->m_dep_count = narg ; - f.m_task->m_task_type = task_base::Aggregate ; - - task_base ** const dep = f.m_task->aggregate_dependences(); - - // Assign dependences to increment their reference count - // The futures may be destroyed upon returning from this call - // so increment reference count to track this assignment. 
+ queue_type * queue = 0 ; for ( int i = 0 ; i < narg ; ++i ) { - task_base * const t = dep[i] = arg[i].m_task ; + task_base * const t = arg[i].m_task ; if ( 0 != t ) { + // Increment reference count to track subsequent assignment. Kokkos::atomic_increment( &(t->m_ref_count) ); + if ( queue == 0 ) { + queue = t->m_queue ; + } + else if ( queue != t->m_queue ) { + Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); + } } } - m_queue->schedule( f.m_task ); - // this when_all may be processed at any moment - } + if ( queue != 0 ) { - return f ; - } + size_t const size = sizeof(task_base) + narg * sizeof(task_base*); - /**\brief An executing task respawns itself with options - * - * 1) High, Normal, or Low priority - * 2) With or without dependence - */ - template< class FunctorType , typename ... Options > - KOKKOS_FUNCTION - void respawn( FunctorType * task_self - , Options const & ... arg_options ) const - { - using value_type = typename FunctorType::value_type ; - using task_type = Impl::TaskBase< execution_space - , value_type - , FunctorType > ; + f.m_task = + reinterpret_cast< task_base * >( queue->allocate( size ) ); - task_type * const task = static_cast< task_type * >( task_self ); + if ( f.m_task ) { - // Reschedule task with no dependences. - m_queue->reschedule( task ); + // Reference count starts at two: + // +1 to match decrement when task completes + // +1 for the future + new( f.m_task ) task_base( queue + , 2 /* reference count */ + , size /* allocation size */ + , narg /* dependence count */ + ); - // Dependences, if requested, are added here through parsing the arguments. - assign( task , arg_options... ); - } + // Assign dependences, reference counts were already incremented - //---------------------------------------- + task_base ** const dep = f.m_task->aggregate_dependences(); - template< typename S > - friend - void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } + + queue->schedule_aggregate( f.m_task ); + // this when_all may be processed at any moment + } + } + } + + return f ; + } //---------------------------------------- - inline + KOKKOS_INLINE_FUNCTION int allocation_capacity() const noexcept { return m_queue->m_memory.get_mem_size(); } @@ -676,12 +655,192 @@ public: long allocated_task_count_accum() const noexcept { return m_queue->m_accum_alloc ; } + //---------------------------------------- + + template< typename S > + friend + void Kokkos::wait( Kokkos::TaskScheduler< S > const & ); + }; +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +//---------------------------------------------------------------------------- +// Construct a TaskTeam execution policy + +template< typename T > +Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskTeam + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + > +KOKKOS_INLINE_FUNCTION +TaskTeam( T const & arg + , TaskPriority const & arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future<T>::value || + Kokkos::is_scheduler<T>::value + , "Kokkos TaskTeam argument must be Future or TaskScheduler" ); + + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskTeam + , typename std::conditional< Kokkos::is_future< T >::value , 
T , + typename Kokkos::Future< typename T::execution_space > >::type + >( arg , arg_priority ); +} + +// Construct a TaskSingle execution policy + +template< typename T > +Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskSingle + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + > +KOKKOS_INLINE_FUNCTION +TaskSingle( T const & arg + , TaskPriority const & arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future<T>::value || + Kokkos::is_scheduler<T>::value + , "Kokkos TaskSingle argument must be Future or TaskScheduler" ); + + return + Kokkos::Impl::TaskPolicyData + < Kokkos::Impl::TaskBase<void,void,void>::TaskSingle + , typename std::conditional< Kokkos::is_future< T >::value , T , + typename Kokkos::Future< typename T::execution_space > >::type + >( arg , arg_priority ); +} + +//---------------------------------------------------------------------------- + +/**\brief A host control thread spawns a task with options + * + * 1) Team or Serial + * 2) With scheduler or dependence + * 3) High, Normal, or Low priority + */ +template< int TaskEnum + , typename DepFutureType + , typename FunctorType > +Future< typename FunctorType::value_type + , typename DepFutureType::execution_space > +host_spawn( Impl::TaskPolicyData<TaskEnum,DepFutureType> const & arg_policy + , FunctorType && arg_functor + ) +{ + using exec_space = typename DepFutureType::execution_space ; + using scheduler = TaskScheduler< exec_space > ; + + typedef Impl::TaskBase< exec_space + , typename FunctorType::value_type + , FunctorType + > task_type ; + + static_assert( TaskEnum == task_type::TaskTeam || + TaskEnum == task_type::TaskSingle + , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + + // May be spawning a Cuda task, must use the specialization + // to query on-device function pointer. + typename task_type::function_type const ptr = + Kokkos::Impl::TaskQueueSpecialization< exec_space >:: + template get_function_pointer< task_type >(); + + return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); +} + +/**\brief A task spawns a task with options + * + * 1) Team or Serial + * 2) With scheduler or dependence + * 3) High, Normal, or Low priority + */ +template< int TaskEnum + , typename DepFutureType + , typename FunctorType > +Future< typename FunctorType::value_type + , typename DepFutureType::execution_space > +KOKKOS_INLINE_FUNCTION +task_spawn( Impl::TaskPolicyData<TaskEnum,DepFutureType> const & arg_policy + , FunctorType && arg_functor + ) +{ + using exec_space = typename DepFutureType::execution_space ; + using scheduler = TaskScheduler< exec_space > ; + + typedef Impl::TaskBase< exec_space + , typename FunctorType::value_type + , FunctorType + > task_type ; + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) && \ + defined( KOKKOS_ENABLE_CUDA ) + + static_assert( ! 
std::is_same< Kokkos::Cuda , exec_space >::value + , "Error calling Kokkos::task_spawn for Cuda space within Host code" ); + +#endif + + static_assert( TaskEnum == task_type::TaskTeam || + TaskEnum == task_type::TaskSingle + , "Kokkos host_spawn requires TaskTeam or TaskSingle" ); + + typename task_type::function_type const ptr = task_type::apply ; + + return scheduler::spawn( arg_policy , ptr , std::move(arg_functor) ); +} + +/**\brief A task respawns itself with options + * + * 1) With scheduler or dependence + * 2) High, Normal, or Low priority + */ +template< typename FunctorType , typename T > +void +KOKKOS_INLINE_FUNCTION +respawn( FunctorType * arg_self + , T const & arg + , TaskPriority const & arg_priority = TaskPriority::Regular + ) +{ + static_assert( Kokkos::is_future<T>::value || + Kokkos::is_scheduler<T>::value + , "Kokkos respawn argument must be Future or TaskScheduler" ); + + TaskScheduler< typename T::execution_space >:: + respawn( arg_self , arg , arg_priority ); +} + +//---------------------------------------------------------------------------- + +template< typename A1 , typename A2 > +KOKKOS_INLINE_FUNCTION +Future< typename Future< A1 , A2 >::execution_space > +when_all( Future< A1 , A2 > const arg[] + , int narg + ) +{ + return TaskScheduler< typename Future<A1,A2>::execution_space >:: + when_all( arg , narg ); +} + +//---------------------------------------------------------------------------- +// Wait for all runnable tasks to complete + template< typename ExecSpace > inline -void wait( TaskScheduler< ExecSpace > const & policy ) -{ policy.m_queue->execute(); } +void wait( TaskScheduler< ExecSpace > const & scheduler ) +{ scheduler.m_queue->execute(); } } // namespace Kokkos diff --git a/lib/kokkos/core/src/Kokkos_Threads.hpp b/lib/kokkos/core/src/Kokkos_Threads.hpp index aca482b427a11a21ecc5d71dddfffb715438fa85..8aa968d0535f1f6c32ac170a73d2ec60d018d824 100644 --- a/lib/kokkos/core/src/Kokkos_Threads.hpp +++ b/lib/kokkos/core/src/Kokkos_Threads.hpp @@ -230,4 +230,3 @@ struct VerifyExecutionCanAccessMemorySpace #endif /* #if defined( KOKKOS_ENABLE_PTHREAD ) */ #endif /* #define KOKKOS_THREADS_HPP */ - diff --git a/lib/kokkos/core/src/Makefile b/lib/kokkos/core/src/Makefile index 316f61fd4d9fcd4c7ce4ec37592659deef006bce..0668f89c86e040e5dd1017fc3c3f0a233e9affa3 100644 --- a/lib/kokkos/core/src/Makefile +++ b/lib/kokkos/core/src/Makefile @@ -31,23 +31,23 @@ KOKKOS_HEADERS_INCLUDE += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) CONDITIONAL_COPIES = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) - KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - CONDITIONAL_COPIES += copy-cuda + KOKKOS_HEADERS_CUDA += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + CONDITIONAL_COPIES += copy-cuda endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) - CONDITIONAL_COPIES += copy-threads + KOKKOS_HEADERS_THREADS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) + CONDITIONAL_COPIES += copy-threads endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - KOKKOS_HEADERS_QTHREAD += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) - CONDITIONAL_COPIES += copy-qthread +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + KOKKOS_HEADERS_QTHREADS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp) + CONDITIONAL_COPIES += copy-qthreads endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) - KOKKOS_HEADERS_OPENMP += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) - CONDITIONAL_COPIES += copy-openmp + KOKKOS_HEADERS_OPENMP += 
$(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + CONDITIONAL_COPIES += copy-openmp endif ifeq ($(KOKKOS_OS),CYGWIN) @@ -60,6 +60,12 @@ ifeq ($(KOKKOS_OS),Darwin) COPY_FLAG = endif +ifeq ($(KOKKOS_DEBUG),"no") + KOKKOS_DEBUG_CMAKE = OFF +else + KOKKOS_DEBUG_CMAKE = ON +endif + messages: echo "Start Build" @@ -91,6 +97,7 @@ build-makefile-kokkos: echo "" >> Makefile.kokkos echo "#Internal settings which need to propagated for Kokkos examples" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_CUDA = ${KOKKOS_INTERNAL_USE_CUDA}" >> Makefile.kokkos + echo "KOKKOS_INTERNAL_USE_QTHREADS = ${KOKKOS_INTERNAL_USE_QTHREADS}" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_OPENMP = ${KOKKOS_INTERNAL_USE_OPENMP}" >> Makefile.kokkos echo "KOKKOS_INTERNAL_USE_PTHREADS = ${KOKKOS_INTERNAL_USE_PTHREADS}" >> Makefile.kokkos echo "" >> Makefile.kokkos @@ -107,7 +114,55 @@ build-makefile-kokkos: > Makefile.kokkos.tmp mv -f Makefile.kokkos.tmp Makefile.kokkos -build-lib: build-makefile-kokkos $(KOKKOS_LINK_DEPENDS) +build-cmake-kokkos: + rm -f kokkos.cmake + echo "#Global Settings used to generate this library" >> kokkos.cmake + echo "set(KOKKOS_PATH $(PREFIX) CACHE PATH \"Kokkos installation path\")" >> kokkos.cmake + echo "set(KOKKOS_DEVICES $(KOKKOS_DEVICES) CACHE STRING \"Kokkos devices list\")" >> kokkos.cmake + echo "set(KOKKOS_ARCH $(KOKKOS_ARCH) CACHE STRING \"Kokkos architecture flags\")" >> kokkos.cmake + echo "set(KOKKOS_DEBUG $(KOKKOS_DEBUG_CMAKE) CACHE BOOL \"Kokkos debug enabled ?)\")" >> kokkos.cmake + echo "set(KOKKOS_USE_TPLS $(KOKKOS_USE_TPLS) CACHE STRING \"Kokkos templates list\")" >> kokkos.cmake + echo "set(KOKKOS_CXX_STANDARD $(KOKKOS_CXX_STANDARD) CACHE STRING \"Kokkos C++ standard\")" >> kokkos.cmake + echo "set(KOKKOS_OPTIONS $(KOKKOS_OPTIONS) CACHE STRING \"Kokkos options\")" >> kokkos.cmake + echo "set(KOKKOS_CUDA_OPTIONS $(KOKKOS_CUDA_OPTIONS) CACHE STRING \"Kokkos Cuda options\")" >> kokkos.cmake + echo "if(NOT $ENV{CXX})" >> kokkos.cmake + echo ' message(WARNING "You are currently using compiler $${CMAKE_CXX_COMPILER} while Kokkos was built with $(CXX) ; make sure this is the behavior you intended to be.")' >> kokkos.cmake + echo "endif()" >> kokkos.cmake + echo "if(NOT DEFINED ENV{NVCC_WRAPPER})" >> kokkos.cmake + echo " set(NVCC_WRAPPER \"$(NVCC_WRAPPER)\" CACHE FILEPATH \"Path to command nvcc_wrapper\")" >> kokkos.cmake + echo "else()" >> kokkos.cmake + echo ' set(NVCC_WRAPPER $$ENV{NVCC_WRAPPER} CACHE FILEPATH "Path to command nvcc_wrapper")' >> kokkos.cmake + echo "endif()" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Source and Header files of Kokkos relative to KOKKOS_PATH" >> kokkos.cmake + echo "set(KOKKOS_HEADERS \"$(KOKKOS_HEADERS)\" CACHE STRING \"Kokkos headers list\")" >> kokkos.cmake + echo "set(KOKKOS_SRC \"$(KOKKOS_SRC)\" CACHE STRING \"Kokkos source list\")" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Variables used in application Makefiles" >> kokkos.cmake + echo "set(KOKKOS_CPP_DEPENDS \"$(KOKKOS_CPP_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_CXXFLAGS \"$(KOKKOS_CXXFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_CPPFLAGS \"$(KOKKOS_CPPFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LINK_DEPENDS \"$(KOKKOS_LINK_DEPENDS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LIBS \"$(KOKKOS_LIBS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_LDFLAGS \"$(KOKKOS_LDFLAGS)\" CACHE STRING \"\")" >> kokkos.cmake + echo "" >> kokkos.cmake + echo "#Internal settings which need to propagated for 
Kokkos examples" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_CUDA \"${KOKKOS_INTERNAL_USE_CUDA}\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_OPENMP \"${KOKKOS_INTERNAL_USE_OPENMP}\" CACHE STRING \"\")" >> kokkos.cmake + echo "set(KOKKOS_INTERNAL_USE_PTHREADS \"${KOKKOS_INTERNAL_USE_PTHREADS}\" CACHE STRING \"\")" >> kokkos.cmake + echo "mark_as_advanced(KOKKOS_HEADERS KOKKOS_SRC KOKKOS_INTERNAL_USE_CUDA KOKKOS_INTERNAL_USE_OPENMP KOKKOS_INTERNAL_USE_PTHREADS)" >> kokkos.cmake + echo "" >> kokkos.cmake + sed \ + -e 's|$(KOKKOS_PATH)/core/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/containers/src|$(PREFIX)/include|g' \ + -e 's|$(KOKKOS_PATH)/algorithms/src|$(PREFIX)/include|g' \ + -e 's|-L$(PWD)|-L$(PREFIX)/lib|g' \ + -e 's|= libkokkos.a|= $(PREFIX)/lib/libkokkos.a|g' \ + -e 's|= KokkosCore_config.h|= $(PREFIX)/include/KokkosCore_config.h|g' kokkos.cmake \ + > kokkos.cmake.tmp + mv -f kokkos.cmake.tmp kokkos.cmake + +build-lib: build-makefile-kokkos build-cmake-kokkos $(KOKKOS_LINK_DEPENDS) mkdir: mkdir -p $(PREFIX) @@ -124,9 +179,9 @@ copy-threads: mkdir mkdir -p $(PREFIX)/include/Threads cp $(COPY_FLAG) $(KOKKOS_HEADERS_THREADS) $(PREFIX)/include/Threads -copy-qthread: mkdir - mkdir -p $(PREFIX)/include/Qthread - cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREAD) $(PREFIX)/include/Qthread +copy-qthreads: mkdir + mkdir -p $(PREFIX)/include/Qthreads + cp $(COPY_FLAG) $(KOKKOS_HEADERS_QTHREADS) $(PREFIX)/include/Qthreads copy-openmp: mkdir mkdir -p $(PREFIX)/include/OpenMP @@ -137,6 +192,7 @@ install: mkdir $(CONDITIONAL_COPIES) build-lib cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE) $(PREFIX)/include cp $(COPY_FLAG) $(KOKKOS_HEADERS_INCLUDE_IMPL) $(PREFIX)/include/impl cp $(COPY_FLAG) Makefile.kokkos $(PREFIX) + cp $(COPY_FLAG) kokkos.cmake $(PREFIX) cp $(COPY_FLAG) libkokkos.a $(PREFIX)/lib cp $(COPY_FLAG) KokkosCore_config.h $(PREFIX)/include diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index a61791ca9c7be2779820b5ed96db1aec02644654..ecacffb77331c9d14134dc2dcc9a8eafabbc175f 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -46,7 +46,6 @@ #include <omp.h> #include <iostream> -#include <Kokkos_Parallel.hpp> #include <OpenMP/Kokkos_OpenMPexec.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> @@ -107,58 +106,41 @@ private: public: - inline void execute() const { - this->template execute_schedule<typename Policy::schedule_type::type>(); - } - - template<class Schedule> - inline - typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type - execute_schedule() const + inline void execute() const { + enum { is_dynamic = std::is_same< typename Policy::schedule_type::type + , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - ParallelFor::template exec_range< WorkTag >( m_functor , range.begin() , range.end() ); - } -/* END #pragma omp parallel */ - } + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); - template<class Schedule> - inline - typename std::enable_if< std::is_same<Schedule,Kokkos::Dynamic>::value >::type - execute_schedule() const - { - 
OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + std::pair<int64_t,int64_t> range(0,0); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); + do { - exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); - exec.reset_steal_target(); - #pragma omp barrier - - long work_index = exec.get_work_index(); + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); - while(work_index != -1) { - const Member begin = static_cast<Member>(work_index) * m_policy.chunk_size(); - const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); - ParallelFor::template exec_range< WorkTag >( m_functor , begin, end ); - work_index = exec.get_work_index(); - } + ParallelFor::template + exec_range< WorkTag >( m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() ); + } while ( is_dynamic && 0 <= range.first ); } -/* END #pragma omp parallel */ + // END #pragma omp parallel } inline @@ -193,17 +175,18 @@ private: typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::member_type Member ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; // Static Assert WorkTag void if ReducerType not InvalidType - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -247,92 +230,70 @@ private: public: - inline void execute() const { - this->template execute_schedule<typename Policy::schedule_type::type>(); - } - - template<class Schedule> - inline - typename std::enable_if< std::is_same<Schedule,Kokkos::Static>::value >::type - execute_schedule() const + inline void execute() const { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + enum { is_dynamic = std::is_same< typename Policy::schedule_type::type + , Kokkos::Dynamic >::value }; - OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); + + const size_t pool_reduce_bytes = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); + + OpenMPexec::resize_thread_data( pool_reduce_bytes + , 0 // team_reduce_bytes + , 0 // team_shared_bytes + , 0 // thread_local_bytes + ); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - ParallelReduce::template 
exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueInit::init( ReducerConditional::select(m_functor , m_reducer), exec.scratch_reduce() ) ); - } -/* END #pragma omp parallel */ + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - // Reduction: + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } - for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); - } + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + std::pair<int64_t,int64_t> range(0,0); - if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + do { - for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } - } - } + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); - template<class Schedule> - inline - typename std::enable_if< std::is_same<Schedule,Kokkos::Dynamic>::value >::type - execute_schedule() const - { - OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + ParallelReduce::template + exec_range< WorkTag >( m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() + , update ); - OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); - -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - - exec.set_work_range(range.begin(),range.end(),m_policy.chunk_size()); - exec.reset_steal_target(); - #pragma omp barrier - - long work_index = exec.get_work_index(); - - reference_type update = ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ); - while(work_index != -1) { - const Member begin = static_cast<Member>(work_index) * m_policy.chunk_size(); - const Member end = begin + m_policy.chunk_size() < m_policy.end()?begin+m_policy.chunk_size():m_policy.end(); - ParallelReduce::template exec_range< WorkTag > - ( m_functor , begin,end - , update ); - work_index = exec.get_work_index(); - } + } while ( is_dynamic && 0 <= range.first ); } -/* END #pragma omp parallel */ +// END #pragma omp parallel // Reduction: - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); + const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) + , ptr + , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , 
m_reducer) ); + const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } } @@ -394,17 +355,18 @@ private: typedef Kokkos::RangePolicy< Traits ... > Policy ; + typedef FunctorAnalysis< FunctorPatternInterface::SCAN , Policy , FunctorType > Analysis ; + typedef typename Policy::work_tag WorkTag ; typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::member_type Member ; - typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< FunctorType, WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< FunctorType, WorkTag > ValueJoin ; typedef Kokkos::Impl::FunctorValueOps< FunctorType, WorkTag > ValueOps ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -452,53 +414,63 @@ public: OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_scan"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_scan"); - OpenMPexec::resize_scratch( 2 * ValueTraits::value_size( m_functor ) , 0 ); + const int value_count = Analysis::value_count( m_functor ); + const size_t pool_reduce_bytes = 2 * Analysis::value_size( m_functor ); + + OpenMPexec::resize_thread_data( pool_reduce_bytes + , 0 // team_reduce_bytes + , 0 // team_shared_bytes + , 0 // thread_local_bytes + ); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); - const pointer_type ptr = - pointer_type( exec.scratch_reduce() ) + - ValueTraits::value_count( m_functor ); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + + const WorkRange range( m_policy, data.pool_rank(), data.pool_size() ); + + reference_type update_sum = + ValueInit::init( m_functor , data.pool_reduce_local() ); + ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueInit::init( m_functor , ptr ) , false ); - } -/* END #pragma omp parallel */ + ( m_functor , range.begin() , range.end() , update_sum , false ); - { - const unsigned thread_count = OpenMPexec::pool_size(); - const unsigned value_count = ValueTraits::value_count( m_functor ); + if ( data.pool_rendezvous() ) { - pointer_type ptr_prev = 0 ; + pointer_type ptr_prev = 0 ; - for ( unsigned rank_rev = thread_count ; rank_rev-- ; ) { + const int n = data.pool_size(); - pointer_type ptr = pointer_type( OpenMPexec::pool_rev(rank_rev)->scratch_reduce() ); + for ( int i = 0 ; i < n ; ++i ) { - if ( ptr_prev ) { - for ( unsigned i = 0 ; i < value_count ; ++i ) { ptr[i] = ptr_prev[ i + value_count ] ; } - ValueJoin::join( m_functor , ptr + value_count , ptr ); - } - else { - ValueInit::init( m_functor , ptr ); + pointer_type ptr = (pointer_type) + data.pool_member(i)->pool_reduce_local(); + + if ( i ) { + for ( int j = 0 ; j < value_count ; ++j ) { + ptr[j+value_count] = ptr_prev[j+value_count] ; + } + ValueJoin::join( m_functor , ptr + value_count , ptr_prev ); + } + else { + ValueInit::init( m_functor , ptr + value_count ); + } + + ptr_prev = ptr ; } - ptr_prev = ptr ; + data.pool_rendezvous_release(); } - } -#pragma omp parallel - { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); - const WorkRange range( m_policy, exec.pool_rank(), exec.pool_size() ); 
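// [Editorial sketch, not part of the patch] Both range-policy drivers above
// (ParallelFor and ParallelReduce) now share the same shape: publish a static
// work partition, then for Kokkos::Dynamic schedules repeatedly steal chunks
// until every partition is drained.  Condensed here with a hypothetical
// drive_range_policy() / exec_one_chunk() pair standing in for the
// tag-dispatched exec_range< WorkTag >() calls.
template< bool IsDynamic , class ExecOneChunk >
inline void drive_range_policy( HostThreadTeamData & data
                              , const int64_t begin , const int64_t end
                              , const int chunk , ExecOneChunk exec_one_chunk )
{
  // Each thread records its share of [0,end-begin), in units of 'chunk'.
  data.set_work_partition( end - begin , chunk );

  if ( IsDynamic ) {
    // Partitions must be published before any thread may steal from them.
    if ( data.pool_rendezvous() ) data.pool_rendezvous_release();
  }

  std::pair<int64_t,int64_t> range(0,0);

  do {
    // Static: the thread's own partition, returned once.
    // Dynamic: the next chunk, stolen from other threads once ours is empty.
    range = IsDynamic ? data.get_work_stealing_chunk()
                      : data.get_work_partition();

    exec_one_chunk( range.first + begin , range.second + begin );

  } while ( IsDynamic && 0 <= range.first );
}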
- const pointer_type ptr = pointer_type( exec.scratch_reduce() ); + reference_type update_base = + ValueOps::reference + ( ((pointer_type)data.pool_reduce_local()) + value_count ); + ParallelScan::template exec_range< WorkTag > - ( m_functor , range.begin() , range.end() - , ValueOps::reference( ptr ) , true ); + ( m_functor , range.begin() , range.end() , update_base , true ); } /* END #pragma omp parallel */ + } //---------------------------------------- @@ -530,55 +502,59 @@ class ParallelFor< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... > Policy ; - typedef typename Policy::work_tag WorkTag ; - typedef typename Policy::member_type Member ; + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::schedule_type::type SchedTag ; + typedef typename Policy::member_type Member ; const FunctorType m_functor ; const Policy m_policy ; const int m_shmem_size ; - template< class TagType, class Schedule > + template< class TagType > inline static - typename std::enable_if< std::is_same< TagType , void >::value && std::is_same<Schedule,Kokkos::Static>::value>::type - exec_team( const FunctorType & functor , Member member ) + typename std::enable_if< ( std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - for ( ; member.valid_static() ; member.next_static() ) { - functor( member ); - } - } + for ( int r = league_rank_begin ; r < league_rank_end ; ) { - template< class TagType, class Schedule > - inline static - typename std::enable_if< (! std::is_same< TagType , void >::value) && std::is_same<Schedule,Kokkos::Static>::value >::type - exec_team( const FunctorType & functor , Member member ) - { - const TagType t{} ; - for ( ; member.valid_static() ; member.next_static() ) { - functor( t , member ); - } - } + functor( Member( data, r , league_size ) ); - template< class TagType, class Schedule > - inline static - typename std::enable_if< std::is_same< TagType , void >::value && std::is_same<Schedule,Kokkos::Dynamic>::value>::type - exec_team( const FunctorType & functor , Member member ) - { - #pragma omp barrier - for ( ; member.valid_dynamic() ; member.next_dynamic() ) { - functor( member ); + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. + if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } - template< class TagType, class Schedule > + + template< class TagType > inline static - typename std::enable_if< (! std::is_same< TagType , void >::value) && std::is_same<Schedule,Kokkos::Dynamic>::value >::type - exec_team( const FunctorType & functor , Member member ) + typename std::enable_if< ( ! std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - #pragma omp barrier - const TagType t{} ; - for ( ; member.valid_dynamic() ; member.next_dynamic() ) { - functor( t , member ); + const TagType t{}; + + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( t , Member( data, r , league_size ) ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. 
+ if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } @@ -587,31 +563,75 @@ public: inline void execute() const { + enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_for"); OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_for"); - const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + const size_t pool_reduce_size = 0 ; // Never shrinks + const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); + const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); + const size_t thread_local_size = 0 ; // Never shrinks - OpenMPexec::resize_scratch( 0 , team_reduce_size + m_shmem_size + m_policy.scratch_size(1)); + OpenMPexec::resize_thread_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); #pragma omp parallel { - ParallelFor::template exec_team< WorkTag, typename Policy::schedule_type::type> - ( m_functor - , Member( * OpenMPexec::get_thread_omp(), m_policy, m_shmem_size, 0) ); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); + + const int active = data.organize_team( m_policy.team_size() ); + + if ( active ) { + data.set_work_partition( m_policy.league_size() + , ( 0 < m_policy.chunk_size() + ? m_policy.chunk_size() + : m_policy.team_iter() ) ); + } + + if ( is_dynamic ) { + // Must synchronize to make sure each team has set its + // partition before begining the work stealing loop. + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } + + if ( active ) { + + std::pair<int64_t,int64_t> range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelFor::template exec_team< WorkTag > + ( m_functor , data + , range.first , range.second , m_policy.league_size() ); + + } while ( is_dynamic && 0 <= range.first ); + } + + data.disband_team(); } -/* END #pragma omp parallel */ +// END #pragma omp parallel } + inline ParallelFor( const FunctorType & arg_functor , const Policy & arg_policy ) : m_functor( arg_functor ) , m_policy( arg_policy ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) {} }; +//---------------------------------------------------------------------------- template< class FunctorType , class ReducerType, class ... Properties > class ParallelReduce< FunctorType @@ -622,20 +642,26 @@ class ParallelReduce< FunctorType { private: + enum { TEAM_REDUCE_SIZE = 512 }; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::OpenMP, Properties ... 
> Policy ; - typedef typename Policy::work_tag WorkTag ; - typedef typename Policy::member_type Member ; + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + + typedef typename Policy::work_tag WorkTag ; + typedef typename Policy::schedule_type::type SchedTag ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value + , FunctorType, ReducerType> ReducerConditional; - typedef Kokkos::Impl::if_c< std::is_same<InvalidType,ReducerType>::value, FunctorType, ReducerType> ReducerConditional; typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd , WorkTag > ValueTraits ; typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd , WorkTag > ValueJoin ; - typedef typename ValueTraits::pointer_type pointer_type ; - typedef typename ValueTraits::reference_type reference_type ; + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; const FunctorType m_functor ; const Policy m_policy ; @@ -645,22 +671,48 @@ private: template< class TagType > inline static - typename std::enable_if< std::is_same< TagType , void >::value >::type - exec_team( const FunctorType & functor , Member member , reference_type update ) + typename std::enable_if< ( std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , reference_type & update + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - for ( ; member.valid_static() ; member.next_static() ) { - functor( member , update ); + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( Member( data, r , league_size ) , update ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. + if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } + template< class TagType > inline static - typename std::enable_if< ! std::is_same< TagType , void >::value >::type - exec_team( const FunctorType & functor , Member member , reference_type update ) + typename std::enable_if< ( ! std::is_same< TagType , void >::value ) >::type + exec_team( const FunctorType & functor + , HostThreadTeamData & data + , reference_type & update + , const int league_rank_begin + , const int league_rank_end + , const int league_size ) { - const TagType t{} ; - for ( ; member.valid_static() ; member.next_static() ) { - functor( t , member , update ); + const TagType t{}; + + for ( int r = league_rank_begin ; r < league_rank_end ; ) { + + functor( t , Member( data, r , league_size ) , update ); + + if ( ++r < league_rank_end ) { + // Don't allow team members to lap one another + // so that they don't overwrite shared memory. 
+ if ( data.team_rendezvous() ) { data.team_rendezvous_release(); } + } } } @@ -669,44 +721,89 @@ public: inline void execute() const { + enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; + OpenMPexec::verify_is_process("Kokkos::OpenMP parallel_reduce"); + OpenMPexec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); - const size_t team_reduce_size = Policy::member_type::team_reduce_size(); + const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); + const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); + const size_t thread_local_size = 0 ; // Never shrinks - OpenMPexec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , team_reduce_size + m_shmem_size ); + OpenMPexec::resize_thread_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); #pragma omp parallel { - OpenMPexec & exec = * OpenMPexec::get_thread_omp(); + HostThreadTeamData & data = *OpenMPexec::get_thread_data(); - ParallelReduce::template exec_team< WorkTag > - ( m_functor - , Member( exec , m_policy , m_shmem_size, 0 ) - , ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , exec.scratch_reduce() ) ); - } -/* END #pragma omp parallel */ + const int active = data.organize_team( m_policy.team_size() ); - { - const pointer_type ptr = pointer_type( OpenMPexec::pool_rev(0)->scratch_reduce() ); - - int max_active_threads = OpenMPexec::pool_size(); - if( max_active_threads > m_policy.league_size()* m_policy.team_size() ) - max_active_threads = m_policy.league_size()* m_policy.team_size(); + if ( active ) { + data.set_work_partition( m_policy.league_size() + , ( 0 < m_policy.chunk_size() + ? m_policy.chunk_size() + : m_policy.team_iter() ) ); + } - for ( int i = 1 ; i < max_active_threads ; ++i ) { - ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr , OpenMPexec::pool_rev(i)->scratch_reduce() ); + if ( is_dynamic ) { + // Must synchronize to make sure each team has set its + // partition before begining the work stealing loop. + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); } - Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + if ( active ) { + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); + + std::pair<int64_t,int64_t> range(0,0); - if ( m_result_ptr ) { - const int n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + do { - for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } + range = is_dynamic ? 
data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelReduce::template exec_team< WorkTag > + ( m_functor , data , update + , range.first , range.second , m_policy.league_size() ); + + } while ( is_dynamic && 0 <= range.first ); + } else { + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); } + + data.disband_team(); + } +// END #pragma omp parallel + + // Reduction: + + const pointer_type ptr = pointer_type( OpenMPexec::get_thread_data(0)->pool_reduce_local() ); + + for ( int i = 1 ; i < OpenMPexec::pool_size() ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) + , ptr + , OpenMPexec::get_thread_data(i)->pool_reduce_local() ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + if ( m_result_ptr ) { + const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); + + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } } } + //---------------------------------------- + template< class ViewType > inline ParallelReduce( const FunctorType & arg_functor , @@ -720,7 +817,10 @@ public: , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.ptr_on_device() ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) {} inline @@ -731,7 +831,10 @@ public: , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.result_view().data() ) - , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + FunctorTeamShmemSize< FunctorType >::value( arg_functor , arg_policy.team_size() ) ) + , m_shmem_size( arg_policy.scratch_size(0) + + arg_policy.scratch_size(1) + + FunctorTeamShmemSize< FunctorType > + ::value( arg_functor , arg_policy.team_size() ) ) { /*static_assert( std::is_same< typename ViewType::memory_space , Kokkos::HostSpace >::value diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 5b3e9873e17bc360f28a8338b7b59b69cf627ec3..9144d8c2799a7db81af0886aafcff1ebcd828833 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -46,6 +46,7 @@ #if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) #include <impl/Kokkos_TaskQueue_impl.hpp> +#include <impl/Kokkos_HostThreadTeam.hpp> //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -55,231 +56,214 @@ namespace Impl { template class TaskQueue< Kokkos::OpenMP > ; -//---------------------------------------------------------------------------- - -TaskExec< Kokkos::OpenMP >:: -TaskExec() - : m_self_exec( 0 ) - , m_team_exec( 0 ) - , m_sync_mask( 0 ) - , m_sync_value( 0 ) - , m_sync_step( 0 ) - , m_group_rank( 0 ) - , m_team_rank( 0 ) - , m_team_size( 1 ) -{ -} - -TaskExec< Kokkos::OpenMP >:: -TaskExec( Kokkos::Impl::OpenMPexec & arg_exec , int const arg_team_size ) - : m_self_exec( & arg_exec ) - , m_team_exec( arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size) ) - , m_sync_mask( 0 ) - , m_sync_value( 0 ) - , m_sync_step( 0 ) - , m_group_rank( 
arg_exec.pool_rank_rev() / arg_team_size ) - , m_team_rank( arg_exec.pool_rank_rev() % arg_team_size ) - , m_team_size( arg_team_size ) -{ - // This team spans - // m_self_exec->pool_rev( team_size * group_rank ) - // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) - - int64_t volatile * const sync = (int64_t *) m_self_exec->scratch_reduce(); - - sync[0] = int64_t(0) ; - sync[1] = int64_t(0) ; - - for ( int i = 0 ; i < m_team_size ; ++i ) { - m_sync_value |= int64_t(1) << (8*i); - m_sync_mask |= int64_t(3) << (8*i); - } +class HostThreadTeamDataSingleton : private HostThreadTeamData { +private: + + HostThreadTeamDataSingleton() : HostThreadTeamData() + { + Kokkos::OpenMP::memory_space space ; + const size_t num_pool_reduce_bytes = 32 ; + const size_t num_team_reduce_bytes = 32 ; + const size_t num_team_shared_bytes = 1024 ; + const size_t num_thread_local_bytes = 1024 ; + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); + + HostThreadTeamData::scratch_assign + ( space.allocate( alloc_bytes ) + , alloc_bytes + , num_pool_reduce_bytes + , num_team_reduce_bytes + , num_team_shared_bytes + , num_thread_local_bytes ); + } + + ~HostThreadTeamDataSingleton() + { + Kokkos::OpenMP::memory_space space ; + space.deallocate( HostThreadTeamData::scratch_buffer() + , HostThreadTeamData::scratch_bytes() ); + } + +public: + + static HostThreadTeamData & singleton() + { + static HostThreadTeamDataSingleton s ; + return s ; + } +}; - Kokkos::memory_fence(); -} - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) +//---------------------------------------------------------------------------- -void TaskExec< Kokkos::OpenMP >::team_barrier_impl() const +void TaskQueueSpecialization< Kokkos::OpenMP >::execute + ( TaskQueue< Kokkos::OpenMP > * const queue ) { - if ( m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t)) ) { - Kokkos::abort("TaskQueue<OpenMP> scratch_reduce memory too small"); - } + using execution_space = Kokkos::OpenMP ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; - // Use team shared memory to synchronize. - // Alternate memory locations between barriers to avoid a sequence - // of barriers overtaking one another. 
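// [Editorial sketch, not part of the patch] HostThreadTeamData always works
// from one contiguous HostSpace allocation that scratch_assign() splits into
// four regions: pool reduce, team reduce, team shared, and thread local.
// The HostThreadTeamDataSingleton above and resize_thread_data() further
// below both follow this recipe; the byte counts here are only illustrative.
Kokkos::OpenMP::memory_space space ;
HostThreadTeamData data ;

const size_t pool_reduce_bytes  =   32 ;
const size_t team_reduce_bytes  =   32 ;
const size_t team_shared_bytes  = 1024 ;
const size_t thread_local_bytes = 1024 ;

const size_t alloc_bytes =
  HostThreadTeamData::scratch_size( pool_reduce_bytes
                                  , team_reduce_bytes
                                  , team_shared_bytes
                                  , thread_local_bytes );

data.scratch_assign( space.allocate( alloc_bytes ) , alloc_bytes
                   , pool_reduce_bytes , team_reduce_bytes
                   , team_shared_bytes , thread_local_bytes );

// ... hand 'data' to a HostThreadTeamMember, run work, then release it:
space.deallocate( data.scratch_buffer() , data.scratch_bytes() );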
+ static task_root_type * const end = + (task_root_type *) task_root_type::EndTag ; - int64_t volatile * const sync = - ((int64_t *) m_team_exec->scratch_reduce()) + ( m_sync_step & 0x01 ); + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); - // This team member sets one byte within the sync variable - int8_t volatile * const sync_self = - ((int8_t *) sync) + m_team_rank ; + const int team_size = Impl::OpenMPexec::pool_size(2); // Threads per core + // const int team_size = Impl::OpenMPexec::pool_size(1); // Threads per NUMA #if 0 -fprintf( stdout - , "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n" - , m_group_rank - , m_team_rank - , m_sync_step - , m_sync_value - , *sync - ); +fprintf(stdout,"TaskQueue<OpenMP> execute %d\n", team_size ); fflush(stdout); #endif - *sync_self = int8_t( m_sync_value & 0x03 ); // signal arrival - while ( m_sync_value != *sync ); // wait for team to arrive +#pragma omp parallel + { + Impl::HostThreadTeamData & self = *Impl::OpenMPexec::get_thread_data(); -#if 0 -fprintf( stdout - , "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n" - , m_group_rank - , m_team_rank - , m_sync_step - , m_sync_value - , *sync - ); -fflush(stdout); -#endif + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. - ++m_sync_step ; + if ( self.organize_team( team_size ) ) { - if ( 0 == ( 0x01 & m_sync_step ) ) { // Every other step - m_sync_value ^= m_sync_mask ; - if ( 1000 < m_sync_step ) m_sync_step = 0 ; - } -} + Member single_exec( team_data_single ); + Member team_exec( self ); +#if 0 +fprintf(stdout,"TaskQueue<OpenMP> pool(%d of %d) team(%d of %d) league(%d of %d) running\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() + ); +fflush(stdout); #endif -//---------------------------------------------------------------------------- - -void TaskQueueSpecialization< Kokkos::OpenMP >::execute - ( TaskQueue< Kokkos::OpenMP > * const queue ) -{ - using execution_space = Kokkos::OpenMP ; - using queue_type = TaskQueue< execution_space > ; - using task_root_type = TaskBase< execution_space , void , void > ; - using PoolExec = Kokkos::Impl::OpenMPexec ; - using Member = TaskExec< execution_space > ; + // Loop until all queues are empty and no tasks in flight - task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + task_root_type * task = 0 ; - // Required: team_size <= 8 + do { + // Each team lead attempts to acquire either a thread team task + // or a single thread task for the team. 
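// [Editorial sketch, not part of the patch] Condensed form of the team-lead
// acquisition loop built up in the following hunk: only team rank 0 touches
// the ready queues; single-thread tasks are run by the leader on 'single_exec'
// right away, and only a team task (or the 0 / EndTag sentinels) is broadcast
// to the rest of the team.  All names come from the surrounding code.
task_root_type * task = 0 ;

if ( 0 == team_exec.team_rank() ) {
  bool leader_loop = false ;
  do {
    if ( 0 != task && end != task ) queue->complete( task );  // finish the previous task

    // end: tasks may still appear;  0: the queue is fully drained.
    task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ;

    // Scan the ready queues by priority, then by type.
    for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i )
      for ( int j = 0 ; j < 2 && end == task ; ++j )
        task = queue_type::pop_ready_task( & queue->m_ready[i][j] );

    leader_loop = ( end == task );  // nothing acquired, but tasks still in flight

    if ( ! leader_loop && 0 != task &&
         task_root_type::TaskSingle == task->m_task_type ) {
      (*task->m_apply)( task , & single_exec );  // run a single-thread task now
      leader_loop = true ;
    }
  } while ( leader_loop );
}

team_exec.team_broadcast( task , 0 );  // 0 terminates the enclosing do/while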
- const int team_size = PoolExec::pool_size(2); // Threads per core - // const int team_size = PoolExec::pool_size(1); // Threads per NUMA + if ( 0 == team_exec.team_rank() ) { - if ( 8 < team_size ) { - Kokkos::abort("TaskQueue<OpenMP> unsupported team size"); - } + bool leader_loop = false ; -#pragma omp parallel - { - PoolExec & self = *PoolExec::get_thread_omp(); + do { - Member single_exec ; - Member team_exec( self , team_size ); + if ( 0 != task && end != task ) { + // team member #0 completes the previously executed task, + // completion may delete the task + queue->complete( task ); + } - // Team shared memory - task_root_type * volatile * const task_shared = - (task_root_type **) team_exec.m_team_exec->scratch_thread(); + // If 0 == m_ready_count then set task = 0 -// Barrier across entire OpenMP thread pool to insure initialization -#pragma omp barrier + task = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ; - // Loop until all queues are empty and no tasks in flight + // Attempt to acquire a task + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); + } + } - do { + // If still tasks are still executing + // and no task could be acquired + // then continue this leader loop + leader_loop = end == task ; - task_root_type * task = 0 ; + if ( ( ! leader_loop ) && + ( 0 != task ) && + ( task_root_type::TaskSingle == task->m_task_type ) ) { - // Each team lead attempts to acquire either a thread team task - // or a single thread task for the team. + // if a single thread task then execute now - if ( 0 == team_exec.team_rank() ) { +#if 0 +fprintf(stdout,"TaskQueue<OpenMP> pool(%d of %d) executing single task 0x%lx\n" + , self.pool_rank() + , self.pool_size() + , int64_t(task) + ); +fflush(stdout); +#endif - task = 0 < *((volatile int *) & queue->m_ready_count) ? 
end : 0 ; + (*task->m_apply)( task , & single_exec ); - // Loop by priority and then type - for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { - for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); - } + leader_loop = true ; + } + } while ( leader_loop ); } - } - - // Team lead broadcast acquired task to team members: - - if ( 1 < team_exec.team_size() ) { - - if ( 0 == team_exec.team_rank() ) *task_shared = task ; - - // Fence to be sure task_shared is stored before the barrier - Kokkos::memory_fence(); - // Whole team waits for every team member to reach this statement - team_exec.team_barrier(); + // Team lead either found 0 == m_ready_count or a team task + // Team lead broadcast acquired task: - // Fence to be sure task_shared is stored - Kokkos::memory_fence(); + team_exec.team_broadcast( task , 0); - task = *task_shared ; - } + if ( 0 != task ) { // Thread Team Task #if 0 -fprintf( stdout - , "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n" - , team_exec.m_group_rank - , team_exec.m_team_rank - , uintptr_t(task_shared) - , uintptr_t(task) +fprintf(stdout,"TaskQueue<OpenMP> pool(%d of %d) team((%d of %d) league(%d of %d) executing team task 0x%lx\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() + , int64_t(task) ); fflush(stdout); #endif - if ( 0 == task ) break ; // 0 == m_ready_count - - if ( end == task ) { - // All team members wait for whole team to reach this statement. - // Is necessary to prevent task_shared from being updated - // before it is read by all threads. - team_exec.team_barrier(); - } - else if ( task_root_type::TaskTeam == task->m_task_type ) { - // Thread Team Task - (*task->m_apply)( task , & team_exec ); + (*task->m_apply)( task , & team_exec ); - // The m_apply function performs a barrier - - if ( 0 == team_exec.team_rank() ) { - // team member #0 completes the task, which may delete the task - queue->complete( task ); + // The m_apply function performs a barrier } - } - else { - // Single Thread Task + } while( 0 != task ); - if ( 0 == team_exec.team_rank() ) { +#if 0 +fprintf(stdout,"TaskQueue<OpenMP> pool(%d of %d) team(%d of %d) league(%d of %d) ending\n" + , self.pool_rank() + , self.pool_size() + , team_exec.team_rank() + , team_exec.team_size() + , team_exec.league_rank() + , team_exec.league_size() + ); +fflush(stdout); +#endif - (*task->m_apply)( task , & single_exec ); + } - queue->complete( task ); - } + self.disband_team(); + +#if 0 +fprintf(stdout,"TaskQueue<OpenMP> pool(%d of %d) disbanded\n" + , self.pool_rank() + , self.pool_size() + ); +fflush(stdout); +#endif - // All team members wait for whole team to reach this statement. - // Not necessary to complete the task. - // Is necessary to prevent task_shared from being updated - // before it is read by all threads. 
- team_exec.team_barrier(); - } - } while(1); } // END #pragma omp parallel +#if 0 +fprintf(stdout,"TaskQueue<OpenMP> execute %d end\n", team_size ); +fflush(stdout); +#endif + } void TaskQueueSpecialization< Kokkos::OpenMP >:: @@ -289,13 +273,16 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: using execution_space = Kokkos::OpenMP ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; if ( 1 == omp_get_num_threads() ) { task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member single_exec ; + HostThreadTeamData & team_data_single = + HostThreadTeamDataSingleton::singleton(); + + Member single_exec( team_data_single ); task_root_type * task = end ; @@ -306,7 +293,7 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: // Loop by priority and then type for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 15dbb77c26c7432497417b0b27508b00d3d717af..3cfdf790bfb75165b936ce547828fd7f248f0b00 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -60,6 +60,7 @@ public: using execution_space = Kokkos::OpenMP ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; // Must specify memory space using memory_space = Kokkos::HostSpace ; @@ -70,296 +71,19 @@ public: // Must provide task queue execution function static void execute( queue_type * const ); - // Must provide mechanism to set function pointer in - // execution space from the host process. 
- template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( task_base_type::function_type * ptr ) - { - using TaskType = TaskBase< Kokkos::OpenMP - , typename FunctorType::value_type - , FunctorType - > ; - *ptr = TaskType::apply ; - } + typename TaskType::function_type + get_function_pointer() { return TaskType::apply ; } }; extern template class TaskQueue< Kokkos::OpenMP > ; -//---------------------------------------------------------------------------- - -template<> -class TaskExec< Kokkos::OpenMP > -{ -private: - - TaskExec( TaskExec && ) = delete ; - TaskExec( TaskExec const & ) = delete ; - TaskExec & operator = ( TaskExec && ) = delete ; - TaskExec & operator = ( TaskExec const & ) = delete ; - - - using PoolExec = Kokkos::Impl::OpenMPexec ; - - friend class Kokkos::Impl::TaskQueue< Kokkos::OpenMP > ; - friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::OpenMP > ; - - PoolExec * const m_self_exec ; ///< This thread's thread pool data structure - PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure - int64_t m_sync_mask ; - int64_t mutable m_sync_value ; - int mutable m_sync_step ; - int m_group_rank ; ///< Which "team" subset of thread pool - int m_team_rank ; ///< Which thread within a team - int m_team_size ; - - TaskExec(); - TaskExec( PoolExec & arg_exec , int arg_team_size ); - - void team_barrier_impl() const ; - -public: - -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - void * team_shared() const - { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } - - int team_shared_size() const - { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } - - /**\brief Whole team enters this function call - * before any teeam member returns from - * this function call. 
- */ - void team_barrier() const { if ( 1 < m_team_size ) team_barrier_impl(); } -#else - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } -#endif - - KOKKOS_INLINE_FUNCTION - int team_rank() const { return m_team_rank ; } - - KOKKOS_INLINE_FUNCTION - int team_size() const { return m_team_size ; } -}; - }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > > -TeamThreadRange - ( Impl::TaskExec< Kokkos::OpenMP > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count); -} - -template<typename iType1, typename iType2> -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::TaskExec< Kokkos::OpenMP > > -TeamThreadRange - ( Impl:: TaskExec< Kokkos::OpenMP > & thread, const iType1 & begin, const iType2 & end ) -{ - typedef typename std::common_type<iType1, iType2>::type iType; - return Impl::TeamThreadRangeBoundariesStruct<iType, Impl::TaskExec< Kokkos::OpenMP > >(thread, begin, end); -} - -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > > -ThreadVectorRange - ( Impl::TaskExec< Kokkos::OpenMP > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >(thread,count); -} - -/** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support. 
-*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for - ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >& loop_boundaries - , const Lambda& lambda - ) -{ - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i); - } -} - -template<typename iType, class Lambda, typename ValueType> -KOKKOS_INLINE_FUNCTION -void parallel_reduce - ( const Impl::TeamThreadRangeBoundariesStruct<iType,Impl:: TaskExec< Kokkos::OpenMP > >& loop_boundaries - , const Lambda& lambda - , ValueType& initialized_result) -{ - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i, result); - } - - if ( 1 < loop_boundaries.thread.team_size() ) { - - ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); - - loop_boundaries.thread.team_barrier(); - shared[team_rank] = result; - - loop_boundaries.thread.team_barrier(); - - // reduce across threads to thread 0 - if (team_rank == 0) { - for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { - shared[0] += shared[i]; - } - } - - loop_boundaries.thread.team_barrier(); - - // broadcast result - initialized_result = shared[0]; - } - else { - initialized_result = result ; - } -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i, result); - } - - if ( 1 < loop_boundaries.thread.team_size() ) { - ValueType *shared = (ValueType*) loop_boundaries.thread.team_shared(); - - loop_boundaries.thread.team_barrier(); - shared[team_rank] = result; - - loop_boundaries.thread.team_barrier(); - - // reduce across threads to thread 0 - if (team_rank == 0) { - for (int i = 1; i < loop_boundaries.thread.team_size(); i++) { - join(shared[0], shared[i]); - } - } - - loop_boundaries.thread.team_barrier(); - - // broadcast result - initialized_result = shared[0]; - } - else { - initialized_result = result ; - } -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ -} - -template< typename ValueType, typename iType, class Lambda > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, - const Lambda & lambda) -{ - ValueType accum = 0 ; - ValueType val, local_total; - ValueType *shared = 
(ValueType*) loop_boundaries.thread.team_shared(); - int team_size = loop_boundaries.thread.team_size(); - int team_rank = loop_boundaries.thread.team_rank(); // member num within the team - - // Intra-member scan - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } - - shared[team_rank] = accum; - loop_boundaries.thread.team_barrier(); - - // Member 0 do scan on accumulated totals - if (team_rank == 0) { - for( iType i = 1; i < team_size; i+=1) { - shared[i] += shared[i-1]; - } - accum = 0; // Member 0 set accum to 0 in preparation for inter-member scan - } - - loop_boundaries.thread.team_barrier(); - - // Inter-member scan adding in accumulated totals - if (team_rank != 0) { accum = shared[team_rank-1]; } - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::OpenMP > >& loop_boundaries, - const Lambda & lambda) -{ -} - - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_OPENMP_TASK_HPP */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp index 34cf581a4796feb2e8b3d8a3f57343148ac955d9..2d50c6e54886087deea707d0dbb155566ed51428 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp @@ -86,7 +86,7 @@ int OpenMPexec::m_map_rank[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; int OpenMPexec::m_pool_topo[ 4 ] = { 0 }; -OpenMPexec * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; +HostThreadTeamData * OpenMPexec::m_pool[ OpenMPexec::MAX_THREAD_COUNT ] = { 0 }; void OpenMPexec::verify_is_process( const char * const label ) { @@ -113,67 +113,110 @@ void OpenMPexec::verify_initialized( const char * const label ) } -void OpenMPexec::clear_scratch() +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void OpenMPexec::clear_thread_data() { + const size_t member_bytes = + sizeof(int64_t) * + HostThreadTeamData::align_to_int64( sizeof(HostThreadTeamData) ); + + const int old_alloc_bytes = + m_pool[0] ? 
( member_bytes + m_pool[0]->scratch_bytes() ) : 0 ; + + Kokkos::HostSpace space ; + #pragma omp parallel { - const int rank_rev = m_map_rank[ omp_get_thread_num() ]; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; - if ( m_pool[ rank_rev ] ) { - Record * const r = Record::get_record( m_pool[ rank_rev ] ); - m_pool[ rank_rev ] = 0 ; - Record::decrement( r ); + const int rank = m_map_rank[ omp_get_thread_num() ]; + + if ( 0 != m_pool[rank] ) { + + m_pool[rank]->disband_pool(); + + space.deallocate( m_pool[rank] , old_alloc_bytes ); + + m_pool[rank] = 0 ; } } /* END #pragma omp parallel */ } -void OpenMPexec::resize_scratch( size_t reduce_size , size_t thread_size ) +void OpenMPexec::resize_thread_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ) { - enum { ALIGN_MASK = Kokkos::Impl::MEMORY_ALIGNMENT - 1 }; - enum { ALLOC_EXEC = ( sizeof(OpenMPexec) + ALIGN_MASK ) & ~ALIGN_MASK }; + const size_t member_bytes = + sizeof(int64_t) * + HostThreadTeamData::align_to_int64( sizeof(HostThreadTeamData) ); - const size_t old_reduce_size = m_pool[0] ? m_pool[0]->m_scratch_reduce_end : 0 ; - const size_t old_thread_size = m_pool[0] ? m_pool[0]->m_scratch_thread_end - m_pool[0]->m_scratch_reduce_end : 0 ; + HostThreadTeamData * root = m_pool[0] ; - reduce_size = ( reduce_size + ALIGN_MASK ) & ~ALIGN_MASK ; - thread_size = ( thread_size + ALIGN_MASK ) & ~ALIGN_MASK ; + const size_t old_pool_reduce = root ? root->pool_reduce_bytes() : 0 ; + const size_t old_team_reduce = root ? root->team_reduce_bytes() : 0 ; + const size_t old_team_shared = root ? root->team_shared_bytes() : 0 ; + const size_t old_thread_local = root ? root->thread_local_bytes() : 0 ; + const size_t old_alloc_bytes = root ? ( member_bytes + root->scratch_bytes() ) : 0 ; - // Requesting allocation and old allocation is too small: + // Allocate if any of the old allocation is tool small: - const bool allocate = ( old_reduce_size < reduce_size ) || - ( old_thread_size < thread_size ); + const bool allocate = ( old_pool_reduce < pool_reduce_bytes ) || + ( old_team_reduce < team_reduce_bytes ) || + ( old_team_shared < team_shared_bytes ) || + ( old_thread_local < thread_local_bytes ); if ( allocate ) { - if ( reduce_size < old_reduce_size ) { reduce_size = old_reduce_size ; } - if ( thread_size < old_thread_size ) { thread_size = old_thread_size ; } - } - const size_t alloc_size = allocate ? 
ALLOC_EXEC + reduce_size + thread_size : 0 ; - const int pool_size = m_pool_topo[0] ; + if ( pool_reduce_bytes < old_pool_reduce ) { pool_reduce_bytes = old_pool_reduce ; } + if ( team_reduce_bytes < old_team_reduce ) { team_reduce_bytes = old_team_reduce ; } + if ( team_shared_bytes < old_team_shared ) { team_shared_bytes = old_team_shared ; } + if ( thread_local_bytes < old_thread_local ) { thread_local_bytes = old_thread_local ; } - if ( allocate ) { + const size_t alloc_bytes = + member_bytes + + HostThreadTeamData::scratch_size( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + const int pool_size = omp_get_max_threads(); - clear_scratch(); + Kokkos::HostSpace space ; #pragma omp parallel { - const int rank_rev = m_map_rank[ omp_get_thread_num() ]; - const int rank = pool_size - ( rank_rev + 1 ); + const int rank = m_map_rank[ omp_get_thread_num() ]; - typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ; + if ( 0 != m_pool[rank] ) { - Record * const r = Record::allocate( Kokkos::HostSpace() - , "openmp_scratch" - , alloc_size ); + m_pool[rank]->disband_pool(); - Record::increment( r ); + space.deallocate( m_pool[rank] , old_alloc_bytes ); + } + + void * const ptr = space.allocate( alloc_bytes ); - m_pool[ rank_rev ] = reinterpret_cast<OpenMPexec*>( r->data() ); + m_pool[ rank ] = new( ptr ) HostThreadTeamData(); - new ( m_pool[ rank_rev ] ) OpenMPexec( rank , ALLOC_EXEC , reduce_size , thread_size ); + m_pool[ rank ]-> + scratch_assign( ((char *)ptr) + member_bytes + , alloc_bytes + , pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); } /* END #pragma omp parallel */ + + HostThreadTeamData::organize_pool( m_pool , pool_size ); } } @@ -197,14 +240,14 @@ void OpenMP::initialize( unsigned thread_count , // Before any other call to OMP query the maximum number of threads // and save the value for re-initialization unit testing. - //Using omp_get_max_threads(); is problematic in conjunction with - //Hwloc on Intel (essentially an initial call to the OpenMP runtime - //without a parallel region before will set a process mask for a single core - //The runtime will than bind threads for a parallel region to other cores on the - //entering the first parallel region and make the process mask the aggregate of - //the thread masks. The intend seems to be to make serial code run fast, if you - //compile with OpenMP enabled but don't actually use parallel regions or so - //static int omp_max_threads = omp_get_max_threads(); + // Using omp_get_max_threads(); is problematic in conjunction with + // Hwloc on Intel (essentially an initial call to the OpenMP runtime + // without a parallel region before will set a process mask for a single core + // The runtime will than bind threads for a parallel region to other cores on the + // entering the first parallel region and make the process mask the aggregate of + // the thread masks. The intend seems to be to make serial code run fast, if you + // compile with OpenMP enabled but don't actually use parallel regions or so + // static int omp_max_threads = omp_get_max_threads(); int nthreads = 0; #pragma omp parallel { @@ -268,8 +311,6 @@ void OpenMP::initialize( unsigned thread_count , // Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region. // Call to 'new' may not be thread safe as well. - // Reverse the rank for threads so that the scan operation reduces to the highest rank thread. 
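// [Editorial sketch, not part of the patch] resize_thread_data() above only
// ever grows the per-thread scratch: a new request is clamped up to whatever
// was allocated before, so later dispatches with smaller requirements reuse
// the existing allocation.  grow_only() is a hypothetical helper modeling
// just that policy ("thread_local" is a C++ keyword, hence the _sz suffix).
inline bool grow_only( size_t & pool_reduce     , const size_t old_pool_reduce
                     , size_t & team_reduce     , const size_t old_team_reduce
                     , size_t & team_shared     , const size_t old_team_shared
                     , size_t & thread_local_sz , const size_t old_thread_local )
{
  const bool allocate = ( old_pool_reduce  < pool_reduce     ) ||
                        ( old_team_reduce  < team_reduce     ) ||
                        ( old_team_shared  < team_shared     ) ||
                        ( old_thread_local < thread_local_sz );

  if ( allocate ) {  // keep the larger of the previous and requested sizes
    if ( pool_reduce     < old_pool_reduce  ) pool_reduce     = old_pool_reduce ;
    if ( team_reduce     < old_team_reduce  ) team_reduce     = old_team_reduce ;
    if ( team_shared     < old_team_shared  ) team_shared     = old_team_shared ;
    if ( thread_local_sz < old_thread_local ) thread_local_sz = old_thread_local ;
  }

  // true: every m_pool[rank] is reallocated in parallel and the pool is
  // re-organized via HostThreadTeamData::organize_pool().
  return allocate ;
}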
- const unsigned omp_rank = omp_get_thread_num(); const unsigned thread_r = Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) @@ -286,7 +327,19 @@ void OpenMP::initialize( unsigned thread_count , Impl::OpenMPexec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; Impl::OpenMPexec::m_pool_topo[2] = Impl::s_using_hwloc ? thread_count / ( use_numa_count * use_cores_per_numa ) : 1; - Impl::OpenMPexec::resize_scratch( 1024 , 1024 ); + // New, unified host thread team data: + { + size_t pool_reduce_bytes = 32 * thread_count ; + size_t team_reduce_bytes = 32 * thread_count ; + size_t team_shared_bytes = 1024 * thread_count ; + size_t thread_local_bytes = 1024 ; + + Impl::OpenMPexec::resize_thread_data( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes + ); + } } } @@ -309,7 +362,7 @@ void OpenMP::initialize( unsigned thread_count , // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -321,7 +374,8 @@ void OpenMP::finalize() Impl::OpenMPexec::verify_initialized( "OpenMP::finalize" ); Impl::OpenMPexec::verify_is_process( "OpenMP::finalize" ); - Impl::OpenMPexec::clear_scratch(); + // New, unified host thread team data: + Impl::OpenMPexec::clear_thread_data(); Impl::OpenMPexec::m_pool_topo[0] = 0 ; Impl::OpenMPexec::m_pool_topo[1] = 0 ; @@ -333,7 +387,7 @@ void OpenMP::finalize() hwloc::unbind_this_thread(); } - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif } diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp index 63f7234da3a81a5e040f76e264377156cf024bb0..39ace3131927d8071c50fc44dedb046bf598f0de 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp @@ -44,13 +44,22 @@ #ifndef KOKKOS_OPENMPEXEC_HPP #define KOKKOS_OPENMPEXEC_HPP +#include <Kokkos_OpenMP.hpp> + #include <impl/Kokkos_Traits.hpp> -#include <impl/Kokkos_spinwait.hpp> +#include <impl/Kokkos_HostThreadTeam.hpp> #include <Kokkos_Atomic.hpp> + #include <iostream> #include <sstream> #include <fstream> + +#include <omp.h> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + namespace Kokkos { namespace Impl { @@ -60,41 +69,19 @@ namespace Impl { class OpenMPexec { public: + friend class Kokkos::OpenMP ; + enum { MAX_THREAD_COUNT = 4096 }; private: - static OpenMPexec * m_pool[ MAX_THREAD_COUNT ]; // Indexed by: m_pool_rank_rev - static int m_pool_topo[ 4 ]; static int m_map_rank[ MAX_THREAD_COUNT ]; - friend class Kokkos::OpenMP ; - - int const m_pool_rank ; - int const m_pool_rank_rev ; - int const m_scratch_exec_end ; - int const m_scratch_reduce_end ; - int const m_scratch_thread_end ; - - int volatile m_barrier_state ; - - // Members for dynamic scheduling - // Which thread am I stealing from currently - int m_current_steal_target; - // This thread's owned work_range - Kokkos::pair<long,long> m_work_range KOKKOS_ALIGN(16); - // Team Offset if one thread determines work_range for others - long m_team_work_index; + static HostThreadTeamData * m_pool[ MAX_THREAD_COUNT ]; - // Is this thread stealing (i.e. 
its owned work_range is exhausted - bool m_stealing; - - OpenMPexec(); - OpenMPexec( const OpenMPexec & ); - OpenMPexec & operator = ( const OpenMPexec & ); - - static void clear_scratch(); + static + void clear_thread_data(); public: @@ -108,47 +95,9 @@ public: inline static int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; } - inline static - OpenMPexec * pool_rev( int pool_rank_rev ) { return m_pool[ pool_rank_rev ]; } - - inline int pool_rank() const { return m_pool_rank ; } - inline int pool_rank_rev() const { return m_pool_rank_rev ; } - - inline long team_work_index() const { return m_team_work_index ; } - - inline int scratch_reduce_size() const - { return m_scratch_reduce_end - m_scratch_exec_end ; } - - inline int scratch_thread_size() const - { return m_scratch_thread_end - m_scratch_reduce_end ; } - - inline void * scratch_reduce() const { return ((char *) this) + m_scratch_exec_end ; } - inline void * scratch_thread() const { return ((char *) this) + m_scratch_reduce_end ; } - - inline - void state_wait( int state ) - { Impl::spinwait( m_barrier_state , state ); } - - inline - void state_set( int state ) { m_barrier_state = state ; } - - ~OpenMPexec() {} - - OpenMPexec( const int arg_poolRank - , const int arg_scratch_exec_size - , const int arg_scratch_reduce_size - , const int arg_scratch_thread_size ) - : m_pool_rank( arg_poolRank ) - , m_pool_rank_rev( pool_size() - ( arg_poolRank + 1 ) ) - , m_scratch_exec_end( arg_scratch_exec_size ) - , m_scratch_reduce_end( m_scratch_exec_end + arg_scratch_reduce_size ) - , m_scratch_thread_end( m_scratch_reduce_end + arg_scratch_thread_size ) - , m_barrier_state(0) - {} - static void finalize(); - static void initialize( const unsigned team_count , + static void initialize( const unsigned team_count , const unsigned threads_per_team , const unsigned numa_count , const unsigned cores_per_numa ); @@ -156,133 +105,20 @@ public: static void verify_is_process( const char * const ); static void verify_initialized( const char * const ); - static void resize_scratch( size_t reduce_size , size_t thread_size ); - inline static - OpenMPexec * get_thread_omp() { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } + static + void resize_thread_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ); - /* Dynamic Scheduling related functionality */ - // Initialize the work range for this thread - inline void set_work_range(const long& begin, const long& end, const long& chunk_size) { - m_work_range.first = (begin+chunk_size-1)/chunk_size; - m_work_range.second = end>0?(end+chunk_size-1)/chunk_size:m_work_range.first; - } - - // Claim and index from this thread's range from the beginning - inline long get_work_index_begin () { - Kokkos::pair<long,long> work_range_new = m_work_range; - Kokkos::pair<long,long> work_range_old = work_range_new; - if(work_range_old.first>=work_range_old.second) - return -1; - - work_range_new.first+=1; - - bool success = false; - while(!success) { - work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); - success = ( (work_range_new == work_range_old) || - (work_range_new.first>=work_range_new.second)); - work_range_old = work_range_new; - work_range_new.first+=1; - } - if(work_range_old.first<work_range_old.second) - return work_range_old.first; - else - return -1; - } - - // Claim and index from this thread's range from the end - inline long get_work_index_end () { - Kokkos::pair<long,long> 
work_range_new = m_work_range; - Kokkos::pair<long,long> work_range_old = work_range_new; - if(work_range_old.first>=work_range_old.second) - return -1; - work_range_new.second-=1; - bool success = false; - while(!success) { - work_range_new = Kokkos::atomic_compare_exchange(&m_work_range,work_range_old,work_range_new); - success = ( (work_range_new == work_range_old) || - (work_range_new.first>=work_range_new.second) ); - work_range_old = work_range_new; - work_range_new.second-=1; - } - if(work_range_old.first<work_range_old.second) - return work_range_old.second-1; - else - return -1; - } - - // Reset the steal target - inline void reset_steal_target() { - m_current_steal_target = (m_pool_rank+1)%m_pool_topo[0]; - m_stealing = false; - } - - // Reset the steal target - inline void reset_steal_target(int team_size) { - m_current_steal_target = (m_pool_rank_rev+team_size); - if(m_current_steal_target>=m_pool_topo[0]) - m_current_steal_target = 0;//m_pool_topo[0]-1; - m_stealing = false; - } - - // Get a steal target; start with my-rank + 1 and go round robin, until arriving at this threads rank - // Returns -1 fi no active steal target available - inline int get_steal_target() { - while(( m_pool[m_current_steal_target]->m_work_range.second <= - m_pool[m_current_steal_target]->m_work_range.first ) && - (m_current_steal_target!=m_pool_rank) ) { - m_current_steal_target = (m_current_steal_target+1)%m_pool_topo[0]; - } - if(m_current_steal_target == m_pool_rank) - return -1; - else - return m_current_steal_target; - } - - inline int get_steal_target(int team_size) { - - while(( m_pool[m_current_steal_target]->m_work_range.second <= - m_pool[m_current_steal_target]->m_work_range.first ) && - (m_current_steal_target!=m_pool_rank_rev) ) { - if(m_current_steal_target + team_size < m_pool_topo[0]) - m_current_steal_target = (m_current_steal_target+team_size); - else - m_current_steal_target = 0; - } - - if(m_current_steal_target == m_pool_rank_rev) - return -1; - else - return m_current_steal_target; - } - - inline long steal_work_index (int team_size = 0) { - long index = -1; - int steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); - while ( (steal_target != -1) && (index == -1)) { - index = m_pool[steal_target]->get_work_index_end(); - if(index == -1) - steal_target = team_size>0?get_steal_target(team_size):get_steal_target(); - } - return index; - } - - // Get a work index. 
Claim from owned range until its exhausted, then steal from other thread - inline long get_work_index (int team_size = 0) { - long work_index = -1; - if(!m_stealing) work_index = get_work_index_begin(); - - if( work_index == -1) { - memory_fence(); - m_stealing = true; - work_index = steal_work_index(team_size); - } - m_team_work_index = work_index; - memory_fence(); - return work_index; - } + inline static + HostThreadTeamData * get_thread_data() noexcept + { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } + inline static + HostThreadTeamData * get_thread_data( int i ) noexcept + { return m_pool[i]; } }; } // namespace Impl @@ -294,356 +130,6 @@ public: namespace Kokkos { namespace Impl { -class OpenMPexecTeamMember { -public: - - enum { TEAM_REDUCE_SIZE = 512 }; - - /** \brief Thread states for team synchronization */ - enum { Active = 0 , Rendezvous = 1 }; - - typedef Kokkos::OpenMP execution_space ; - typedef execution_space::scratch_memory_space scratch_memory_space ; - - Impl::OpenMPexec & m_exec ; - scratch_memory_space m_team_shared ; - int m_team_scratch_size[2] ; - int m_team_base_rev ; - int m_team_rank_rev ; - int m_team_rank ; - int m_team_size ; - int m_league_rank ; - int m_league_end ; - int m_league_size ; - - int m_chunk_size; - int m_league_chunk_end; - Impl::OpenMPexec & m_team_lead_exec ; - int m_invalid_thread; - int m_team_alloc; - - // Fan-in team threads, root of the fan-in which does not block returns true - inline - bool team_fan_in() const - { - memory_fence(); - for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) { - - m_exec.pool_rev( m_team_base_rev + j )->state_wait( Active ); - } - - if ( m_team_rank_rev ) { - m_exec.state_set( Rendezvous ); - memory_fence(); - m_exec.state_wait( Rendezvous ); - } - - return 0 == m_team_rank_rev ; - } - - inline - void team_fan_out() const - { - memory_fence(); - for ( int n = 1 , j ; ( ( j = m_team_rank_rev + n ) < m_team_size ) && ! ( m_team_rank_rev & n ) ; n <<= 1 ) { - m_exec.pool_rev( m_team_base_rev + j )->state_set( Active ); - memory_fence(); - } - } - -public: - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& team_shmem() const - { return m_team_shared.set_team_thread_mode(0,1,0) ; } - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& team_scratch(int) const - { return m_team_shared.set_team_thread_mode(0,1,0) ; } - - KOKKOS_INLINE_FUNCTION - const execution_space::scratch_memory_space& thread_scratch(int) const - { return m_team_shared.set_team_thread_mode(0,team_size(),team_rank()) ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - {} -#else - { - if ( 1 < m_team_size && !m_invalid_thread) { - team_fan_in(); - team_fan_out(); - } - } -#endif - - template<class ValueType> - KOKKOS_INLINE_FUNCTION - void team_broadcast(ValueType& value, const int& thread_id) const - { -#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { } -#else - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(ValueType) < TEAM_REDUCE_SIZE - , ValueType , void >::type type ; - - type volatile * const shared_value = - ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread()); - - if ( team_rank() == thread_id ) *shared_value = value; - memory_fence(); - team_barrier(); // Wait for 'thread_id' to write - value = *shared_value ; - team_barrier(); // Wait for team members to read -#endif - } - - template< class ValueType, class JoinOp > - KOKKOS_INLINE_FUNCTION ValueType - team_reduce( const ValueType & value - , const JoinOp & op_in ) const - #if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ValueType(); } - #else - { - memory_fence(); - typedef ValueType value_type; - const JoinLambdaAdapter<value_type,JoinOp> op(op_in); - #endif -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE - , value_type , void >::type type ; - - type * const local_value = ((type*) m_exec.scratch_thread()); - - // Set this thread's contribution - *local_value = value ; - - // Fence to make sure the base team member has access: - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - type * const team_value = ((type*) m_exec.pool_rev( m_team_base_rev )->scratch_thread()); - - // Join to the team value: - for ( int i = 1 ; i < m_team_size ; ++i ) { - op.join( *team_value , *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) ); - } - memory_fence(); - - // The base team member may "lap" the other team members, - // copy to their local value before proceeding. - for ( int i = 1 ; i < m_team_size ; ++i ) { - *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()) = *team_value ; - } - - // Fence to make sure all team members have access - memory_fence(); - } - - team_fan_out(); - - return *((type volatile const *)local_value); - } -#endif - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename ArgType > - KOKKOS_INLINE_FUNCTION ArgType team_scan( const ArgType & value , ArgType * const global_accum ) const -#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return ArgType(); } -#else - { - // Make sure there is enough scratch space: - typedef typename if_c< sizeof(ArgType) < TEAM_REDUCE_SIZE , ArgType , void >::type type ; - - volatile type * const work_value = ((type*) m_exec.scratch_thread()); - - *work_value = value ; - - memory_fence(); - - if ( team_fan_in() ) { - // The last thread to synchronize returns true, all other threads wait for team_fan_out() - // m_team_base[0] == highest ranking team member - // m_team_base[ m_team_size - 1 ] == lowest ranking team member - // - // 1) copy from lower to higher rank, initialize lowest rank to zero - // 2) prefix sum from lowest to highest rank, skipping lowest rank - - type accum = 0 ; - - if ( global_accum ) { - for ( int i = m_team_size ; i-- ; ) { - type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); - accum += val ; - } - accum = atomic_fetch_add( global_accum , accum ); - } - - for ( int i = m_team_size ; i-- ; ) { - type & val = *((type*) m_exec.pool_rev( m_team_base_rev + i )->scratch_thread()); - const type offset = accum ; - accum += val ; - val = offset ; - } - - memory_fence(); - } - - team_fan_out(); - - return *work_value ; - } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const - { return this-> template team_scan<Type>( value , 0 ); } - - //---------------------------------------- - // Private for the driver - -private: - - typedef execution_space::scratch_memory_space space ; - -public: - - template< class ... Properties > - inline - OpenMPexecTeamMember( Impl::OpenMPexec & exec - , const TeamPolicyInternal< OpenMP, Properties ...> & team - , const int shmem_size_L1 - , const int shmem_size_L2 - ) - : m_exec( exec ) - , m_team_shared(0,0) - , m_team_scratch_size{ shmem_size_L1 , shmem_size_L2 } - , m_team_base_rev(0) - , m_team_rank_rev(0) - , m_team_rank(0) - , m_team_size( team.team_size() ) - , m_league_rank(0) - , m_league_end(0) - , m_league_size( team.league_size() ) - , m_chunk_size( team.chunk_size()>0?team.chunk_size():team.team_iter() ) - , m_league_chunk_end(0) - , m_team_lead_exec( *exec.pool_rev( team.team_alloc() * (m_exec.pool_rank_rev()/team.team_alloc()) )) - , m_team_alloc( team.team_alloc()) - { - const int pool_rank_rev = m_exec.pool_rank_rev(); - const int pool_team_rank_rev = pool_rank_rev % team.team_alloc(); - const int pool_league_rank_rev = pool_rank_rev / team.team_alloc(); - const int pool_num_teams = OpenMP::thread_pool_size(0)/team.team_alloc(); - const int chunks_per_team = ( team.league_size() + m_chunk_size*pool_num_teams-1 ) / (m_chunk_size*pool_num_teams); - int league_iter_end = team.league_size() - pool_league_rank_rev * chunks_per_team * m_chunk_size; - int league_iter_begin = league_iter_end - chunks_per_team * m_chunk_size; - if (league_iter_begin < 0) league_iter_begin = 0; - if (league_iter_end>team.league_size()) league_iter_end = team.league_size(); - - if ((team.team_alloc()>m_team_size)? 
- (pool_team_rank_rev >= m_team_size): - (m_exec.pool_size() - pool_num_teams*m_team_size > m_exec.pool_rank()) - ) - m_invalid_thread = 1; - else - m_invalid_thread = 0; - - m_team_rank_rev = pool_team_rank_rev ; - if ( pool_team_rank_rev < m_team_size && !m_invalid_thread ) { - m_team_base_rev = team.team_alloc() * pool_league_rank_rev ; - m_team_rank_rev = pool_team_rank_rev ; - m_team_rank = m_team_size - ( m_team_rank_rev + 1 ); - m_league_end = league_iter_end ; - m_league_rank = league_iter_begin ; - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0 ); - } - - if ( (m_team_rank_rev == 0) && (m_invalid_thread == 0) ) { - m_exec.set_work_range(m_league_rank,m_league_end,m_chunk_size); - m_exec.reset_steal_target(m_team_size); - } - } - - bool valid_static() const - { - return m_league_rank < m_league_end ; - } - - void next_static() - { - if ( m_league_rank < m_league_end ) { - team_barrier(); - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0); - } - m_league_rank++; - } - - bool valid_dynamic() { - if(m_invalid_thread) - return false; - if ((m_league_rank < m_league_chunk_end) && (m_league_rank < m_league_size)) { - return true; - } - - if ( m_team_rank_rev == 0 ) { - m_team_lead_exec.get_work_index(m_team_alloc); - } - team_barrier(); - - long work_index = m_team_lead_exec.team_work_index(); - - m_league_rank = work_index * m_chunk_size; - m_league_chunk_end = (work_index +1 ) * m_chunk_size; - - if(m_league_chunk_end > m_league_size) m_league_chunk_end = m_league_size; - - if(m_league_rank>=0) - return true; - return false; - } - - void next_dynamic() { - if(m_invalid_thread) - return; - - if ( m_league_rank < m_league_chunk_end ) { - team_barrier(); - new( (void*) &m_team_shared ) space( ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE , m_team_scratch_size[0] , - ( (char*) m_exec.pool_rev(m_team_base_rev)->scratch_thread() ) + TEAM_REDUCE_SIZE + m_team_scratch_size[0], - 0); - } - m_league_rank++; - } - - static inline int team_reduce_size() { return TEAM_REDUCE_SIZE ; } -}; - template< class ... Properties > class TeamPolicyInternal< Kokkos::OpenMP, Properties ... 
>: public PolicyTraits<Properties ...> { @@ -671,8 +157,11 @@ public: template< class FunctorType > inline static - int team_size_max( const FunctorType & ) - { return traits::execution_space::thread_pool_size(1); } + int team_size_max( const FunctorType & ) { + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } template< class FunctorType > inline static @@ -702,7 +191,8 @@ private: , const int team_size_request ) { const int pool_size = traits::execution_space::thread_pool_size(0); - const int team_max = traits::execution_space::thread_pool_size(1); + const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + const int team_max = pool_size<max_host_team_size?pool_size:max_host_team_size; const int team_grain = traits::execution_space::thread_pool_size(2); m_league_size = league_size_request ; @@ -823,7 +313,7 @@ private: } public: - typedef Impl::OpenMPexecTeamMember member_type ; + typedef Impl::HostThreadTeamMember< Kokkos::OpenMP > member_type ; }; } // namespace Impl @@ -850,216 +340,6 @@ int OpenMP::thread_pool_rank() #endif } -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember > -TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, count ); -} - -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::OpenMPexecTeamMember > -TeamThreadRange( const Impl::OpenMPexecTeamMember& thread, const iType1& begin, const iType2& end ) { - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::OpenMPexecTeamMember >( thread, iType(begin), iType(end) ); -} - -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember > -ThreadVectorRange(const Impl::OpenMPexecTeamMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >(thread,count); -} - -KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember> PerTeam(const Impl::OpenMPexecTeamMember& thread) { - return Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>(thread); -} - -KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember> PerThread(const Impl::OpenMPexecTeamMember& thread) { - return Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>(thread); -} - } // namespace Kokkos -namespace Kokkos { - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Inter-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
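The TeamThreadRange / ThreadVectorRange / PerTeam / PerThread overloads and the nested parallel_for / parallel_reduce bodies removed in this hunk were tied to the OpenMP-specific OpenMPexecTeamMember; the member_type above now points at the shared Impl::HostThreadTeamMember< Kokkos::OpenMP >, and the user-facing pattern these doc comments describe is unchanged. A brief usage sketch of that pattern against the public Kokkos API, assuming a host (OpenMP or Serial) default execution space; the view name and sizes are arbitrary:

#include <Kokkos_Core.hpp>

// Sums the rows of a small matrix with team + thread-range nesting.
int main( int argc, char* argv[] )
{
  Kokkos::initialize( argc, argv );
  {
    const int nrows = 100, ncols = 32;
    Kokkos::View<double**> a( "a", nrows, ncols );
    Kokkos::deep_copy( a, 1.0 );

    typedef Kokkos::TeamPolicy<>::member_type member_type;

    double total = 0.0;
    Kokkos::parallel_reduce( Kokkos::TeamPolicy<>( nrows, Kokkos::AUTO ),
      KOKKOS_LAMBDA( const member_type & team, double & update )
      {
        const int row = team.league_rank();
        double row_sum = 0.0;
        // Threads of the team cooperatively reduce one row.
        Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, ncols ),
          [&]( const int col, double & sum ) { sum += a( row, col ); },
          row_sum );
        // Only one contribution per team goes into the league-level result.
        Kokkos::single( Kokkos::PerTeam( team ), [&]() { update += row_sum; } );
      }, total );
  }
  Kokkos::finalize();
  return 0;
}

With the matrix filled with ones the reduction yields nrows * ncols.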
- * - * The range i=0..N-1 is mapped to all threads of the the calling thread team and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, - const Lambda & lambda, ValueType& result) { - - result = ValueType(); - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } - - result = loop_boundaries.thread.team_reduce(result,Impl::JoinAdd<ValueType>()); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember>& loop_boundaries, - const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - - init_result = loop_boundaries.thread.team_reduce(result,join); -} - -} //namespace Kokkos - -namespace Kokkos { -/** \brief Intra-thread vector parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread. - * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& - loop_boundaries, const Lambda& lambda) { - #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP - #pragma ivdep - #endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a summation of - * val is performed and put into result. This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& - loop_boundaries, const Lambda & lambda, ValueType& result) { - result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; - } -} - -/** \brief Intra-thread vector parallel_reduce. Executes lambda(iType i, ValueType & val) for each i=0..N-1. 
- * - * The range i=0..N-1 is mapped to all vector lanes of the the calling thread and a reduction of - * val is performed using JoinType(ValueType& val, const ValueType& update) and put into init_result. - * The input value of init_result is used as initializer for temporary variables of ValueType. Therefore - * the input value should be the neutral element with respect to the join operation (e.g. '0 for +-' or - * '1 for *'). This functionality requires C++11 support.*/ -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { - - ValueType result = init_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - init_result = result; -} - -/** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) - * for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all vector lanes in the thread and a scan operation is performed. - * Depending on the target execution space the operator might be called twice: once with final=false - * and once with final=true. When final==true val contains the prefix sum value. The contribution of this - * "i" needs to be added to val no matter whether final==true or not. In a serial execution - * (i.e. team_size==1) the operator is only called once with final==true. Scan_val will be set - * to the final sum value over all vector lanes. - * This functionality requires C++11 support.*/ -template< typename iType, class FunctorType > -KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::OpenMPexecTeamMember >& - loop_boundaries, const FunctorType & lambda) { - - typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; - typedef typename ValueTraits::value_type value_type ; - - value_type scan_val = value_type(); - -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - lambda(i,scan_val,true); - } -} - -} // namespace Kokkos - -namespace Kokkos { - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) { - lambda(); -} - -template<class FunctorType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda) { - if(single_struct.team_member.team_rank()==0) lambda(); -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { - lambda(val); -} - -template<class FunctorType, class ValueType> -KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::OpenMPexecTeamMember>& single_struct, const FunctorType& lambda, ValueType& val) { - if(single_struct.team_member.team_rank()==0) { - lambda(val); - } - single_struct.team_member.team_broadcast(val,0); -} -} - #endif /* #ifndef KOKKOS_OPENMPEXEC_HPP */ diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp 
b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp deleted file mode 100644 index b4df5e35bb7897b7e7bdf76acb4f2bc4d9a9fe77..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp +++ /dev/null @@ -1,511 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <Kokkos_Core_fwd.hpp> - -#if defined( KOKKOS_ENABLE_QTHREAD ) - -#include <stdio.h> -#include <stdlib.h> -#include <iostream> -#include <sstream> -#include <utility> -#include <Kokkos_Qthread.hpp> -#include <Kokkos_Atomic.hpp> -#include <impl/Kokkos_Error.hpp> - -// Defines to enable experimental Qthread functionality - -#define QTHREAD_LOCAL_PRIORITY -#define CLONED_TASKS - -#include <qthread/qthread.h> - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { -namespace { - -enum { MAXIMUM_QTHREAD_WORKERS = 1024 }; - -/** s_exec is indexed by the reverse rank of the workers - * for faster fan-in / fan-out lookups - * [ n - 1 , n - 2 , ... 
, 0 ] - */ -QthreadExec * s_exec[ MAXIMUM_QTHREAD_WORKERS ]; - -int s_number_shepherds = 0 ; -int s_number_workers_per_shepherd = 0 ; -int s_number_workers = 0 ; - -inline -QthreadExec ** worker_exec() -{ - return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local(NULL) + 1 ); -} - -const int s_base_size = QthreadExec::align_alloc( sizeof(QthreadExec) ); - -int s_worker_reduce_end = 0 ; /* End of worker reduction memory */ -int s_worker_shared_end = 0 ; /* Total of worker scratch memory */ -int s_worker_shared_begin = 0 ; /* Beginning of worker shared memory */ - -QthreadExecFunctionPointer volatile s_active_function = 0 ; -const void * volatile s_active_function_arg = 0 ; - -} /* namespace */ -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { - -int Qthread::is_initialized() -{ - return Impl::s_number_workers != 0 ; -} - -int Qthread::concurrency() -{ - return Impl::s_number_workers_per_shepherd ; -} - -int Qthread::in_parallel() -{ - return Impl::s_active_function != 0 ; -} - -void Qthread::initialize( int thread_count ) -{ - // Environment variable: QTHREAD_NUM_SHEPHERDS - // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP - // Environment variable: QTHREAD_HWPAR - - { - char buffer[256]; - snprintf(buffer,sizeof(buffer),"QTHREAD_HWPAR=%d",thread_count); - putenv(buffer); - } - - const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) && - ( thread_count == qthread_num_shepherds() * qthread_num_workers_local(NO_SHEPHERD) ) && - ( thread_count == qthread_num_workers() ); - - bool ok_symmetry = true ; - - if ( ok_init ) { - Impl::s_number_shepherds = qthread_num_shepherds(); - Impl::s_number_workers_per_shepherd = qthread_num_workers_local(NO_SHEPHERD); - Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd ; - - for ( int i = 0 ; ok_symmetry && i < Impl::s_number_shepherds ; ++i ) { - ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local(i) ); - } - } - - if ( ! ok_init || ! ok_symmetry ) { - std::ostringstream msg ; - - msg << "Kokkos::Qthread::initialize(" << thread_count << ") FAILED" ; - msg << " : qthread_num_shepherds = " << qthread_num_shepherds(); - msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local(NO_SHEPHERD); - msg << " : qthread_num_workers = " << qthread_num_workers(); - - if ( ! 
ok_symmetry ) { - msg << " : qthread_num_workers_local = {" ; - for ( int i = 0 ; i < Impl::s_number_shepherds ; ++i ) { - msg << " " << qthread_num_workers_local(i) ; - } - msg << " }" ; - } - - Impl::s_number_workers = 0 ; - Impl::s_number_shepherds = 0 ; - Impl::s_number_workers_per_shepherd = 0 ; - - if ( ok_init ) { qthread_finalize(); } - - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - - Impl::QthreadExec::resize_worker_scratch( 256 , 256 ); - - // Init the array for used for arbitrarily sized atomics - Impl::init_lock_array_host_space(); - -} - -void Qthread::finalize() -{ - Impl::QthreadExec::clear_workers(); - - if ( Impl::s_number_workers ) { - qthread_finalize(); - } - - Impl::s_number_workers = 0 ; - Impl::s_number_shepherds = 0 ; - Impl::s_number_workers_per_shepherd = 0 ; -} - -void Qthread::print_configuration( std::ostream & s , const bool detail ) -{ - s << "Kokkos::Qthread {" - << " num_shepherds(" << Impl::s_number_shepherds << ")" - << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")" - << " }" << std::endl ; -} - -Qthread & Qthread::instance( int ) -{ - static Qthread q ; - return q ; -} - -void Qthread::fence() -{ -} - -int Qthread::shepherd_size() const { return Impl::s_number_shepherds ; } -int Qthread::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd ; } - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { -namespace { - -aligned_t driver_exec_all( void * arg ) -{ - QthreadExec & exec = **worker_exec(); - - (*s_active_function)( exec , s_active_function_arg ); - -/* - fprintf( stdout - , "QthreadExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" - , exec.worker_rank() - , exec.worker_size() - , exec.shepherd_rank() - , exec.shepherd_size() - , exec.shepherd_worker_rank() - , exec.shepherd_worker_size() - ); - fflush(stdout); -*/ - - return 0 ; -} - -aligned_t driver_resize_worker_scratch( void * arg ) -{ - static volatile int lock_begin = 0 ; - static volatile int lock_end = 0 ; - - QthreadExec ** const exec = worker_exec(); - - //---------------------------------------- - // Serialize allocation for thread safety - - while ( ! atomic_compare_exchange_strong( & lock_begin , 0 , 1 ) ); // Spin wait to claim lock - - const bool ok = 0 == *exec ; - - if ( ok ) { *exec = (QthreadExec *) malloc( s_base_size + s_worker_shared_end ); } - - lock_begin = 0 ; // release lock - - if ( ok ) { new( *exec ) QthreadExec(); } - - //---------------------------------------- - // Wait for all calls to complete to insure that each worker has executed. - - if ( s_number_workers == 1 + atomic_fetch_add( & lock_end , 1 ) ) { lock_end = 0 ; } - - while ( lock_end ); - -/* - fprintf( stdout - , "QthreadExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n" - , (**exec).worker_rank() - , (**exec).worker_size() - , (**exec).shepherd_rank() - , (**exec).shepherd_size() - , (**exec).shepherd_worker_rank() - , (**exec).shepherd_worker_size() - ); - fflush(stdout); -*/ - - //---------------------------------------- - - if ( ! 
ok ) { - fprintf( stderr , "Kokkos::QthreadExec resize failed\n" ); - fflush( stderr ); - } - - return 0 ; -} - -void verify_is_process( const char * const label , bool not_active = false ) -{ - const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local(NULL); - const bool is_active = not_active && ( s_active_function || s_active_function_arg ); - - if ( not_process || is_active ) { - std::string msg( label ); - msg.append( " : FAILED" ); - if ( not_process ) msg.append(" : not called by main process"); - if ( is_active ) msg.append(" : parallel execution in progress"); - Kokkos::Impl::throw_runtime_exception( msg ); - } -} - -} - -int QthreadExec::worker_per_shepherd() -{ - return s_number_workers_per_shepherd ; -} - -QthreadExec::QthreadExec() -{ - const int shepherd_rank = qthread_shep(); - const int shepherd_worker_rank = qthread_worker_local(NULL); - const int worker_rank = shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank ; - - m_worker_base = s_exec ; - m_shepherd_base = s_exec + s_number_workers_per_shepherd * ( ( s_number_shepherds - ( shepherd_rank + 1 ) ) ); - m_scratch_alloc = ( (unsigned char *) this ) + s_base_size ; - m_reduce_end = s_worker_reduce_end ; - m_shepherd_rank = shepherd_rank ; - m_shepherd_size = s_number_shepherds ; - m_shepherd_worker_rank = shepherd_worker_rank ; - m_shepherd_worker_size = s_number_workers_per_shepherd ; - m_worker_rank = worker_rank ; - m_worker_size = s_number_workers ; - m_worker_state = QthreadExec::Active ; -} - -void QthreadExec::clear_workers() -{ - for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { - QthreadExec * const exec = s_exec[iwork] ; - s_exec[iwork] = 0 ; - free( exec ); - } -} - -void QthreadExec::shared_reset( Qthread::scratch_memory_space & space ) -{ - new( & space ) - Qthread::scratch_memory_space( - ((unsigned char *) (**m_shepherd_base).m_scratch_alloc ) + s_worker_shared_begin , - s_worker_shared_end - s_worker_shared_begin - ); -} - -void QthreadExec::resize_worker_scratch( const int reduce_size , const int shared_size ) -{ - const int exec_all_reduce_alloc = align_alloc( reduce_size ); - const int shepherd_scan_alloc = align_alloc( 8 ); - const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); - - if ( s_worker_reduce_end < exec_all_reduce_alloc || - s_worker_shared_end < shepherd_shared_end ) { - -/* - fprintf( stdout , "QthreadExec::resize\n"); - fflush(stdout); -*/ - - // Clear current worker memory before allocating new worker memory - clear_workers(); - - // Increase the buffers to an aligned allocation - s_worker_reduce_end = exec_all_reduce_alloc ; - s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc ; - s_worker_shared_end = shepherd_shared_end ; - - // Need to query which shepherd this main 'process' is running... - - const int main_shep = qthread_shep(); - - // Have each worker resize its memory for proper first-touch -#if 0 - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i ) { - qthread_fork_to( driver_resize_worker_scratch , NULL , NULL , jshep ); - }} -#else - // If this function is used before the 'qthread.task_policy' unit test - // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ; - - if ( num_clone ) { - const int ret = qthread_fork_clones_to_local_priority - ( driver_resize_worker_scratch /* function */ - , NULL /* function data block */ - , NULL /* pointer to return value feb */ - , jshep /* shepherd number */ - , num_clone - 1 /* number of instances - 1 */ - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_resize_worker_scratch( NULL ); - - // Verify all workers allocated - - bool ok = true ; - for ( int iwork = 0 ; ok && iwork < s_number_workers ; ++iwork ) { ok = 0 != s_exec[iwork] ; } - - if ( ! ok ) { - std::ostringstream msg ; - msg << "Kokkos::Impl::QthreadExec::resize : FAILED for workers {" ; - for ( int iwork = 0 ; iwork < s_number_workers ; ++iwork ) { - if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } - } - msg << " }" ; - Kokkos::Impl::throw_runtime_exception( msg.str() ); - } - } -} - -void QthreadExec::exec_all( Qthread & , QthreadExecFunctionPointer func , const void * arg ) -{ - verify_is_process("QthreadExec::exec_all(...)",true); - -/* - fprintf( stdout , "QthreadExec::exec_all\n"); - fflush(stdout); -*/ - - s_active_function = func ; - s_active_function_arg = arg ; - - // Need to query which shepherd this main 'process' is running... - - const int main_shep = qthread_shep(); - -#if 0 - for ( int jshep = 0 , iwork = 0 ; jshep < s_number_shepherds ; ++jshep ) { - for ( int i = jshep != main_shep ? 0 : 1 ; i < s_number_workers_per_shepherd ; ++i , ++iwork ) { - qthread_fork_to( driver_exec_all , NULL , NULL , jshep ); - }} -#else - // If this function is used before the 'qthread.task_policy' unit test - // the 'qthread.task_policy' unit test fails with a seg-fault within libqthread.so. - for ( int jshep = 0 ; jshep < s_number_shepherds ; ++jshep ) { - const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1 ; - - if ( num_clone ) { - const int ret = qthread_fork_clones_to_local_priority - ( driver_exec_all /* function */ - , NULL /* function data block */ - , NULL /* pointer to return value feb */ - , jshep /* shepherd number */ - , num_clone - 1 /* number of instances - 1 */ - ); - - assert(ret == QTHREAD_SUCCESS); - } - } -#endif - - driver_exec_all( NULL ); - - s_active_function = 0 ; - s_active_function_arg = 0 ; -} - -void * QthreadExec::exec_all_reduce_result() -{ - return s_exec[0]->m_scratch_alloc ; -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -namespace Kokkos { -namespace Impl { - -QthreadTeamPolicyMember::QthreadTeamPolicyMember() - : m_exec( **worker_exec() ) - , m_team_shared(0,0) - , m_team_size( 1 ) - , m_team_rank( 0 ) - , m_league_size(1) - , m_league_end(1) - , m_league_rank(0) -{ - m_exec.shared_reset( m_team_shared ); -} - -QthreadTeamPolicyMember::QthreadTeamPolicyMember( const QthreadTeamPolicyMember::TaskTeam & ) - : m_exec( **worker_exec() ) - , m_team_shared(0,0) - , m_team_size( s_number_workers_per_shepherd ) - , m_team_rank( m_exec.shepherd_worker_rank() ) - , m_league_size(1) - , m_league_end(1) - , m_league_rank(0) -{ - m_exec.shared_reset( m_team_shared ); -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */ - diff --git a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp b/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp deleted file mode 100644 index f948eb2903b631e82727e670e84339383d5891c9..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp +++ /dev/null @@ -1,620 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#ifndef KOKKOS_QTHREADEXEC_HPP -#define KOKKOS_QTHREADEXEC_HPP - -#include <impl/Kokkos_spinwait.hpp> - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -//---------------------------------------------------------------------------- - -class QthreadExec ; - -typedef void (*QthreadExecFunctionPointer)( QthreadExec & , const void * ); - -class QthreadExec { -private: - - enum { Inactive = 0 , Active = 1 }; - - const QthreadExec * const * m_worker_base ; - const QthreadExec * const * m_shepherd_base ; - - void * m_scratch_alloc ; ///< Scratch memory [ reduce , team , shared ] - int m_reduce_end ; ///< End of scratch reduction memory - - int m_shepherd_rank ; - int m_shepherd_size ; - - int m_shepherd_worker_rank ; - int m_shepherd_worker_size ; - - /* - * m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank - * m_worker_size = m_shepherd_size * m_shepherd_worker_size - */ - int m_worker_rank ; - int m_worker_size ; - - int mutable volatile m_worker_state ; - - - friend class Kokkos::Qthread ; - - ~QthreadExec(); - QthreadExec( const QthreadExec & ); - QthreadExec & operator = ( const QthreadExec & ); - -public: - - QthreadExec(); - - /** Execute the input function on all available Qthread workers */ - static void exec_all( Qthread & , QthreadExecFunctionPointer , const void * ); - - //---------------------------------------- - /** Barrier across all workers participating in the 'exec_all' */ - void exec_all_barrier() const - { - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - /** Barrier across workers within the shepherd with rank < team_rank */ - void shepherd_barrier( const int team_size ) const - { - if ( m_shepherd_worker_rank < team_size ) { - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - } - } - - //---------------------------------------- - /** Reduce across all workers participating in the 'exec_all' */ - template< class FunctorType , class ReducerType , class ArgTag > - inline - void exec_all_reduce( const FunctorType & func, const ReducerType & reduce ) const - { - typedef Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType > ReducerConditional; - typedef typename ReducerConditional::type ReducerTypeFwd; - typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, ArgTag > ValueJoin ; - - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - const QthreadExec & fan = *m_worker_base[j]; - - Impl::spinwait( fan.m_worker_state , QthreadExec::Active ); - - ValueJoin::join( ReducerConditional::select(func , reduce) , m_scratch_alloc , fan.m_scratch_alloc ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - //---------------------------------------- - /** Scall across all workers participating in the 'exec_all' */ - template< class FunctorType , class ArgTag > - inline - void exec_all_scan( const FunctorType & func ) const - { - typedef Kokkos::Impl::FunctorValueInit< FunctorType , ArgTag > ValueInit ; - typedef Kokkos::Impl::FunctorValueJoin< FunctorType , ArgTag > ValueJoin ; - typedef Kokkos::Impl::FunctorValueOps< FunctorType , ArgTag > ValueOps ; - - const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - Impl::spinwait( m_worker_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - // Root thread scans across values before releasing threads - // Worker data is in reverse order, so m_worker_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - for ( int i = 1 ; i < m_worker_size ; ++i ) { - ValueOps::copy( func - , m_worker_base[i-1]->m_scratch_alloc - , m_worker_base[i]->m_scratch_alloc - ); - } - - ValueInit::init( func , m_worker_base[m_worker_size-1]->m_scratch_alloc ); - - // Join from lower ranking to higher ranking worker. - // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2]. - for ( int i = m_worker_size - 1 ; --i > 0 ; ) { - ValueJoin::join( func , m_worker_base[i-1]->m_scratch_alloc , m_worker_base[i]->m_scratch_alloc ); - } - } - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ) ; n <<= 1 ) { - m_worker_base[j]->m_worker_state = QthreadExec::Active ; - } - } - - //---------------------------------------- - - template< class Type> - inline - volatile Type * shepherd_team_scratch_value() const - { return (volatile Type*)(((unsigned char *) m_scratch_alloc) + m_reduce_end); } - - template< class Type > - inline - void shepherd_broadcast( Type & value , const int team_size , const int team_rank ) const - { - if ( m_shepherd_base ) { - Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); - if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value ; } - memory_fence(); - shepherd_barrier( team_size ); - value = *shared_value ; - } - } - - template< class Type > - inline - Type shepherd_reduce( const int team_size , const Type & value ) const - { - *shepherd_team_scratch_value<Type>() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); - for ( int i = 1 ; i < n ; ++i ) { - accum += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); - } - for ( int i = 1 ; i < n ; ++i ) { - * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ; - } - - memory_fence(); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value<Type>(); - } - - template< class JoinOp > - inline - typename JoinOp::value_type - shepherd_reduce( const int team_size - , const typename JoinOp::value_type & value - , const JoinOp & op ) const - { - typedef typename JoinOp::value_type Type ; - - *shepherd_team_scratch_value<Type>() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - volatile Type & accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); - for ( int i = 1 ; i < team_size ; ++i ) { - op.join( accum , * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() ); - } - for ( int i = 1 ; i < team_size ; ++i ) { - * m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum ; - } - - memory_fence(); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value<Type>(); - } - - template< class Type > - inline - Type shepherd_scan( const int team_size - , const Type & value - , Type * const global_value = 0 ) const - { - *shepherd_team_scratch_value<Type>() = value ; - - memory_fence(); - - const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); - - int n , j ; - - for ( n = 1 ; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - Impl::spinwait( m_shepherd_base[j]->m_worker_state , QthreadExec::Active ); - } - - if ( rev_rank ) { - m_worker_state = QthreadExec::Inactive ; - Impl::spinwait( m_worker_state , QthreadExec::Inactive ); - } - else { - // Root thread scans across values before releasing threads - // Worker data is in reverse order, so m_shepherd_base[0] is the - // highest ranking thread. - - // Copy from lower ranking to higher ranking worker. - - Type accum = * m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); - for ( int i = 1 ; i < team_size ; ++i ) { - const Type tmp = * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); - accum += tmp ; - * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp ; - } - - * m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() = - global_value ? atomic_fetch_add( global_value , accum ) : 0 ; - - // Join from lower ranking to higher ranking worker. - for ( int i = team_size ; --i ; ) { - * m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += * m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); - } - - memory_fence(); - } - - for ( n = 1 ; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ) ; n <<= 1 ) { - m_shepherd_base[j]->m_worker_state = QthreadExec::Active ; - } - - return *shepherd_team_scratch_value<Type>(); - } - - //---------------------------------------- - - static inline - int align_alloc( int size ) - { - enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */}; - enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; - return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK ; - } - - void shared_reset( Qthread::scratch_memory_space & ); - - void * exec_all_reduce_value() const { return m_scratch_alloc ; } - - static void * exec_all_reduce_result(); - - static void resize_worker_scratch( const int reduce_size , const int shared_size ); - static void clear_workers(); - - //---------------------------------------- - - inline int worker_rank() const { return m_worker_rank ; } - inline int worker_size() const { return m_worker_size ; } - inline int shepherd_worker_rank() const { return m_shepherd_worker_rank ; } - inline int shepherd_worker_size() const { return m_shepherd_worker_size ; } - inline int shepherd_rank() const { return m_shepherd_rank ; } - inline int shepherd_size() const { return m_shepherd_size ; } - - static int worker_per_shepherd(); -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -class QthreadTeamPolicyMember { -private: - - typedef Kokkos::Qthread execution_space ; - typedef execution_space::scratch_memory_space scratch_memory_space ; - - - Impl::QthreadExec & m_exec ; - scratch_memory_space m_team_shared ; - const int m_team_size ; - const int m_team_rank ; - const int m_league_size ; - const int m_league_end ; - int m_league_rank ; - -public: - - KOKKOS_INLINE_FUNCTION - const scratch_memory_space & team_shmem() const { return m_team_shared ; } - - KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank ; } - KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size ; } - KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size ; } - - KOKKOS_INLINE_FUNCTION void team_barrier() const -#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - {} -#else - { m_exec.shepherd_barrier( m_team_size ); } -#endif - - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value , int rank ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_broadcast<Type>( value , m_team_size , rank ); } -#endif - - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_reduce<Type>( m_team_size , value ); } -#endif - - template< typename JoinOp > - KOKKOS_INLINE_FUNCTION typename JoinOp::value_type - team_reduce( const typename JoinOp::value_type & value - , const JoinOp & op ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return typename JoinOp::value_type(); } -#else - { return m_exec.template shepherd_reduce<JoinOp>( m_team_size , value , op ); } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering. - * - * The highest rank thread can compute the reduction total as - * reduction_total = dev.team_scan( value ) + value ; - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_scan<Type>( m_team_size , value ); } -#endif - - /** \brief Intra-team exclusive prefix sum with team_rank() ordering - * with intra-team non-deterministic ordering accumulation. - * - * The global inter-team accumulation value will, at the end of the - * league's parallel execution, be the scan's total. - * Parallel execution ordering of the league's teams is non-deterministic. - * As such the base value for each team's scan operation is similarly - * non-deterministic. - */ - template< typename Type > - KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value , Type * const global_accum ) const -#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - { return Type(); } -#else - { return m_exec.template shepherd_scan<Type>( m_team_size , value , global_accum ); } -#endif - - //---------------------------------------- - // Private driver for task-team parallel - - struct TaskTeam {}; - - QthreadTeamPolicyMember(); - explicit QthreadTeamPolicyMember( const TaskTeam & ); - - //---------------------------------------- - // Private for the driver ( for ( member_type i(exec,team); i ; i.next_team() ) { ... } - - // Initialize - template< class ... Properties > - QthreadTeamPolicyMember( Impl::QthreadExec & exec - , const Kokkos::Impl::TeamPolicyInternal<Qthread,Properties...> & team ) - : m_exec( exec ) - , m_team_shared(0,0) - , m_team_size( team.m_team_size ) - , m_team_rank( exec.shepherd_worker_rank() ) - , m_league_size( team.m_league_size ) - , m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) ) - , m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 ) - { - m_exec.shared_reset( m_team_shared ); - } - - // Continue - operator bool () const { return m_league_rank < m_league_end ; } - - // iterate - void next_team() { ++m_league_rank ; m_exec.shared_reset( m_team_shared ); } -}; - - -template< class ... Properties > -class TeamPolicyInternal< Kokkos::Qthread , Properties ... > - : public PolicyTraits< Properties... 
> -{ -private: - - const int m_league_size ; - const int m_team_size ; - const int m_shepherd_iter ; - -public: - - //! Tag this class as a kokkos execution policy - typedef TeamPolicyInternal execution_policy ; - typedef Qthread execution_space ; - typedef PolicyTraits< Properties ... > traits ; - - //---------------------------------------- - - template< class FunctorType > - inline static - int team_size_max( const FunctorType & ) - { return Qthread::instance().shepherd_worker_size(); } - - template< class FunctorType > - static int team_size_recommended( const FunctorType & f ) - { return team_size_max( f ); } - - template< class FunctorType > - inline static - int team_size_recommended( const FunctorType & f , const int& ) - { return team_size_max( f ); } - - //---------------------------------------- - - inline int team_size() const { return m_team_size ; } - inline int league_size() const { return m_league_size ; } - - // One active team per shepherd - TeamPolicyInternal( Kokkos::Qthread & q - , const int league_size - , const int team_size - , const int /* vector_length */ = 0 - ) - : m_league_size( league_size ) - , m_team_size( team_size < q.shepherd_worker_size() - ? team_size : q.shepherd_worker_size() ) - , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) - { - } - - // One active team per shepherd - TeamPolicyInternal( const int league_size - , const int team_size - , const int /* vector_length */ = 0 - ) - : m_league_size( league_size ) - , m_team_size( team_size < Qthread::instance().shepherd_worker_size() - ? team_size : Qthread::instance().shepherd_worker_size() ) - , m_shepherd_iter( ( league_size + Qthread::instance().shepherd_size() - 1 ) / Qthread::instance().shepherd_size() ) - { - } - - typedef Impl::QthreadTeamPolicyMember member_type ; - - friend class Impl::QthreadTeamPolicyMember ; -}; - -} /* namespace Impl */ -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -#endif /* #define KOKKOS_QTHREADEXEC_HPP */ - diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1b92494084c10763ad60ba458888204bd2bd77a3 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.cpp @@ -0,0 +1,519 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core_fwd.hpp> + +#if defined( KOKKOS_ENABLE_QTHREADS ) + +#include <stdio.h> +#include <stdlib.h> +#include <iostream> +#include <sstream> +#include <utility> + +#include <Kokkos_Qthreads.hpp> +#include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_Error.hpp> + +// Defines to enable experimental Qthreads functionality. +//#define QTHREAD_LOCAL_PRIORITY +//#define CLONED_TASKS + +//#include <qthread.h> + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +namespace { + +enum { MAXIMUM_QTHREADS_WORKERS = 1024 }; + +/** s_exec is indexed by the reverse rank of the workers + * for faster fan-in / fan-out lookups + * [ n - 1, n - 2, ..., 0 ] + */ +QthreadsExec * s_exec[ MAXIMUM_QTHREADS_WORKERS ]; + +int s_number_shepherds = 0; +int s_number_workers_per_shepherd = 0; +int s_number_workers = 0; + +inline +QthreadsExec ** worker_exec() +{ + return s_exec + s_number_workers - ( qthread_shep() * s_number_workers_per_shepherd + qthread_worker_local( NULL ) + 1 ); +} + +const int s_base_size = QthreadsExec::align_alloc( sizeof(QthreadsExec) ); + +int s_worker_reduce_end = 0; // End of worker reduction memory. +int s_worker_shared_end = 0; // Total of worker scratch memory. +int s_worker_shared_begin = 0; // Beginning of worker shared memory. 
+
+QthreadsExecFunctionPointer volatile s_active_function = 0;
+const void * volatile s_active_function_arg = 0;
+
+} // namespace
+
+} // namespace Impl
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+int Qthreads::is_initialized()
+{
+  return Impl::s_number_workers != 0;
+}
+
+int Qthreads::concurrency()
+{
+  return Impl::s_number_workers_per_shepherd;
+}
+
+int Qthreads::in_parallel()
+{
+  return Impl::s_active_function != 0;
+}
+
+void Qthreads::initialize( int thread_count )
+{
+  // Environment variable: QTHREAD_NUM_SHEPHERDS
+  // Environment variable: QTHREAD_NUM_WORKERS_PER_SHEP
+  // Environment variable: QTHREAD_HWPAR
+
+  {
+    char buffer[256];
+    snprintf( buffer, sizeof(buffer), "QTHREAD_HWPAR=%d", thread_count );
+    putenv( buffer );
+  }
+
+  const bool ok_init = ( QTHREAD_SUCCESS == qthread_initialize() ) &&
+                       ( thread_count == qthread_num_shepherds() * qthread_num_workers_local( NO_SHEPHERD ) ) &&
+                       ( thread_count == qthread_num_workers() );
+
+  bool ok_symmetry = true;
+
+  if ( ok_init ) {
+    Impl::s_number_shepherds = qthread_num_shepherds();
+    Impl::s_number_workers_per_shepherd = qthread_num_workers_local( NO_SHEPHERD );
+    Impl::s_number_workers = Impl::s_number_shepherds * Impl::s_number_workers_per_shepherd;
+
+    for ( int i = 0; ok_symmetry && i < Impl::s_number_shepherds; ++i ) {
+      ok_symmetry = ( Impl::s_number_workers_per_shepherd == qthread_num_workers_local( i ) );
+    }
+  }
+
+  if ( ! ok_init || ! ok_symmetry ) {
+    std::ostringstream msg;
+
+    msg << "Kokkos::Qthreads::initialize(" << thread_count << ") FAILED";
+    msg << " : qthread_num_shepherds = " << qthread_num_shepherds();
+    msg << " : qthread_num_workers_per_shepherd = " << qthread_num_workers_local( NO_SHEPHERD );
+    msg << " : qthread_num_workers = " << qthread_num_workers();
+
+    if ( ! ok_symmetry ) {
+      msg << " : qthread_num_workers_local = {";
+      for ( int i = 0; i < Impl::s_number_shepherds; ++i ) {
+        msg << " " << qthread_num_workers_local( i );
+      }
+      msg << " }";
+    }
+
+    Impl::s_number_workers = 0;
+    Impl::s_number_shepherds = 0;
+    Impl::s_number_workers_per_shepherd = 0;
+
+    if ( ok_init ) { qthread_finalize(); }
+
+    Kokkos::Impl::throw_runtime_exception( msg.str() );
+  }
+
+  Impl::QthreadsExec::resize_worker_scratch( 256, 256 );
+
+  // Initialize the array used for arbitrarily sized atomics. 
+  Impl::init_lock_array_host_space();
+
+}
+
+void Qthreads::finalize()
+{
+  Impl::QthreadsExec::clear_workers();
+
+  if ( Impl::s_number_workers ) {
+    qthread_finalize();
+  }
+
+  Impl::s_number_workers = 0;
+  Impl::s_number_shepherds = 0;
+  Impl::s_number_workers_per_shepherd = 0;
+}
+
+void Qthreads::print_configuration( std::ostream & s, const bool detail )
+{
+  s << "Kokkos::Qthreads {"
+    << " num_shepherds(" << Impl::s_number_shepherds << ")"
+    << " num_workers_per_shepherd(" << Impl::s_number_workers_per_shepherd << ")"
+    << " }" << std::endl;
+}
+
+Qthreads & Qthreads::instance( int )
+{
+  static Qthreads q;
+  return q;
+}
+
+void Qthreads::fence()
+{
+}
+
+int Qthreads::shepherd_size() const { return Impl::s_number_shepherds; }
+int Qthreads::shepherd_worker_size() const { return Impl::s_number_workers_per_shepherd; }
+
+} // namespace Kokkos
+
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+
+namespace Impl {
+
+namespace {
+
+aligned_t driver_exec_all( void * arg )
+{
+  QthreadsExec & exec = **worker_exec();
+
+  (*s_active_function)( exec, s_active_function_arg );
+
+/*
+  fprintf( stdout
+         , "QthreadsExec driver worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
+         , exec.worker_rank()
+         , exec.worker_size()
+         , exec.shepherd_rank()
+         , exec.shepherd_size()
+         , exec.shepherd_worker_rank()
+         , exec.shepherd_worker_size()
+         );
+  fflush(stdout);
+*/
+
+  return 0;
+}
+
+aligned_t driver_resize_worker_scratch( void * arg )
+{
+  static volatile int lock_begin = 0;
+  static volatile int lock_end = 0;
+
+  QthreadsExec ** const exec = worker_exec();
+
+  //----------------------------------------
+  // Serialize allocation for thread safety.
+
+  while ( ! atomic_compare_exchange_strong( & lock_begin, 0, 1 ) ); // Spin wait to claim lock.
+
+  const bool ok = 0 == *exec;
+
+  if ( ok ) { *exec = (QthreadsExec *) malloc( s_base_size + s_worker_shared_end ); }
+
+  lock_begin = 0; // Release lock.
+
+  if ( ok ) { new( *exec ) QthreadsExec(); }
+
+  //----------------------------------------
+  // Wait for all calls to complete to ensure that each worker has executed.
+
+  if ( s_number_workers == 1 + atomic_fetch_add( & lock_end, 1 ) ) { lock_end = 0; }
+
+  while ( lock_end );
+
+/*
+  fprintf( stdout
+         , "QthreadsExec resize worker(%d:%d) shepherd(%d:%d) shepherd_worker(%d:%d) done\n"
+         , (**exec).worker_rank()
+         , (**exec).worker_size()
+         , (**exec).shepherd_rank()
+         , (**exec).shepherd_size()
+         , (**exec).shepherd_worker_rank()
+         , (**exec).shepherd_worker_size()
+         );
+  fflush(stdout);
+*/
+
+  //----------------------------------------
+
+  if ( ! 
ok ) { + fprintf( stderr, "Kokkos::QthreadsExec resize failed\n" ); + fflush( stderr ); + } + + return 0; +} + +void verify_is_process( const char * const label, bool not_active = false ) +{ + const bool not_process = 0 != qthread_shep() || 0 != qthread_worker_local( NULL ); + const bool is_active = not_active && ( s_active_function || s_active_function_arg ); + + if ( not_process || is_active ) { + std::string msg( label ); + msg.append( " : FAILED" ); + if ( not_process ) msg.append(" : not called by main process"); + if ( is_active ) msg.append(" : parallel execution in progress"); + Kokkos::Impl::throw_runtime_exception( msg ); + } +} + +} // namespace + +int QthreadsExec::worker_per_shepherd() +{ + return s_number_workers_per_shepherd; +} + +QthreadsExec::QthreadsExec() +{ + const int shepherd_rank = qthread_shep(); + const int shepherd_worker_rank = qthread_worker_local( NULL ); + const int worker_rank = shepherd_rank * s_number_workers_per_shepherd + shepherd_worker_rank; + + m_worker_base = s_exec; + m_shepherd_base = s_exec + s_number_workers_per_shepherd * ( ( s_number_shepherds - ( shepherd_rank + 1 ) ) ); + m_scratch_alloc = ( (unsigned char *) this ) + s_base_size; + m_reduce_end = s_worker_reduce_end; + m_shepherd_rank = shepherd_rank; + m_shepherd_size = s_number_shepherds; + m_shepherd_worker_rank = shepherd_worker_rank; + m_shepherd_worker_size = s_number_workers_per_shepherd; + m_worker_rank = worker_rank; + m_worker_size = s_number_workers; + m_worker_state = QthreadsExec::Active; +} + +void QthreadsExec::clear_workers() +{ + for ( int iwork = 0; iwork < s_number_workers; ++iwork ) { + QthreadsExec * const exec = s_exec[iwork]; + s_exec[iwork] = 0; + free( exec ); + } +} + +void QthreadsExec::shared_reset( Qthreads::scratch_memory_space & space ) +{ + new( & space ) + Qthreads::scratch_memory_space( + ((unsigned char *) (**m_shepherd_base).m_scratch_alloc ) + s_worker_shared_begin, + s_worker_shared_end - s_worker_shared_begin + ); +} + +void QthreadsExec::resize_worker_scratch( const int reduce_size, const int shared_size ) +{ + const int exec_all_reduce_alloc = align_alloc( reduce_size ); + const int shepherd_scan_alloc = align_alloc( 8 ); + const int shepherd_shared_end = exec_all_reduce_alloc + shepherd_scan_alloc + align_alloc( shared_size ); + + if ( s_worker_reduce_end < exec_all_reduce_alloc || + s_worker_shared_end < shepherd_shared_end ) { + +/* + fprintf( stdout, "QthreadsExec::resize\n"); + fflush(stdout); +*/ + + // Clear current worker memory before allocating new worker memory. + clear_workers(); + + // Increase the buffers to an aligned allocation. + s_worker_reduce_end = exec_all_reduce_alloc; + s_worker_shared_begin = exec_all_reduce_alloc + shepherd_scan_alloc; + s_worker_shared_end = shepherd_shared_end; + + // Need to query which shepherd this main 'process' is running. + + const int main_shep = qthread_shep(); + + // Have each worker resize its memory for proper first-touch. +#if 0 + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + for ( int i = jshep != main_shep ? 0 : 1; i < s_number_workers_per_shepherd; ++i ) { + qthread_fork_to( driver_resize_worker_scratch, NULL, NULL, jshep ); + } + } +#else + // If this function is used before the 'qthreads.task_policy' unit test, + // the 'qthreads.task_policy' unit test fails with a seg-fault within libqthread.so. + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1; + + if ( num_clone ) { + const int ret = qthread_fork_clones_to_local_priority + ( driver_resize_worker_scratch // Function + , NULL // Function data block + , NULL // Pointer to return value feb + , jshep // Shepherd number + , num_clone - 1 // Number of instances - 1 + ); + + assert( ret == QTHREAD_SUCCESS ); + } + } +#endif + + driver_resize_worker_scratch( NULL ); + + // Verify all workers allocated. + + bool ok = true; + for ( int iwork = 0; ok && iwork < s_number_workers; ++iwork ) { ok = 0 != s_exec[iwork]; } + + if ( ! ok ) { + std::ostringstream msg; + msg << "Kokkos::Impl::QthreadsExec::resize : FAILED for workers {"; + for ( int iwork = 0; iwork < s_number_workers; ++iwork ) { + if ( 0 == s_exec[iwork] ) { msg << " " << ( s_number_workers - ( iwork + 1 ) ); } + } + msg << " }"; + Kokkos::Impl::throw_runtime_exception( msg.str() ); + } + } +} + +void QthreadsExec::exec_all( Qthreads &, QthreadsExecFunctionPointer func, const void * arg ) +{ + verify_is_process("QthreadsExec::exec_all(...)",true); + +/* + fprintf( stdout, "QthreadsExec::exec_all\n"); + fflush(stdout); +*/ + + s_active_function = func; + s_active_function_arg = arg; + + // Need to query which shepherd this main 'process' is running. + + const int main_shep = qthread_shep(); + +#if 0 + for ( int jshep = 0, iwork = 0; jshep < s_number_shepherds; ++jshep ) { + for ( int i = jshep != main_shep ? 0 : 1; i < s_number_workers_per_shepherd; ++i, ++iwork ) { + qthread_fork_to( driver_exec_all, NULL, NULL, jshep ); + } + } +#else + // If this function is used before the 'qthreads.task_policy' unit test, + // the 'qthreads.task_policy' unit test fails with a seg-fault within libqthread.so. + for ( int jshep = 0; jshep < s_number_shepherds; ++jshep ) { + const int num_clone = jshep != main_shep ? 
s_number_workers_per_shepherd : s_number_workers_per_shepherd - 1; + + if ( num_clone ) { + const int ret = qthread_fork_clones_to_local_priority + ( driver_exec_all // Function + , NULL // Function data block + , NULL // Pointer to return value feb + , jshep // Shepherd number + , num_clone - 1 // Number of instances - 1 + ); + + assert(ret == QTHREAD_SUCCESS); + } + } +#endif + + driver_exec_all( NULL ); + + s_active_function = 0; + s_active_function_arg = 0; +} + +void * QthreadsExec::exec_all_reduce_result() +{ + return s_exec[0]->m_scratch_alloc; +} + +} // namespace Impl + +} // namespace Kokkos + +namespace Kokkos { + +namespace Impl { + +QthreadsTeamPolicyMember::QthreadsTeamPolicyMember() + : m_exec( **worker_exec() ) + , m_team_shared( 0, 0 ) + , m_team_size( 1 ) + , m_team_rank( 0 ) + , m_league_size( 1 ) + , m_league_end( 1 ) + , m_league_rank( 0 ) +{ + m_exec.shared_reset( m_team_shared ); +} + +QthreadsTeamPolicyMember::QthreadsTeamPolicyMember( const QthreadsTeamPolicyMember::TaskTeam & ) + : m_exec( **worker_exec() ) + , m_team_shared( 0, 0 ) + , m_team_size( s_number_workers_per_shepherd ) + , m_team_rank( m_exec.shepherd_worker_rank() ) + , m_league_size( 1 ) + , m_league_end( 1 ) + , m_league_rank( 0 ) +{ + m_exec.shared_reset( m_team_shared ); +} + +} // namespace Impl + +} // namespace Kokkos + +#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp new file mode 100644 index 0000000000000000000000000000000000000000..64856eb99e014272fd92f638e2d7f312d3039120 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_QthreadsExec.hpp @@ -0,0 +1,640 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_QTHREADSEXEC_HPP +#define KOKKOS_QTHREADSEXEC_HPP + +#include <impl/Kokkos_spinwait.hpp> + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +class QthreadsExec; + +typedef void (*QthreadsExecFunctionPointer)( QthreadsExec &, const void * ); + +class QthreadsExec { +private: + enum { Inactive = 0, Active = 1 }; + + const QthreadsExec * const * m_worker_base; + const QthreadsExec * const * m_shepherd_base; + + void * m_scratch_alloc; ///< Scratch memory [ reduce, team, shared ] + int m_reduce_end; ///< End of scratch reduction memory + + int m_shepherd_rank; + int m_shepherd_size; + + int m_shepherd_worker_rank; + int m_shepherd_worker_size; + + /* + * m_worker_rank = m_shepherd_rank * m_shepherd_worker_size + m_shepherd_worker_rank + * m_worker_size = m_shepherd_size * m_shepherd_worker_size + */ + int m_worker_rank; + int m_worker_size; + + int mutable volatile m_worker_state; + + friend class Kokkos::Qthreads; + + ~QthreadsExec(); + QthreadsExec( const QthreadsExec & ); + QthreadsExec & operator = ( const QthreadsExec & ); + +public: + QthreadsExec(); + + /** Execute the input function on all available Qthreads workers. */ + static void exec_all( Qthreads &, QthreadsExecFunctionPointer, const void * ); + + /** Barrier across all workers participating in the 'exec_all'. */ + void exec_all_barrier() const + { + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_worker_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + /** Barrier across workers within the shepherd with rank < team_rank. */ + void shepherd_barrier( const int team_size ) const + { + if ( m_shepherd_worker_rank < team_size ) { + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + } + } + + /** Reduce across all workers participating in the 'exec_all'. */ + template< class FunctorType, class ReducerType, class ArgTag > + inline + void exec_all_reduce( const FunctorType & func, const ReducerType & reduce ) const + { + typedef Kokkos::Impl::if_c< std::is_same<InvalidType, ReducerType>::value, FunctorType, ReducerType > ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, ArgTag > ValueJoin; + + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + const QthreadsExec & fan = *m_worker_base[j]; + + Impl::spinwait_while_equal( fan.m_worker_state, QthreadsExec::Active ); + + ValueJoin::join( ReducerConditional::select( func, reduce ), m_scratch_alloc, fan.m_scratch_alloc ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + /** Scan across all workers participating in the 'exec_all'. */ + template< class FunctorType, class ArgTag > + inline + void exec_all_scan( const FunctorType & func ) const + { + typedef Kokkos::Impl::FunctorValueInit< FunctorType, ArgTag > ValueInit; + typedef Kokkos::Impl::FunctorValueJoin< FunctorType, ArgTag > ValueJoin; + typedef Kokkos::Impl::FunctorValueOps< FunctorType, ArgTag > ValueOps; + + const int rev_rank = m_worker_size - ( m_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_worker_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads. + // Worker data is in reverse order, so m_worker_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + for ( int i = 1; i < m_worker_size; ++i ) { + ValueOps::copy( func + , m_worker_base[i-1]->m_scratch_alloc + , m_worker_base[i]->m_scratch_alloc + ); + } + + ValueInit::init( func, m_worker_base[m_worker_size-1]->m_scratch_alloc ); + + // Join from lower ranking to higher ranking worker. + // Value at m_worker_base[n-1] is zero so skip adding it to m_worker_base[n-2]. + for ( int i = m_worker_size - 1; --i > 0; ) { + ValueJoin::join( func, m_worker_base[i-1]->m_scratch_alloc, m_worker_base[i]->m_scratch_alloc ); + } + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < m_worker_size ); n <<= 1 ) { + m_worker_base[j]->m_worker_state = QthreadsExec::Active; + } + } + + //---------------------------------------- + + template< class Type > + inline + volatile Type * shepherd_team_scratch_value() const + { return (volatile Type*)( ( (unsigned char *) m_scratch_alloc ) + m_reduce_end ); } + + template< class Type > + inline + void shepherd_broadcast( Type & value, const int team_size, const int team_rank ) const + { + if ( m_shepherd_base ) { + Type * const shared_value = m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + if ( m_shepherd_worker_rank == team_rank ) { *shared_value = value; } + memory_fence(); + shepherd_barrier( team_size ); + value = *shared_value; + } + } + + template< class Type > + inline + Type shepherd_reduce( const int team_size, const Type & value ) const + { + volatile Type * const shared_value = shepherd_team_scratch_value<Type>(); + *shared_value = value; +// *shepherd_team_scratch_value<Type>() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! 
( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + Type & accum = *m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1; i < n; ++i ) { + accum += *m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + } + for ( int i = 1; i < n; ++i ) { + *m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum; + } + + memory_fence(); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value<Type>(); + } + + template< class JoinOp > + inline + typename JoinOp::value_type + shepherd_reduce( const int team_size + , const typename JoinOp::value_type & value + , const JoinOp & op ) const + { + typedef typename JoinOp::value_type Type; + + volatile Type * const shared_value = shepherd_team_scratch_value<Type>(); + *shared_value = value; +// *shepherd_team_scratch_value<Type>() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + volatile Type & accum = *m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1; i < team_size; ++i ) { + op.join( accum, *m_shepherd_base[i]->shepherd_team_scratch_value<Type>() ); + } + for ( int i = 1; i < team_size; ++i ) { + *m_shepherd_base[i]->shepherd_team_scratch_value<Type>() = accum; + } + + memory_fence(); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value<Type>(); + } + + template< class Type > + inline + Type shepherd_scan( const int team_size + , const Type & value + , Type * const global_value = 0 ) const + { + *shepherd_team_scratch_value<Type>() = value; + + memory_fence(); + + const int rev_rank = team_size - ( m_shepherd_worker_rank + 1 ); + + int n, j; + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + Impl::spinwait_while_equal( m_shepherd_base[j]->m_worker_state, QthreadsExec::Active ); + } + + if ( rev_rank ) { + m_worker_state = QthreadsExec::Inactive; + Impl::spinwait_while_equal( m_worker_state, QthreadsExec::Inactive ); + } + else { + // Root thread scans across values before releasing threads. + // Worker data is in reverse order, so m_shepherd_base[0] is the + // highest ranking thread. + + // Copy from lower ranking to higher ranking worker. + + Type accum = *m_shepherd_base[0]->shepherd_team_scratch_value<Type>(); + for ( int i = 1; i < team_size; ++i ) { + const Type tmp = *m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + accum += tmp; + *m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() = tmp; + } + + *m_shepherd_base[team_size-1]->shepherd_team_scratch_value<Type>() = + global_value ? atomic_fetch_add( global_value, accum ) : 0; + + // Join from lower ranking to higher ranking worker. 
+ for ( int i = team_size; --i; ) { + *m_shepherd_base[i-1]->shepherd_team_scratch_value<Type>() += *m_shepherd_base[i]->shepherd_team_scratch_value<Type>(); + } + + memory_fence(); + } + + for ( n = 1; ( ! ( rev_rank & n ) ) && ( ( j = rev_rank + n ) < team_size ); n <<= 1 ) { + m_shepherd_base[j]->m_worker_state = QthreadsExec::Active; + } + + return *shepherd_team_scratch_value<Type>(); + } + + //---------------------------------------- + + static inline + int align_alloc( int size ) + { + enum { ALLOC_GRAIN = 1 << 6 /* power of two, 64bytes */ }; + enum { ALLOC_GRAIN_MASK = ALLOC_GRAIN - 1 }; + return ( size + ALLOC_GRAIN_MASK ) & ~ALLOC_GRAIN_MASK; + } + + void shared_reset( Qthreads::scratch_memory_space & ); + + void * exec_all_reduce_value() const { return m_scratch_alloc; } + + static void * exec_all_reduce_result(); + + static void resize_worker_scratch( const int reduce_size, const int shared_size ); + static void clear_workers(); + + //---------------------------------------- + + inline int worker_rank() const { return m_worker_rank; } + inline int worker_size() const { return m_worker_size; } + inline int shepherd_worker_rank() const { return m_shepherd_worker_rank; } + inline int shepherd_worker_size() const { return m_shepherd_worker_size; } + inline int shepherd_rank() const { return m_shepherd_rank; } + inline int shepherd_size() const { return m_shepherd_size; } + + static int worker_per_shepherd(); +}; + +} // namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +namespace Kokkos { + +namespace Impl { + +class QthreadsTeamPolicyMember { +private: + typedef Kokkos::Qthreads execution_space; + typedef execution_space::scratch_memory_space scratch_memory_space; + + Impl::QthreadsExec & m_exec; + scratch_memory_space m_team_shared; + const int m_team_size; + const int m_team_rank; + const int m_league_size; + const int m_league_end; + int m_league_rank; + +public: + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_shmem() const { return m_team_shared; } + + KOKKOS_INLINE_FUNCTION int league_rank() const { return m_league_rank; } + KOKKOS_INLINE_FUNCTION int league_size() const { return m_league_size; } + KOKKOS_INLINE_FUNCTION int team_rank() const { return m_team_rank; } + KOKKOS_INLINE_FUNCTION int team_size() const { return m_team_size; } + + KOKKOS_INLINE_FUNCTION void team_barrier() const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + {} +#else + { m_exec.shepherd_barrier( m_team_size ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_broadcast( const Type & value, int rank ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_broadcast<Type>( value, m_team_size, rank ); } +#endif + + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_reduce( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_reduce<Type>( m_team_size, value ); } +#endif + + template< typename JoinOp > + KOKKOS_INLINE_FUNCTION typename JoinOp::value_type + team_reduce( const typename JoinOp::value_type & value + , const JoinOp & op ) const +#if ! 
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return typename JoinOp::value_type(); } +#else + { return m_exec.template shepherd_reduce<JoinOp>( m_team_size, value, op ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering. + * + * The highest rank thread can compute the reduction total as + * reduction_total = dev.team_scan( value ) + value; + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan<Type>( m_team_size, value ); } +#endif + + /** \brief Intra-team exclusive prefix sum with team_rank() ordering + * with intra-team non-deterministic ordering accumulation. + * + * The global inter-team accumulation value will, at the end of the league's + * parallel execution, be the scan's total. Parallel execution ordering of + * the league's teams is non-deterministic. As such the base value for each + * team's scan operation is similarly non-deterministic. + */ + template< typename Type > + KOKKOS_INLINE_FUNCTION Type team_scan( const Type & value, Type * const global_accum ) const +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { return Type(); } +#else + { return m_exec.template shepherd_scan<Type>( m_team_size, value, global_accum ); } +#endif + + //---------------------------------------- + // Private driver for task-team parallel. + + struct TaskTeam {}; + + QthreadsTeamPolicyMember(); + explicit QthreadsTeamPolicyMember( const TaskTeam & ); + + //---------------------------------------- + // Private for the driver ( for ( member_type i( exec, team ); i; i.next_team() ) { ... } + + // Initialize. + template< class ... Properties > + QthreadsTeamPolicyMember( Impl::QthreadsExec & exec + , const Kokkos::Impl::TeamPolicyInternal< Qthreads, Properties... > & team ) + : m_exec( exec ) + , m_team_shared( 0, 0 ) + , m_team_size( team.m_team_size ) + , m_team_rank( exec.shepherd_worker_rank() ) + , m_league_size( team.m_league_size ) + , m_league_end( team.m_league_size - team.m_shepherd_iter * ( exec.shepherd_size() - ( exec.shepherd_rank() + 1 ) ) ) + , m_league_rank( m_league_end > team.m_shepherd_iter ? m_league_end - team.m_shepherd_iter : 0 ) + { + m_exec.shared_reset( m_team_shared ); + } + + // Continue. + operator bool () const { return m_league_rank < m_league_end; } + + // Iterate. + void next_team() { ++m_league_rank; m_exec.shared_reset( m_team_shared ); } +}; + +template< class ... Properties > +class TeamPolicyInternal< Kokkos::Qthreads, Properties ... > + : public PolicyTraits< Properties... > +{ +private: + const int m_league_size; + const int m_team_size; + const int m_shepherd_iter; + +public: + //! Tag this class as a kokkos execution policy. + typedef TeamPolicyInternal execution_policy; + typedef Qthreads execution_space; + typedef PolicyTraits< Properties ... 
> traits; + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) + { return Qthreads::instance().shepherd_worker_size(); } + + template< class FunctorType > + static int team_size_recommended( const FunctorType & f ) + { return team_size_max( f ); } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType & f, const int& ) + { return team_size_max( f ); } + + //---------------------------------------- + + inline int team_size() const { return m_team_size; } + inline int league_size() const { return m_league_size; } + + // One active team per shepherd. + TeamPolicyInternal( Kokkos::Qthreads & q + , const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < q.shepherd_worker_size() + ? team_size : q.shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) + {} + + // TODO: Make sure this is correct. + // One active team per shepherd. + TeamPolicyInternal( Kokkos::Qthreads & q + , const int league_size + , const Kokkos::AUTO_t & /* team_size_request */ + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( q.shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + q.shepherd_size() - 1 ) / q.shepherd_size() ) + {} + + // One active team per shepherd. + TeamPolicyInternal( const int league_size + , const int team_size + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( team_size < Qthreads::instance().shepherd_worker_size() + ? team_size : Qthreads::instance().shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + Qthreads::instance().shepherd_size() - 1 ) / Qthreads::instance().shepherd_size() ) + {} + + // TODO: Make sure this is correct. + // One active team per shepherd. + TeamPolicyInternal( const int league_size + , const Kokkos::AUTO_t & /* team_size_request */ + , const int /* vector_length */ = 0 + ) + : m_league_size( league_size ) + , m_team_size( Qthreads::instance().shepherd_worker_size() ) + , m_shepherd_iter( ( league_size + Qthreads::instance().shepherd_size() - 1 ) / Qthreads::instance().shepherd_size() ) + {} + + // TODO: Doesn't do anything yet. Fix this. 
+ /** \brief set chunk_size to a discrete value*/ + inline TeamPolicyInternal set_chunk_size(typename traits::index_type chunk_size_) const { + TeamPolicyInternal p = *this; +// p.m_chunk_size = chunk_size_; + return p; + } + + typedef Impl::QthreadsTeamPolicyMember member_type; + + friend class Impl::QthreadsTeamPolicyMember; +}; + +} // namespace Impl + +} // namespace Kokkos + +//---------------------------------------------------------------------------- + +#endif // #define KOKKOS_QTHREADSEXEC_HPP diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp similarity index 86% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp index cb5b18094833a48905293175f6655f08f4596c8c..9f996075403f7cdd06fddfcb60d829dfab64bf0a 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Parallel.hpp @@ -41,8 +41,8 @@ //@HEADER */ -#ifndef KOKKOS_QTHREAD_PARALLEL_HPP -#define KOKKOS_QTHREAD_PARALLEL_HPP +#ifndef KOKKOS_QTHREADS_PARALLEL_HPP +#define KOKKOS_QTHREADS_PARALLEL_HPP #include <vector> @@ -51,7 +51,7 @@ #include <impl/Kokkos_StaticAssert.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> -#include <Qthread/Kokkos_QthreadExec.hpp> +#include <Qthreads/Kokkos_QthreadsExec.hpp> //---------------------------------------------------------------------------- @@ -63,7 +63,7 @@ namespace Impl { template< class FunctorType , class ... Traits > class ParallelFor< FunctorType , Kokkos::RangePolicy< Traits ... > - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -99,7 +99,7 @@ private: } // Function is called once by every concurrent thread. - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelFor & self = * ((const ParallelFor *) arg ); @@ -116,7 +116,7 @@ public: inline void execute() const { - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelFor::exec , this ); } @@ -134,7 +134,7 @@ template< class FunctorType , class ReducerType , class ... Traits > class ParallelReduce< FunctorType , Kokkos::RangePolicy< Traits ... > , ReducerType - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -186,7 +186,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelReduce & self = * ((const ParallelReduce *) arg ); @@ -205,10 +205,10 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + QthreadsExec::resize_worker_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelReduce::exec , this ); - const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result(); Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , data ); @@ -246,11 +246,11 @@ public: template< class FunctorType , class ... Properties > class ParallelFor< FunctorType , TeamPolicy< Properties ... 
> - , Kokkos::Qthread > + , Kokkos::Qthreads > { private: - typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthreads , Properties ... > Policy ; typedef typename Policy::member_type Member ; typedef typename Policy::work_tag WorkTag ; @@ -282,7 +282,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelFor & self = * ((const ParallelFor *) arg ); @@ -297,10 +297,10 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch + QthreadsExec::resize_worker_scratch ( /* reduction memory */ 0 , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelFor::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelFor::exec , this ); } ParallelFor( const FunctorType & arg_functor , @@ -316,12 +316,12 @@ template< class FunctorType , class ReducerType , class ... Properties > class ParallelReduce< FunctorType , TeamPolicy< Properties... > , ReducerType - , Kokkos::Qthread + , Kokkos::Qthreads > { private: - typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthread , Properties ... > Policy ; + typedef Kokkos::Impl::TeamPolicyInternal< Kokkos::Qthreads , Properties ... > Policy ; typedef typename Policy::work_tag WorkTag ; typedef typename Policy::member_type Member ; @@ -365,7 +365,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelReduce & self = * ((const ParallelReduce *) arg ); @@ -383,13 +383,13 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch + QthreadsExec::resize_worker_scratch ( /* reduction memory */ ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , /* team shared memory */ FunctorTeamShmemSize< FunctorType >::value( m_functor , m_policy.team_size() ) ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelReduce::exec , this ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelReduce::exec , this ); - const pointer_type data = (pointer_type) QthreadExec::exec_all_reduce_result(); + const pointer_type data = (pointer_type) QthreadsExec::exec_all_reduce_result(); Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer), data ); @@ -429,7 +429,7 @@ public: template< class FunctorType , class ... Traits > class ParallelScan< FunctorType , Kokkos::RangePolicy< Traits ... 
> - , Kokkos::Qthread + , Kokkos::Qthreads > { private: @@ -474,7 +474,7 @@ private: } } - static void exec( QthreadExec & exec , const void * arg ) + static void exec( QthreadsExec & exec , const void * arg ) { const ParallelScan & self = * ((const ParallelScan *) arg ); @@ -497,8 +497,8 @@ public: inline void execute() const { - QthreadExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 ); - Impl::QthreadExec::exec_all( Qthread::instance() , & ParallelScan::exec , this ); + QthreadsExec::resize_worker_scratch( ValueTraits::value_size( m_functor ) , 0 ); + Impl::QthreadsExec::exec_all( Qthreads::instance() , & ParallelScan::exec , this ); } ParallelScan( const FunctorType & arg_functor @@ -521,37 +521,37 @@ namespace Kokkos { template< typename iType > KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember > -TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType& count ) +Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember > +TeamThreadRange( const Impl::QthreadsTeamPolicyMember& thread, const iType& count ) { - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, count ); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember >( thread, count ); } template< typename iType1, typename iType2 > KOKKOS_INLINE_FUNCTION Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::QthreadTeamPolicyMember > -TeamThreadRange( const Impl::QthreadTeamPolicyMember& thread, const iType1 & begin, const iType2 & end ) + Impl::QthreadsTeamPolicyMember > +TeamThreadRange( const Impl::QthreadsTeamPolicyMember& thread, const iType1 & begin, const iType2 & end ) { typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadTeamPolicyMember >( thread, iType(begin), iType(end) ); + return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::QthreadsTeamPolicyMember >( thread, iType(begin), iType(end) ); } template<typename iType> KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember > - ThreadVectorRange(const Impl::QthreadTeamPolicyMember& thread, const iType& count) { - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >(thread,count); +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember > + ThreadVectorRange(const Impl::QthreadsTeamPolicyMember& thread, const iType& count) { + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember >(thread,count); } KOKKOS_INLINE_FUNCTION -Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember> PerTeam(const Impl::QthreadTeamPolicyMember& thread) { - return Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>(thread); +Impl::ThreadSingleStruct<Impl::QthreadsTeamPolicyMember> PerTeam(const Impl::QthreadsTeamPolicyMember& thread) { + return Impl::ThreadSingleStruct<Impl::QthreadsTeamPolicyMember>(thread); } KOKKOS_INLINE_FUNCTION -Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::QthreadTeamPolicyMember& thread) { - return Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>(thread); +Impl::VectorSingleStruct<Impl::QthreadsTeamPolicyMember> PerThread(const Impl::QthreadsTeamPolicyMember& thread) { + return Impl::VectorSingleStruct<Impl::QthreadsTeamPolicyMember>(thread); } /** \brief Inter-thread parallel_for. 
Executes lambda(iType i) for each i=0..N-1. @@ -560,7 +560,7 @@ Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember> PerThread(const Impl::Qt * This functionality requires C++11 support.*/ template<typename iType, class Lambda> KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, const Lambda& lambda) { +void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember>& loop_boundaries, const Lambda& lambda) { for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) lambda(i); } @@ -571,7 +571,7 @@ void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qthrea * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember>& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); @@ -595,7 +595,7 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth * '1 for *'). This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember>& loop_boundaries, +void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember>& loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { ValueType result = init_result; @@ -615,7 +615,7 @@ void parallel_reduce(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::Qth * This functionality requires C++11 support.*/ template<typename iType, class Lambda> KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& +void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember >& loop_boundaries, const Lambda& lambda) { #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep @@ -630,7 +630,7 @@ void parallel_for(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qthr * val is performed and put into result. This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember >& loop_boundaries, const Lambda & lambda, ValueType& result) { result = ValueType(); #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP @@ -652,7 +652,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q * '1 for *'). 
This functionality requires C++11 support.*/ template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION -void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& +void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember >& loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { ValueType result = init_result; @@ -679,7 +679,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Q * This functionality requires C++11 support.*/ template< typename iType, class FunctorType > KOKKOS_INLINE_FUNCTION -void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadTeamPolicyMember >& +void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::QthreadsTeamPolicyMember >& loop_boundaries, const FunctorType & lambda) { typedef Kokkos::Impl::FunctorValueTraits< FunctorType , void > ValueTraits ; @@ -697,25 +697,25 @@ void parallel_scan(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::Qth template<class FunctorType> KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) { +void single(const Impl::VectorSingleStruct<Impl::QthreadsTeamPolicyMember>& single_struct, const FunctorType& lambda) { lambda(); } template<class FunctorType> KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda) { +void single(const Impl::ThreadSingleStruct<Impl::QthreadsTeamPolicyMember>& single_struct, const FunctorType& lambda) { if(single_struct.team_member.team_rank()==0) lambda(); } template<class FunctorType, class ValueType> KOKKOS_INLINE_FUNCTION -void single(const Impl::VectorSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { +void single(const Impl::VectorSingleStruct<Impl::QthreadsTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { lambda(val); } template<class FunctorType, class ValueType> KOKKOS_INLINE_FUNCTION -void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { +void single(const Impl::ThreadSingleStruct<Impl::QthreadsTeamPolicyMember>& single_struct, const FunctorType& lambda, ValueType& val) { if(single_struct.team_member.team_rank()==0) { lambda(val); } @@ -724,4 +724,4 @@ void single(const Impl::ThreadSingleStruct<Impl::QthreadTeamPolicyMember>& singl } // namespace Kokkos -#endif /* #define KOKKOS_QTHREAD_PARALLEL_HPP */ +#endif /* #define KOKKOS_QTHREADS_PARALLEL_HPP */ diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp new file mode 100644 index 0000000000000000000000000000000000000000..614a2c03f03e8c9cfbd15653295a254a350fb25a --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.cpp @@ -0,0 +1,320 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> + +#if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) + +#include <impl/Kokkos_TaskQueue_impl.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template class TaskQueue< Kokkos::Qthreads > ; + +//---------------------------------------------------------------------------- + +TaskExec< Kokkos::Qthreads >::TaskExec() + : m_self_exec( 0 ), + m_team_exec( 0 ), + m_sync_mask( 0 ), + m_sync_value( 0 ), + m_sync_step( 0 ), + m_group_rank( 0 ), + m_team_rank( 0 ), + m_team_size( 1 ) +{} + +TaskExec< Kokkos::Qthreads >:: +TaskExec( Kokkos::Impl::QthreadsExec & arg_exec, int const arg_team_size ) + : m_self_exec( & arg_exec ), + m_team_exec( arg_exec.pool_rev(arg_exec.pool_rank_rev() / arg_team_size) ), + m_sync_mask( 0 ), + m_sync_value( 0 ), + m_sync_step( 0 ), + m_group_rank( arg_exec.pool_rank_rev() / arg_team_size ), + m_team_rank( arg_exec.pool_rank_rev() % arg_team_size ), + m_team_size( arg_team_size ) +{ + // This team spans + // m_self_exec->pool_rev( team_size * group_rank ) + // m_self_exec->pool_rev( team_size * ( group_rank + 1 ) - 1 ) + + int64_t volatile * const sync = (int64_t *) m_self_exec->scratch_reduce(); + + sync[0] = int64_t(0) ; + sync[1] = int64_t(0) ; + + for ( int i = 0 ; i < m_team_size ; ++i ) { + m_sync_value |= int64_t(1) << (8*i); + m_sync_mask |= int64_t(3) << (8*i); + } + + Kokkos::memory_fence(); +} + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +void TaskExec< Kokkos::Qthreads >::team_barrier() const +{ + if ( 1 < m_team_size ) { + + if ( m_team_exec->scratch_reduce_size() < int(2 * sizeof(int64_t)) ) { + Kokkos::abort("TaskQueue<Qthreads> scratch_reduce memory too small"); + } + + // Use team 
shared memory to synchronize.
+    // Alternate memory locations between barriers to avoid a sequence
+    // of barriers overtaking one another.
+
+    int64_t volatile * const sync =
+      ((int64_t *) m_team_exec->scratch_reduce()) + ( m_sync_step & 0x01 );
+
+    // This team member sets one byte within the sync variable
+    int8_t volatile * const sync_self =
+      ((int8_t *) sync) + m_team_rank ;
+
+#if 0
+fprintf( stdout,
+  "barrier group(%d) member(%d) step(%d) wait(%lx) : before(%lx)\n",
+  m_group_rank,
+  m_team_rank,
+  m_sync_step,
+  m_sync_value,
+  *sync
+  );
+fflush(stdout);
+#endif
+
+    *sync_self = int8_t( m_sync_value & 0x03 ); // signal arrival
+
+    while ( m_sync_value != *sync ); // wait for team to arrive
+
+#if 0
+fprintf( stdout,
+  "barrier group(%d) member(%d) step(%d) wait(%lx) : after(%lx)\n",
+  m_group_rank,
+  m_team_rank,
+  m_sync_step,
+  m_sync_value,
+  *sync
+  );
+fflush(stdout);
+#endif
+
+    ++m_sync_step ;
+
+    if ( 0 == ( 0x01 & m_sync_step ) ) { // Every other step
+      m_sync_value ^= m_sync_mask ;
+      if ( 1000 < m_sync_step ) m_sync_step = 0 ;
+    }
+  }
+}
+
+#endif
+
+//----------------------------------------------------------------------------
+
+void TaskQueueSpecialization< Kokkos::Qthreads >::execute
+  ( TaskQueue< Kokkos::Qthreads > * const queue )
+{
+  using execution_space = Kokkos::Qthreads ;
+  using queue_type = TaskQueue< execution_space > ;
+  using task_root_type = TaskBase< execution_space, void, void > ;
+  using PoolExec = Kokkos::Impl::QthreadsExec ;
+  using Member = TaskExec< execution_space > ;
+
+  task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
+
+  // Required: team_size <= 8
+
+  const int team_size = PoolExec::pool_size(2); // Threads per core
+  // const int team_size = PoolExec::pool_size(1); // Threads per NUMA
+
+  if ( 8 < team_size ) {
+    Kokkos::abort("TaskQueue<Qthreads> unsupported team size");
+  }
+
+#pragma omp parallel
+  {
+    PoolExec & self = *PoolExec::get_thread_omp();
+
+    Member single_exec ;
+    Member team_exec( self, team_size );
+
+    // Team shared memory
+    task_root_type * volatile * const task_shared =
+      (task_root_type **) team_exec.m_team_exec->scratch_thread();
+
+// Barrier across entire Qthreads thread pool to ensure initialization
+#pragma omp barrier
+
+    // Loop until all queues are empty and no tasks in flight
+
+    do {
+
+      // Each team lead attempts to acquire either a thread team task
+      // or collection of single thread tasks for the team.
+
+      if ( 0 == team_exec.team_rank() ) {
+
+        task_root_type * tmp =
+          0 < *((volatile int *) & queue->m_ready_count) ? 
end : 0 ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == tmp ; ++i ) { + for ( int j = 0 ; j < 2 && end == tmp ; ++j ) { + tmp = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + *task_shared = tmp ; + + // Fence to be sure shared_task_array is stored + Kokkos::memory_fence(); + } + + // Whole team waits for every team member to reach this statement + team_exec.team_barrier(); + + Kokkos::memory_fence(); + + task_root_type * const task = *task_shared ; + +#if 0 +fprintf( stdout, + "\nexecute group(%d) member(%d) task_shared(0x%lx) task(0x%lx)\n", + team_exec.m_group_rank, + team_exec.m_team_rank, + uintptr_t(task_shared), + uintptr_t(task) + ); +fflush(stdout); +#endif + + if ( 0 == task ) break ; // 0 == m_ready_count + + if ( end == task ) { + team_exec.team_barrier(); + } + else if ( task_root_type::TaskTeam == task->m_task_type ) { + // Thread Team Task + (*task->m_apply)( task, & team_exec ); + + // The m_apply function performs a barrier + + if ( 0 == team_exec.team_rank() ) { + // team member #0 completes the task, which may delete the task + queue->complete( task ); + } + } + else { + // Single Thread Task + + if ( 0 == team_exec.team_rank() ) { + + (*task->m_apply)( task, & single_exec ); + + queue->complete( task ); + } + + // All team members wait for whole team to reach this statement. + // Not necessary to complete the task. + // Is necessary to prevent task_shared from being updated + // before it is read by all threads. + team_exec.team_barrier(); + } + } while(1); + } +// END #pragma omp parallel + +} + +void TaskQueueSpecialization< Kokkos::Qthreads >:: + iff_single_thread_recursive_execute + ( TaskQueue< Kokkos::Qthreads > * const queue ) +{ + using execution_space = Kokkos::Qthreads ; + using queue_type = TaskQueue< execution_space > ; + using task_root_type = TaskBase< execution_space, void, void > ; + using Member = TaskExec< execution_space > ; + + if ( 1 == omp_get_num_threads() ) { + + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + + Member single_exec ; + + task_root_type * task = end ; + + do { + + task = end ; + + // Loop by priority and then type + for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { + for ( int j = 0 ; j < 2 && end == task ; ++j ) { + task = queue_type::pop_task( & queue->m_ready[i][j] ); + } + } + + if ( end == task ) break ; + + (*task->m_apply)( task, & single_exec ); + + queue->complete( task ); + + } while(1); + } +} + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_QTHREADS ) && defined( KOKKOS_ENABLE_TASKPOLICY ) */ + + diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp new file mode 100644 index 0000000000000000000000000000000000000000..836452dde93767f172e47d2c19f74498e4dde246 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_Task.hpp @@ -0,0 +1,156 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP +#define KOKKOS_IMPL_QTHREADS_TASK_HPP + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskQueueSpecialization< Kokkos::Qthreads > +{ +public: + + using execution_space = Kokkos::Qthreads ; + using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; + using task_base_type = Kokkos::Impl::TaskBase< execution_space, void, void > ; + + // Must specify memory space + using memory_space = Kokkos::HostSpace ; + + static + void iff_single_thread_recursive_execute( queue_type * const ); + + // Must provide task queue execution function + static void execute( queue_type * const ); + + // Must provide mechanism to set function pointer in + // execution space from the host process. 
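+  // (For Qthreads, a host execution space, this reduces to a direct pointer assignment performed on the host.)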
+ template< typename FunctorType > + static + void proc_set_apply( task_base_type::function_type * ptr ) + { + using TaskType = TaskBase< execution_space, + typename FunctorType::value_type, + FunctorType + > ; + *ptr = TaskType::apply ; + } +}; + +extern template class TaskQueue< Kokkos::Qthreads > ; + +//---------------------------------------------------------------------------- + +template<> +class TaskExec< Kokkos::Qthreads > +{ +private: + + TaskExec( TaskExec && ) = delete ; + TaskExec( TaskExec const & ) = delete ; + TaskExec & operator = ( TaskExec && ) = delete ; + TaskExec & operator = ( TaskExec const & ) = delete ; + + + using PoolExec = Kokkos::Impl::QthreadsExec ; + + friend class Kokkos::Impl::TaskQueue< Kokkos::Qthreads > ; + friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Qthreads > ; + + PoolExec * const m_self_exec ; ///< This thread's thread pool data structure + PoolExec * const m_team_exec ; ///< Team thread's thread pool data structure + int64_t m_sync_mask ; + int64_t mutable m_sync_value ; + int mutable m_sync_step ; + int m_group_rank ; ///< Which "team" subset of thread pool + int m_team_rank ; ///< Which thread within a team + int m_team_size ; + + TaskExec(); + TaskExec( PoolExec & arg_exec, int arg_team_size ); + +public: + +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + void * team_shared() const + { return m_team_exec ? m_team_exec->scratch_thread() : (void*) 0 ; } + + int team_shared_size() const + { return m_team_exec ? m_team_exec->scratch_thread_size() : 0 ; } + + /**\brief Whole team enters this function call + * before any teeam member returns from + * this function call. + */ + void team_barrier() const ; +#else + KOKKOS_INLINE_FUNCTION void team_barrier() const {} + KOKKOS_INLINE_FUNCTION void * team_shared() const { return 0 ; } + KOKKOS_INLINE_FUNCTION int team_shared_size() const { return 0 ; } +#endif + + KOKKOS_INLINE_FUNCTION + int team_rank() const { return m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const { return m_team_size ; } +}; + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ +#endif /* #ifndef KOKKOS_IMPL_QTHREADS_TASK_HPP */ + diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old similarity index 91% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old index 50444177ceaa46218f9757636d46c8a1a0b339bf..aa159cff6a5211d721a7b6beb31a5969851d080d 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.cpp.old @@ -41,11 +41,11 @@ //@HEADER */ -// Experimental unified task-data parallel manycore LDRD +// Experimental unified task-data parallel manycore LDRD. 
#include <Kokkos_Core_fwd.hpp> -#if defined( KOKKOS_ENABLE_QTHREAD ) +#if defined( KOKKOS_ENABLE_QTHREADS ) #include <stdio.h> @@ -56,17 +56,15 @@ #include <string> #include <Kokkos_Atomic.hpp> -#include <Qthread/Kokkos_Qthread_TaskPolicy.hpp> +#include <Qthreads/Kokkos_Qthreads_TaskPolicy.hpp> #if defined( KOKKOS_ENABLE_TASKDAG ) -//---------------------------------------------------------------------------- - namespace Kokkos { namespace Experimental { namespace Impl { -typedef TaskMember< Kokkos::Qthread , void , void > Task ; +typedef TaskMember< Kokkos::Qthreads , void , void > Task ; namespace { @@ -173,16 +171,16 @@ Task::TaskMember( const function_dealloc_type arg_dealloc void Task::throw_error_add_dependence() const { - std::cerr << "TaskMember< Qthread >::add_dependence ERROR" + std::cerr << "TaskMember< Qthreads >::add_dependence ERROR" << " state(" << m_state << ")" << " dep_size(" << m_dep_size << ")" << std::endl ; - throw std::runtime_error("TaskMember< Qthread >::add_dependence ERROR"); + throw std::runtime_error("TaskMember< Qthreads >::add_dependence ERROR"); } void Task::throw_error_verify_type() { - throw std::runtime_error("TaskMember< Qthread >::verify_type ERROR"); + throw std::runtime_error("TaskMember< Qthreads >::verify_type ERROR"); } //---------------------------------------------------------------------------- @@ -190,7 +188,7 @@ void Task::throw_error_verify_type() #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) void Task::assign( Task ** const lhs , Task * rhs , const bool no_throw ) { - static const char msg_error_header[] = "Kokkos::Impl::TaskManager<Kokkos::Qthread>::assign ERROR" ; + static const char msg_error_header[] = "Kokkos::Impl::TaskManager<Kokkos::Qthreads>::assign ERROR" ; static const char msg_error_count[] = ": negative reference count" ; static const char msg_error_complete[] = ": destroy task that is not complete" ; static const char msg_error_dependences[] = ": destroy task that has dependences" ; @@ -294,7 +292,7 @@ fflush(stdout); assign( & m_dep[i] , 0 ); } - // Set qthread FEB to full so that dependent tasks are allowed to execute. + // Set Qthreads FEB to full so that dependent tasks are allowed to execute. // This 'task' may be deleted immediately following this function call. qthread_fill( & m_qfeb ); @@ -319,10 +317,10 @@ aligned_t Task::qthread_func( void * arg ) ); if ( task->m_apply_team && ! task->m_apply_single ) { - Kokkos::Impl::QthreadTeamPolicyMember::TaskTeam task_team_tag ; + Kokkos::Impl::QthreadsTeamPolicyMember::TaskTeam task_team_tag ; // Initialize team size and rank with shephered info - Kokkos::Impl::QthreadTeamPolicyMember member( task_team_tag ); + Kokkos::Impl::QthreadsTeamPolicyMember member( task_team_tag ); (*task->m_apply_team)( task , member ); @@ -344,7 +342,7 @@ fflush(stdout); } else if ( task->m_apply_team && task->m_apply_single == reinterpret_cast<function_single_type>(1) ) { // Team hard-wired to one, no cloning - Kokkos::Impl::QthreadTeamPolicyMember member ; + Kokkos::Impl::QthreadsTeamPolicyMember member ; (*task->m_apply_team)( task , member ); task->closeout(); } @@ -384,8 +382,8 @@ void Task::schedule() // Increment active task count before spawning. Kokkos::atomic_increment( m_active_count ); - // spawn in qthread. must malloc the precondition array and give to qthread. - // qthread will eventually free this allocation so memory will not be leaked. + // spawn in Qthreads. must malloc the precondition array and give to Qthreads. 
+ // Qthreads will eventually free this allocation so memory will not be leaked. // concern with thread safety of malloc, does this need to be guarded? aligned_t ** qprecon = (aligned_t **) malloc( ( m_dep_size + 1 ) * sizeof(aligned_t *) ); @@ -393,7 +391,7 @@ void Task::schedule() qprecon[0] = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) ); for ( int i = 0 ; i < m_dep_size ; ++i ) { - qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthread precondition flag + qprecon[i+1] = & m_dep[i]->m_qfeb ; // Qthreads precondition flag } if ( m_apply_team && ! m_apply_single ) { @@ -446,7 +444,7 @@ fflush(stdout); namespace Kokkos { namespace Experimental { -TaskPolicy< Kokkos::Qthread >:: +TaskPolicy< Kokkos::Qthreads >:: TaskPolicy ( const unsigned /* arg_task_max_count */ , const unsigned /* arg_task_max_size */ @@ -462,7 +460,7 @@ TaskPolicy if ( m_team_size != 1 && m_team_size != num_worker_per_shepherd ) { std::ostringstream msg ; - msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthread >( " + msg << "Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads >( " << "default_depedence = " << arg_task_default_dependence_capacity << " , team_size = " << arg_task_team_size << " ) ERROR, valid team_size arguments are { (omitted) , 1 , " << num_worker_per_shepherd << " }" ; @@ -470,14 +468,14 @@ TaskPolicy } } -TaskPolicy< Kokkos::Qthread >::member_type & -TaskPolicy< Kokkos::Qthread >::member_single() +TaskPolicy< Kokkos::Qthreads >::member_type & +TaskPolicy< Kokkos::Qthreads >::member_single() { static member_type s ; return s ; } -void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) +void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthreads > & policy ) { volatile int * const active_task_count = & policy.m_active_count ; while ( *active_task_count ) qthread_yield(); @@ -486,6 +484,5 @@ void wait( Kokkos::Experimental::TaskPolicy< Kokkos::Qthread > & policy ) } // namespace Experimental } // namespace Kokkos -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #if defined( KOKKOS_ENABLE_QTHREAD ) */ - +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) +#endif // #if defined( KOKKOS_ENABLE_QTHREADS ) diff --git a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old similarity index 90% rename from lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp rename to lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old index 565dbf7e61716717bdbac0e1b3adf007493cf27d..1e5a4dc593cc6de9fff9d2a762b4f864c6c12e9c 100644 --- a/lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskPolicy.hpp.old @@ -43,15 +43,15 @@ // Experimental unified task-data parallel manycore LDRD -#ifndef KOKKOS_QTHREAD_TASKSCHEDULER_HPP -#define KOKKOS_QTHREAD_TASKSCHEDULER_HPP +#ifndef KOKKOS_QTHREADS_TASKSCHEDULER_HPP +#define KOKKOS_QTHREADS_TASKSCHEDULER_HPP #include <string> #include <typeinfo> #include <stdexcept> //---------------------------------------------------------------------------- -// Defines to enable experimental Qthread functionality +// Defines to enable experimental Qthreads functionality #define QTHREAD_LOCAL_PRIORITY #define CLONED_TASKS @@ -63,7 +63,7 @@ //---------------------------------------------------------------------------- -#include <Kokkos_Qthread.hpp> +#include <Kokkos_Qthreads.hpp> #include <Kokkos_TaskScheduler.hpp> #include <Kokkos_View.hpp> @@ -78,13 +78,13 @@ namespace Experimental { namespace Impl { template<> -class 
TaskMember< Kokkos::Qthread , void , void > +class TaskMember< Kokkos::Qthreads , void , void > { public: typedef TaskMember * (* function_verify_type) ( TaskMember * ); typedef void (* function_single_type) ( TaskMember * ); - typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadTeamPolicyMember & ); + typedef void (* function_team_type) ( TaskMember * , Kokkos::Impl::QthreadsTeamPolicyMember & ); typedef void (* function_dealloc_type)( TaskMember * ); private: @@ -94,7 +94,7 @@ private: const function_single_type m_apply_single ; ///< Apply function const function_team_type m_apply_team ; ///< Apply function int volatile * const m_active_count ; ///< Count of active tasks on this policy - aligned_t m_qfeb ; ///< Qthread full/empty bit + aligned_t m_qfeb ; ///< Qthreads full/empty bit TaskMember ** const m_dep ; ///< Dependences const int m_dep_capacity ; ///< Capacity of dependences int m_dep_size ; ///< Actual count of dependences @@ -129,7 +129,7 @@ protected : ~TaskMember(); - // Used by TaskMember< Qthread , ResultType , void > + // Used by TaskMember< Qthreads , ResultType , void > TaskMember( const function_verify_type arg_verify , const function_dealloc_type arg_dealloc , const function_single_type arg_apply_single @@ -139,7 +139,7 @@ protected : , const unsigned arg_dependence_capacity ); - // Used for TaskMember< Qthread , void , void > + // Used for TaskMember< Qthreads , void , void > TaskMember( const function_dealloc_type arg_dealloc , const function_single_type arg_apply_single , const function_team_type arg_apply_team @@ -175,15 +175,15 @@ public: /* Inheritence Requirements on task types: * typedef FunctorType::value_type value_type ; * class DerivedTaskType - * : public TaskMember< Qthread , value_type , FunctorType > + * : public TaskMember< Qthreads , value_type , FunctorType > * { ... }; - * class TaskMember< Qthread , value_type , FunctorType > - * : public TaskMember< Qthread , value_type , void > + * class TaskMember< Qthreads , value_type , FunctorType > + * : public TaskMember< Qthreads , value_type , void > * , public Functor * { ... }; * If value_type != void - * class TaskMember< Qthread , value_type , void > - * : public TaskMember< Qthread , void , void > + * class TaskMember< Qthreads , value_type , void > + * : public TaskMember< Qthreads , void , void > * * Allocate space for DerivedTaskType followed by TaskMember*[ dependence_capacity ] * @@ -300,10 +300,10 @@ public: KOKKOS_INLINE_FUNCTION static void apply_single( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - // TaskMember< Kokkos::Qthread , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthread , ResultType , void > + // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthreads , ResultType , void > // , public FunctorType // { ... 
}; @@ -316,10 +316,10 @@ public: KOKKOS_INLINE_FUNCTION static void apply_single( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; - // TaskMember< Kokkos::Qthread , ResultType , FunctorType > - // : public TaskMember< Kokkos::Qthread , ResultType , void > + // TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + // : public TaskMember< Kokkos::Qthreads , ResultType , void > // , public FunctorType // { ... }; @@ -333,9 +333,9 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static void apply_team( typename std::enable_if< ! std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadTeamPolicyMember & member ) + , Kokkos::Impl::QthreadsTeamPolicyMember & member ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; derived_type & m = * static_cast< derived_type * >( t ); @@ -345,9 +345,9 @@ public: template< class FunctorType , class ResultType > KOKKOS_INLINE_FUNCTION static void apply_team( typename std::enable_if< std::is_same< ResultType , void >::value , TaskMember * >::type t - , Kokkos::Impl::QthreadTeamPolicyMember & member ) + , Kokkos::Impl::QthreadsTeamPolicyMember & member ) { - typedef TaskMember< Kokkos::Qthread , ResultType , FunctorType > derived_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , FunctorType > derived_type ; derived_type & m = * static_cast< derived_type * >( t ); @@ -356,7 +356,7 @@ public: }; //---------------------------------------------------------------------------- -/** \brief Base class for tasks with a result value in the Qthread execution space. +/** \brief Base class for tasks with a result value in the Qthreads execution space. * * The FunctorType must be void because this class is accessed by the * Future class for the task and result value. @@ -365,8 +365,8 @@ public: * can correctly static_cast from the 'root class' to this class. 
*/ template < class ResultType > -class TaskMember< Kokkos::Qthread , ResultType , void > - : public TaskMember< Kokkos::Qthread , void , void > +class TaskMember< Kokkos::Qthreads , ResultType , void > + : public TaskMember< Kokkos::Qthreads , void , void > { public: @@ -379,7 +379,7 @@ public: protected: - typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; + typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; typedef task_root_type::function_dealloc_type function_dealloc_type ; typedef task_root_type::function_single_type function_single_type ; typedef task_root_type::function_team_type function_team_type ; @@ -404,16 +404,16 @@ protected: }; template< class ResultType , class FunctorType > -class TaskMember< Kokkos::Qthread , ResultType , FunctorType > - : public TaskMember< Kokkos::Qthread , ResultType , void > +class TaskMember< Kokkos::Qthreads , ResultType , FunctorType > + : public TaskMember< Kokkos::Qthreads , ResultType , void > , public FunctorType { public: typedef FunctorType functor_type ; - typedef TaskMember< Kokkos::Qthread , void , void > task_root_type ; - typedef TaskMember< Kokkos::Qthread , ResultType , void > task_base_type ; + typedef TaskMember< Kokkos::Qthreads , void , void > task_root_type ; + typedef TaskMember< Kokkos::Qthreads , ResultType , void > task_base_type ; typedef task_root_type::function_dealloc_type function_dealloc_type ; typedef task_root_type::function_single_type function_single_type ; typedef task_root_type::function_team_type function_team_type ; @@ -447,16 +447,16 @@ public: namespace Kokkos { namespace Experimental { -void wait( TaskPolicy< Kokkos::Qthread > & ); +void wait( TaskPolicy< Kokkos::Qthreads > & ); template<> -class TaskPolicy< Kokkos::Qthread > +class TaskPolicy< Kokkos::Qthreads > { public: - typedef Kokkos::Qthread execution_space ; + typedef Kokkos::Qthreads execution_space ; typedef TaskPolicy execution_policy ; - typedef Kokkos::Impl::QthreadTeamPolicyMember member_type ; + typedef Kokkos::Impl::QthreadsTeamPolicyMember member_type ; private: @@ -650,7 +650,7 @@ public: static member_type & member_single(); - friend void wait( TaskPolicy< Kokkos::Qthread > & ); + friend void wait( TaskPolicy< Kokkos::Qthreads > & ); }; } /* namespace Experimental */ @@ -660,5 +660,5 @@ public: //---------------------------------------------------------------------------- #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #define KOKKOS_QTHREAD_TASK_HPP */ +#endif /* #define KOKKOS_QTHREADS_TASK_HPP */ diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp new file mode 100644 index 0000000000000000000000000000000000000000..55235cd6d27a9df0e40bd28dff8caa13df94073e --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue.hpp @@ -0,0 +1,319 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +/** \brief Manage task allocation, deallocation, and scheduling. + * + * Task execution is handled here directly for the Qthreads implementation. + */ +template<> +class TaskQueue< Kokkos::Qthreads > { +private: + + using execution_space = Kokkos::Qthreads ; + using memory_space = Kokkos::HostSpace ; + using device_type = Kokkos::Device< execution_space, memory_space > ; + using memory_pool = Kokkos::Experimental::MemoryPool< device_type > ; + using task_root_type = Kokkos::Impl::TaskBase< execution_space, void, void > ; + + friend class Kokkos::TaskScheduler< execution_space > ; + + struct Destroy { + TaskQueue * m_queue ; + void destroy_shared_allocation(); + }; + + //---------------------------------------- + + enum : int { TASK_STATE_NULL = 0, ///< Does not exist + TASK_STATE_CONSTRUCTING = 1, ///< Is under construction + TASK_STATE_WAITING = 2, ///< Is waiting for execution + TASK_STATE_EXECUTING = 4, ///< Is executing + TASK_STATE_RESPAWN = 8, ///< Requested respawn + TASK_STATE_COMPLETE = 16 ///< Execution is complete + }; + + // Queue is organized as [ priority ][ type ] + + memory_pool m_memory ; + unsigned m_team_size ; // Number of threads in a team + long m_accum_alloc ; // Accumulated number of allocations + int m_count_alloc ; // Current number of allocations + int m_max_alloc ; // Maximum number of allocations + int m_ready_count ; // Number of ready or executing + + //---------------------------------------- + + ~TaskQueue(); + TaskQueue() = delete ; + TaskQueue( TaskQueue && ) = delete ; + TaskQueue( TaskQueue const & ) = delete ; + TaskQueue & operator = ( TaskQueue && ) = delete ; + TaskQueue & operator = ( TaskQueue const & ) = delete ; + + TaskQueue + ( const memory_space & arg_space, + unsigned const arg_memory_pool_capacity, + unsigned const arg_memory_pool_superblock_capacity_log2 + ); + + // Schedule a task + // Precondition: + // task is not executing + // task->m_next is the dependence or 
zero + // Postcondition: + // task->m_next is linked list membership + KOKKOS_FUNCTION + void schedule( task_root_type * const ); + + // Reschedule a task + // Precondition: + // task is in Executing state + // task->m_next == LockTag + // Postcondition: + // task is in Executing-Respawn state + // task->m_next == 0 (no dependence) + KOKKOS_FUNCTION + void reschedule( task_root_type * ); + + // Complete a task + // Precondition: + // task is not executing + // task->m_next == LockTag => task is complete + // task->m_next != LockTag => task is respawn + // Postcondition: + // task->m_wait == LockTag => task is complete + // task->m_wait != LockTag => task is waiting + KOKKOS_FUNCTION + void complete( task_root_type * ); + +public: + + // If and only if the execution space is a single thread + // then execute ready tasks. + KOKKOS_INLINE_FUNCTION + void iff_single_thread_recursive_execute() + { +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + specialization::iff_single_thread_recursive_execute( this ); +#endif + } + + void execute() { specialization::execute( this ); } + + template< typename FunctorType > + void proc_set_apply( typename task_root_type::function_type * ptr ) + { + specialization::template proc_set_apply< FunctorType >( ptr ); + } + + // Assign task pointer with reference counting of assigned tasks + template< typename LV, typename RV > + KOKKOS_FUNCTION static + void assign( TaskBase< execution_space, LV, void > ** const lhs, + TaskBase< execution_space, RV, void > * const rhs ) + { + using task_lhs = TaskBase< execution_space, LV, void > ; +#if 0 + { + printf( "assign( 0x%lx { 0x%lx %d %d }, 0x%lx { 0x%lx %d %d } )\n", + uintptr_t( lhs ? *lhs : 0 ), + uintptr_t( lhs && *lhs ? (*lhs)->m_next : 0 ), + int( lhs && *lhs ? (*lhs)->m_task_type : 0 ), + int( lhs && *lhs ? (*lhs)->m_ref_count : 0 ), + uintptr_t(rhs), + uintptr_t( rhs ? rhs->m_next : 0 ), + int( rhs ? rhs->m_task_type : 0 ), + int( rhs ? rhs->m_ref_count : 0 ) + ); + fflush( stdout ); + } +#endif + + if ( *lhs ) + { + const int count = Kokkos::atomic_fetch_add( &((*lhs)->m_ref_count), -1 ); + + if ( ( 1 == count ) && ( (*lhs)->m_state == TASK_STATE_COMPLETE ) ) { + // Reference count is zero and task is complete, deallocate. + (*lhs)->m_queue->deallocate( *lhs, (*lhs)->m_alloc_size ); + } + else if ( count <= 1 ) { + Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" ); + } + + // GEM: Should I check that there are no dependences here? Can the state + // be set to complete while there are still dependences? 
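+      // (As written, only the reference count and TASK_STATE_COMPLETE are checked before deallocating; dependences are not re-examined here.)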
+ } + + if ( rhs ) { Kokkos::atomic_fetch_add( &(rhs->m_ref_count), 1 ); } + + // Force write of *lhs + + *static_cast< task_lhs * volatile * >(lhs) = rhs ; + + Kokkos::memory_fence(); + } + + KOKKOS_FUNCTION + size_t allocate_block_size( size_t n ); ///< Actual block size allocated + + KOKKOS_FUNCTION + void * allocate( size_t n ); ///< Allocate from the memory pool + + KOKKOS_FUNCTION + void deallocate( void * p, size_t n ); ///< Deallocate to the memory pool +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskBase< Kokkos::Qthreads, void, void > +{ +public: + + enum : int16_t { TaskTeam = TaskBase< void, void, void >::TaskTeam, + TaskSingle = TaskBase< void, void, void >::TaskSingle, + Aggregate = TaskBase< void, void, void >::Aggregate }; + + enum : uintptr_t { LockTag = TaskBase< void, void, void >::LockTag, + EndTag = TaskBase< void, void, void >::EndTag }; + + using execution_space = Kokkos::Qthreads ; + using queue_type = TaskQueue< execution_space > ; + + template< typename > friend class Kokkos::TaskScheduler ; + + typedef void (* function_type) ( TaskBase *, void * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply ; ///< Apply function pointer + queue_type * m_queue ; ///< Queue in which this task resides + TaskBase * m_dep ; ///< Dependence + int32_t m_ref_count ; ///< Reference count + int32_t m_alloc_size ; ///< Allocation size + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + aligned_t m_qfeb ; ///< Qthreads full/empty bit + int m_state ; ///< State of the task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + + KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + + KOKKOS_INLINE_FUNCTION + constexpr TaskBase() noexcept + : m_apply(0), + m_queue(0), + m_dep(0), + m_ref_count(0), + m_alloc_size(0), + m_dep_count(0), + m_task_type( TaskSingle ), + m_priority( 1 /* TaskRegularPriority */ ), + m_qfeb(0), + m_state( queue_type::TASK_STATE_CONSTRUCTING ) + { + qthread_empty( & m_qfeb ); // Set to full when complete + } + + //---------------------------------------- + + static aligned_t qthread_func( void * arg ); + + KOKKOS_INLINE_FUNCTION + TaskBase ** aggregate_dependences() + { return reinterpret_cast<TaskBase**>( this + 1 ); } + + KOKKOS_INLINE_FUNCTION + bool requested_respawn() + { return m_state == queue_type::TASK_STATE_RESPAWN; } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskBase* dep ) + { + // Assign dependence to m_dep. It will be processed in the subsequent + // call to schedule. Error if the dependence is reset. + if ( 0 != Kokkos::atomic_exchange( & m_dep, dep ) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != dep ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. 
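+      // The atomic add keeps the dependence alive even if the caller's future is destroyed first.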
+ Kokkos::atomic_fetch_add( &(dep->m_ref_count), 1 ); + } + } + + using get_return_type = void ; + + KOKKOS_INLINE_FUNCTION + get_return_type get() const {} +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ diff --git a/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4a9190c731c6034724b63094c55967de78caab64 --- /dev/null +++ b/lib/kokkos/core/src/Qthreads/Kokkos_Qthreads_TaskQueue_impl.hpp @@ -0,0 +1,436 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#if defined( KOKKOS_ENABLE_TASKPOLICY ) + +namespace Kokkos { +namespace Impl { + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +void TaskQueue< ExecSpace >::Destroy::destroy_shared_allocation() +{ + m_queue->~TaskQueue(); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::TaskQueue + ( const TaskQueue< ExecSpace >::memory_space & arg_space, + unsigned const arg_memory_pool_capacity, + unsigned const arg_memory_pool_superblock_capacity_log2 ) + : m_memory( arg_space, + arg_memory_pool_capacity, + arg_memory_pool_superblock_capacity_log2 ), + m_team_size( unsigned( qthread_num_workers_local(NO_SHEPHERD) ) ), + m_accum_alloc(0), + m_count_alloc(0), + m_max_alloc(0), + m_ready_count(0) +{} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +TaskQueue< ExecSpace >::~TaskQueue() +{ + // Verify that ready count is zero. + if ( 0 != m_ready_count ) { + Kokkos::abort("TaskQueue::~TaskQueue ERROR: has ready or executing tasks"); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +size_t TaskQueue< ExecSpace >::allocate_block_size( size_t n ) +{ + return m_memory.allocate_block_size( n ); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void * TaskQueue< ExecSpace >::allocate( size_t n ) +{ + void * const p = m_memory.allocate(n); + + if ( p ) { + Kokkos::atomic_increment( & m_accum_alloc ); + Kokkos::atomic_increment( & m_count_alloc ); + + if ( m_max_alloc < m_count_alloc ) m_max_alloc = m_count_alloc ; + } + + return p ; +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::deallocate( void * p, size_t n ) +{ + m_memory.deallocate( p, n ); + Kokkos::atomic_decrement( & m_count_alloc ); +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::schedule + ( TaskQueue< ExecSpace >::task_root_type * const task ) +{ +#if 0 + printf( "schedule( 0x%lx { %d %d %d }\n", + uintptr_t(task), + task->m_task_type, + task->m_priority, + task->m_ref_count ); +#endif + + // The task has been constructed and is waiting to be executed. + task->m_state = TASK_STATE_WAITING ; + + if ( task->m_task_type != task_root_type::Aggregate ) { + // Scheduling a single or team task. + + // Increment active task count before spawning. + Kokkos::atomic_increment( & m_ready_count ); + + if ( task->m_dep == 0 ) { + // Schedule a task with no dependences. 
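+      // A team task with m_team_size > 1 is spawned cloneable on a neighboring shepherd so its clones form the team;
+      // otherwise a single qthread is spawned with no shepherd preference.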
+ + if ( task_root_type::TaskTeam == task->m_task_type && m_team_size > 1 ) { + // If more than one shepherd spawn on a shepherd other than this shepherd + const int num_shepherd = qthread_num_shepherds(); + const int this_shepherd = qthread_shep(); + int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast<unsigned long>(this), + spawn_shepherd, + m_team_size - 1 + ); + fflush(stdout); +#endif + + qthread_spawn_cloneable( + & task_root_type::qthread_func, + task, + 0, + NULL, + 0, // no depenedences + 0, // dependences array + spawn_shepherd, + unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ), + m_team_size - 1 + ); + } + else { + qthread_spawn( + & task_root_type::qthread_func, + task, + 0, + NULL, + 0, // no depenedences + 0, // dependences array + NO_SHEPHERD, + QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ + ); + } + } + else if ( task->m_dep->m_task_type != task_root_type::Aggregate ) + // Malloc the precondition array to pass to qthread_spawn(). For + // non-aggregate tasks, it is a single pointer since there are no + // dependences. Qthreads will eventually free this allocation so memory will + // not be leaked. Is malloc thread-safe? Should this call be guarded? The + // memory can't be allocated from the pool allocator because Qthreads frees + // it using free(). + aligned_t ** qprecon = (aligned_t **) malloc( sizeof(aligned_t *) ); + + *qprecon = reinterpret_cast<aligned_t *>( uintptr_t(m_dep_size) ); + + if ( task->m_task_type == task_root_type::TaskTeam && m_team_size > 1) { + // If more than one shepherd spawn on a shepherd other than this shepherd + const int num_shepherd = qthread_num_shepherds(); + const int this_shepherd = qthread_shep(); + int spawn_shepherd = ( this_shepherd + 1 ) % num_shepherd ; + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx spawning on shepherd(%d) clone(%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast<unsigned long>(this), + spawn_shepherd, + m_team_size - 1 + ); + fflush(stdout); +#endif + + qthread_spawn_cloneable( + & Task::qthread_func, + this, + 0, + NULL, + m_dep_size, + qprecon, /* dependences */ + spawn_shepherd, + unsigned( QTHREAD_SPAWN_SIMPLE | QTHREAD_SPAWN_LOCAL_PRIORITY ), + m_team_size - 1 + ); + } + else { + qthread_spawn( + & Task::qthread_func, /* function */ + this, /* function argument */ + 0, + NULL, + m_dep_size, + qprecon, /* dependences */ + NO_SHEPHERD, + QTHREAD_SPAWN_SIMPLE /* allows optimization for non-blocking task */ + ); + } + } + else { + // GEM: How do I handle an aggregate (when_all) task? 
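+        // (As written, nothing is spawned for an aggregate task in this branch.)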
+ } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::reschedule( task_root_type * task ) +{ + // Precondition: + // task is in Executing state + // task->m_next == LockTag + // + // Postcondition: + // task is in Executing-Respawn state + // task->m_next == 0 (no dependence) + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + + if ( lock != Kokkos::atomic_exchange( & task->m_next, zero ) ) { + Kokkos::abort("TaskScheduler::respawn ERROR: already respawned"); + } +} + +//---------------------------------------------------------------------------- + +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::complete + ( TaskQueue< ExecSpace >::task_root_type * task ) +{ + // Complete a runnable task that has finished executing + // or a when_all task when all of its dependeneces are complete. + + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + +#if 0 + printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n", + uintptr_t(task), + uintptr_t(task->m_wait), + uintptr_t(task->m_next), + task->m_task_type, + task->m_priority, + task->m_ref_count + ); + fflush( stdout ); +#endif + + const bool runnable = task_root_type::Aggregate != task->m_task_type ; + + //---------------------------------------- + + if ( runnable && lock != task->m_next ) { + // Is a runnable task has finished executing and requested respawn. + // Schedule the task for subsequent execution. + + schedule( task ); + } + //---------------------------------------- + else { + // Is either an aggregate or a runnable task that executed + // and did not respawn. Transition this task to complete. + + // If 'task' is an aggregate then any of the runnable tasks that + // it depends upon may be attempting to complete this 'task'. + // Must only transition a task once to complete status. + // This is controled by atomically locking the wait queue. + + // Stop other tasks from adding themselves to this task's wait queue + // by locking the head of this task's wait queue. + + task_root_type * x = Kokkos::atomic_exchange( & task->m_wait, lock ); + + if ( x != (task_root_type *) lock ) { + + // This thread has transitioned this 'task' to complete. + // 'task' is no longer in a queue and is not executing + // so decrement the reference count from 'task's creation. + // If no other references to this 'task' then it will be deleted. + + TaskQueue::assign( & task, zero ); + + // This thread has exclusive access to the wait list so + // the concurrency-safe pop_task function is not needed. + // Schedule the tasks that have been waiting on the input 'task', + // which may have been deleted. + + while ( x != end ) { + + // Set x->m_next = zero <= no dependence + + task_root_type * const next = + (task_root_type *) Kokkos::atomic_exchange( & x->m_next, zero ); + + schedule( x ); + + x = next ; + } + } + } + + if ( runnable ) { + // A runnable task was popped from a ready queue and executed. + // If respawned into a ready queue then the ready count was incremented + // so decrement whether respawned or not. 
+ Kokkos::atomic_decrement( & m_ready_count ); + } +} + +//---------------------------------------------------------------------------- + +template<> +aligned_t +TaskBase< Kokkos::Qthreads, void, void >::qthread_func( void * arg ) +{ + using execution_space = Kokkos::Qthreads ; + using task_root_type = TaskBase< execution_space , void , void > ; + using Member = Kokkos::Impl::QthreadsTeamPolicyMember; + + task_root_type * const task = reinterpret_cast< task_root_type * >( arg ); + + // First member of the team change state to executing. + // Use compare-exchange to avoid race condition with a respawn. + Kokkos::atomic_compare_exchange_strong( & task->m_state, + queue_type::TASK_STATE_WAITING, + queue_type::TASK_STATE_EXECUTING + ); + + if ( task_root_type::TaskTeam == task->m_task_type ) + { + if ( 1 < task->m_queue->m_team_size ) { + // Team task with team size of more than 1. + Member::TaskTeam task_team_tag ; + + // Initialize team size and rank with shephered info + Member member( task_team_tag ); + + (*task->m_apply)( task , & member ); + +#if 0 + fprintf( stdout, + "worker(%d.%d) task 0x%.12lx executed by member(%d:%d)\n", + qthread_shep(), + qthread_worker_local(NULL), + reinterpret_cast<unsigned long>(task), + member.team_rank(), + member.team_size() + ); + fflush(stdout); +#endif + + member.team_barrier(); + if ( member.team_rank() == 0 ) task->closeout(); + member.team_barrier(); + } + else { + // Team task with team size of 1. + Member member ; + (*task->m_apply)( task , & member ); + task->closeout(); + } + } + else { + (*task->m_apply)( task ); + task->closeout(); + } + +#if 0 +fprintf( stdout + , "worker(%d.%d) task 0x%.12lx return\n" + , qthread_shep() + , qthread_worker_local(NULL) + , reinterpret_cast<unsigned long>(task) + ); +fflush(stdout); +#endif + + return 0 ; +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + + +#endif /* #if defined( KOKKOS_ENABLE_TASKPOLICY ) */ + diff --git a/lib/kokkos/core/src/Qthread/README b/lib/kokkos/core/src/Qthreads/README similarity index 99% rename from lib/kokkos/core/src/Qthread/README rename to lib/kokkos/core/src/Qthreads/README index 6e6c86a9efc2680916e2556bda28914833e6749d..e35b1f698ec7ca3e3ee020eeee4445de43023c78 100644 --- a/lib/kokkos/core/src/Qthread/README +++ b/lib/kokkos/core/src/Qthreads/README @@ -22,4 +22,3 @@ sh autogen.sh # install make install - diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 0f69be9ed4db6547d52e1c96b735069fb2332081..b1f53489f432ba093ea2222b16c88ee68e005374 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -264,7 +264,7 @@ void ThreadsExec::execute_sleep( ThreadsExec & exec , const void * ) const int rank_rev = exec.m_pool_size - ( exec.m_pool_rank + 1 ); for ( int i = 0 ; i < n ; ++i ) { - Impl::spinwait( exec.m_pool_base[ rank_rev + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( exec.m_pool_base[ rank_rev + (1<<i) ]->m_pool_state , ThreadsExec::Active ); } exec.m_pool_state = ThreadsExec::Inactive ; @@ -308,7 +308,7 @@ void ThreadsExec::fence() { if ( s_thread_pool_size[0] ) { // Wait for the root thread to complete: - Impl::spinwait( s_threads_exec[0]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( s_threads_exec[0]->m_pool_state , ThreadsExec::Active ); } s_current_function = 0 ; @@ -724,7 +724,7 @@ void ThreadsExec::initialize( unsigned thread_count , // Init the array for used for arbitrarily 
sized atomics Impl::init_lock_array_host_space(); - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -777,7 +777,7 @@ void ThreadsExec::finalize() s_threads_process.m_pool_fan_size = 0 ; s_threads_process.m_pool_state = ThreadsExec::Inactive ; - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif } diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 385dd492d0e8cc9417b50dd817538abf4f27246c..a6db02ebac84b96a736519a22a537bdc53ea6b1a 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -187,13 +187,13 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); } if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the reduction and broadcast @@ -229,13 +229,13 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); } if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the reduction and broadcast @@ -264,7 +264,7 @@ public: ThreadsExec & fan = *m_pool_base[ rev_rank + ( 1 << i ) ] ; - Impl::spinwait( fan.m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::Active ); Join::join( f , reduce_memory() , fan.reduce_memory() ); } @@ -280,7 +280,7 @@ public: const int rev_rank = m_pool_size - ( m_pool_rank + 1 ); for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { - Impl::spinwait( m_pool_base[rev_rank+(1<<i)]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[rev_rank+(1<<i)]->m_pool_state , ThreadsExec::Active ); } } @@ -312,7 +312,7 @@ public: ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ]; // Wait: Active -> ReductionAvailable (or ScanAvailable) - Impl::spinwait( fan.m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::Active ); Join::join( f , work_value , fan.reduce_memory() ); } @@ -330,8 +330,8 @@ public: // Wait: Active -> ReductionAvailable // Wait: ReductionAvailable -> ScanAvailable - Impl::spinwait( th.m_pool_state , ThreadsExec::Active ); - Impl::spinwait( th.m_pool_state , ThreadsExec::ReductionAvailable ); + Impl::spinwait_while_equal( th.m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( th.m_pool_state , ThreadsExec::ReductionAvailable ); Join::join( f , work_value + count , ((scalar_type *)th.reduce_memory()) + count ); } @@ -342,7 +342,7 @@ public: // Wait for all threads to complete inclusive scan // Wait: ScanAvailable -> Rendezvous - 
Impl::spinwait( m_pool_state , ThreadsExec::ScanAvailable ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::ScanAvailable ); } //-------------------------------- @@ -350,7 +350,7 @@ public: for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { ThreadsExec & fan = *m_pool_base[ rev_rank + (1<<i) ]; // Wait: ReductionAvailable -> ScanAvailable - Impl::spinwait( fan.m_pool_state , ThreadsExec::ReductionAvailable ); + Impl::spinwait_while_equal( fan.m_pool_state , ThreadsExec::ReductionAvailable ); // Set: ScanAvailable -> Rendezvous fan.m_pool_state = ThreadsExec::Rendezvous ; } @@ -377,13 +377,13 @@ public: // Wait for all threads to copy previous thread's inclusive scan value // Wait for all threads: Rendezvous -> ScanCompleted for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { - Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Rendezvous ); } if ( rev_rank ) { // Set: ScanAvailable -> ScanCompleted m_pool_state = ThreadsExec::ScanCompleted ; // Wait: ScanCompleted -> Active - Impl::spinwait( m_pool_state , ThreadsExec::ScanCompleted ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::ScanCompleted ); } // Set: ScanCompleted -> Active for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { @@ -410,7 +410,7 @@ public: // Fan-in reduction with highest ranking thread as the root for ( int i = 0 ; i < m_pool_fan_size ; ++i ) { // Wait: Active -> Rendezvous - Impl::spinwait( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_pool_base[ rev_rank + (1<<i) ]->m_pool_state , ThreadsExec::Active ); } for ( unsigned i = 0 ; i < count ; ++i ) { work_value[i+count] = work_value[i]; } @@ -418,7 +418,7 @@ public: if ( rev_rank ) { m_pool_state = ThreadsExec::Rendezvous ; // Wait: Rendezvous -> Active - Impl::spinwait( m_pool_state , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_pool_state , ThreadsExec::Rendezvous ); } else { // Root thread does the thread-scan before releasing threads diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index b9edb64551f21d96f35a5276b06b501101b4e3e7..701495428193148f0efaf8dbf1cdededabd66460 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -49,6 +49,7 @@ #include <utility> #include <impl/Kokkos_spinwait.hpp> #include <impl/Kokkos_FunctorAdapter.hpp> +#include <impl/Kokkos_HostThreadTeam.hpp> #include <Kokkos_Atomic.hpp> @@ -103,13 +104,13 @@ public: // Wait for fan-in threads for ( n = 1 ; ( ! ( m_team_rank_rev & n ) ) && ( ( j = m_team_rank_rev + n ) < m_team_size ) ; n <<= 1 ) { - Impl::spinwait( m_team_base[j]->state() , ThreadsExec::Active ); + Impl::spinwait_while_equal( m_team_base[j]->state() , ThreadsExec::Active ); } // If not root then wait for release if ( m_team_rank_rev ) { m_exec->state() = ThreadsExec::Rendezvous ; - Impl::spinwait( m_exec->state() , ThreadsExec::Rendezvous ); + Impl::spinwait_while_equal( m_exec->state() , ThreadsExec::Rendezvous ); } return ! 
m_team_rank_rev ; @@ -350,6 +351,10 @@ public: const int team_rank_rev = pool_rank_rev % team.team_alloc(); const size_t pool_league_size = m_exec->pool_size() / team.team_alloc() ; const size_t pool_league_rank_rev = pool_rank_rev / team.team_alloc() ; + if(pool_league_rank_rev >= pool_league_size) { + m_invalid_thread = 1; + return; + } const size_t pool_league_rank = pool_league_size - ( pool_league_rank_rev + 1 ); const int pool_num_teams = m_exec->pool_size()/team.team_alloc(); @@ -505,7 +510,8 @@ private: , const int team_size_request ) { const int pool_size = traits::execution_space::thread_pool_size(0); - const int team_max = traits::execution_space::thread_pool_size(1); + const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + const int team_max = pool_size<max_host_team_size?pool_size:max_host_team_size; const int team_grain = traits::execution_space::thread_pool_size(2); m_league_size = league_size_request ; @@ -552,8 +558,12 @@ public: template< class FunctorType > inline static - int team_size_max( const FunctorType & ) - { return traits::execution_space::thread_pool_size(1); } + int team_size_max( const FunctorType & ) { + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size<max_host_team_size?pool_size:max_host_team_size; + } + template< class FunctorType > static int team_size_recommended( const FunctorType & ) @@ -819,9 +829,7 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T #pragma ivdep #endif for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - result+=tmp; + lambda(i,result); } } @@ -835,18 +843,14 @@ void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::T template< typename iType, class Lambda, typename ValueType, class JoinType > KOKKOS_INLINE_FUNCTION void parallel_reduce(const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::ThreadsExecTeamMember >& - loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& init_result) { + loop_boundaries, const Lambda & lambda, const JoinType& join, ValueType& result ) { - ValueType result = init_result; #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP #pragma ivdep #endif for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); + lambda(i,result); } - init_result = result; } /** \brief Intra-thread vector parallel exclusive prefix sum. Executes lambda(iType i, ValueType & val, bool final) diff --git a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c4db3e15ef4593422eca54ab5d295f5469d3a5ad --- /dev/null +++ b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -0,0 +1,2356 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_HOST_EXP_ITERATE_TILE_HPP +#define KOKKOS_HOST_EXP_ITERATE_TILE_HPP + +#include <iostream> +#include <algorithm> +#include <stdio.h> + +#include <Kokkos_Macros.hpp> + +#if defined(KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION) && defined(KOKKOS_HAVE_PRAGMA_IVDEP) && !defined(__CUDA_ARCH__) +#define KOKKOS_MDRANGE_IVDEP +#endif + + +#ifdef KOKKOS_MDRANGE_IVDEP + #define KOKKOS_ENABLE_IVDEP_MDRANGE _Pragma("ivdep") +#else + #define KOKKOS_ENABLE_IVDEP_MDRANGE +#endif + + + +namespace Kokkos { namespace Experimental { namespace Impl { + +// Temporary, for testing new loop macros +#define KOKKOS_ENABLE_NEW_LOOP_MACROS 1 + + +#define LOOP_1L(type, tile) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0=0; i0<static_cast<type>(tile[0]); ++i0) + +#define LOOP_2L(type, tile) \ + for( type i1=0; i1<static_cast<type>(tile[1]); ++i1) \ + LOOP_1L(type, tile) + +#define LOOP_3L(type, tile) \ + for( type i2=0; i2<static_cast<type>(tile[2]); ++i2) \ + LOOP_2L(type, tile) + +#define LOOP_4L(type, tile) \ + for( type i3=0; i3<static_cast<type>(tile[3]); ++i3) \ + LOOP_3L(type, tile) + +#define LOOP_5L(type, tile) \ + for( type i4=0; i4<static_cast<type>(tile[4]); ++i4) \ + LOOP_4L(type, tile) + +#define LOOP_6L(type, tile) \ + for( type i5=0; i5<static_cast<type>(tile[5]); ++i5) \ + LOOP_5L(type, tile) + +#define LOOP_7L(type, tile) \ + for( type i6=0; i6<static_cast<type>(tile[6]); ++i6) \ + LOOP_6L(type, tile) + +#define LOOP_8L(type, tile) \ + for( type i7=0; i7<static_cast<type>(tile[7]); ++i7) \ + LOOP_7L(type, tile) + + +#define LOOP_1R(type, tile) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for ( type i0=0; i0<static_cast<type>(tile[0]); ++i0 ) + +#define LOOP_2R(type, tile) \ + LOOP_1R(type, tile) \ + for ( type i1=0; i1<static_cast<type>(tile[1]); ++i1 ) + +#define LOOP_3R(type, tile) \ + LOOP_2R(type, tile) \ + for ( type i2=0; i2<static_cast<type>(tile[2]); ++i2 ) + +#define LOOP_4R(type, tile) \ + LOOP_3R(type, tile) \ + for ( type i3=0; i3<static_cast<type>(tile[3]); ++i3 ) + +#define LOOP_5R(type, tile) \ + 
LOOP_4R(type, tile) \ + for ( type i4=0; i4<static_cast<type>(tile[4]); ++i4 ) + +#define LOOP_6R(type, tile) \ + LOOP_5R(type, tile) \ + for ( type i5=0; i5<static_cast<type>(tile[5]); ++i5 ) + +#define LOOP_7R(type, tile) \ + LOOP_6R(type, tile) \ + for ( type i6=0; i6<static_cast<type>(tile[6]); ++i6 ) + +#define LOOP_8R(type, tile) \ + LOOP_7R(type, tile) \ + for ( type i7=0; i7<static_cast<type>(tile[7]); ++i7 ) + + +#define LOOP_ARGS_1 i0 + m_offset[0] +#define LOOP_ARGS_2 LOOP_ARGS_1, i1 + m_offset[1] +#define LOOP_ARGS_3 LOOP_ARGS_2, i2 + m_offset[2] +#define LOOP_ARGS_4 LOOP_ARGS_3, i3 + m_offset[3] +#define LOOP_ARGS_5 LOOP_ARGS_4, i4 + m_offset[4] +#define LOOP_ARGS_6 LOOP_ARGS_5, i5 + m_offset[5] +#define LOOP_ARGS_7 LOOP_ARGS_6, i6 + m_offset[6] +#define LOOP_ARGS_8 LOOP_ARGS_7, i7 + m_offset[7] + + + +// New Loop Macros... +// parallel_for, non-tagged +#define APPLY( func, ... ) \ + func( __VA_ARGS__ ); + +// LayoutRight +// d = 0 to start +#define LOOP_R_1( func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + APPLY( func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define LOOP_R_2( func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + LOOP_R_1( func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define LOOP_R_3( func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + LOOP_R_2( func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define LOOP_R_4( func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + LOOP_R_3( func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define LOOP_R_5( func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + LOOP_R_4( func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define LOOP_R_6( func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + LOOP_R_5( func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define LOOP_R_7( func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + LOOP_R_6( func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define LOOP_R_8( func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + LOOP_R_7( func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define LOOP_L_1( func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + APPLY( func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_2( func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + LOOP_L_1( func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_3( func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + LOOP_L_2( func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_4( func, type, m_offset, extent, d, ... 
) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + LOOP_L_3( func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_5( func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + LOOP_L_4( func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_6( func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + LOOP_L_5( func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_7( func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + LOOP_L_6( func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_8( func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + LOOP_L_7( func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +// TODO: rank not necessary to pass through, can hardcode the values +#define LOOP_LAYOUT_1( func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ + APPLY( func, i0 + m_offset[0] ) \ + } + +#define LOOP_LAYOUT_2( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[rank-1]); ++i1) { \ + LOOP_L_1( func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ + LOOP_R_1( func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_3( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[rank-1]); ++i2) { \ + LOOP_L_2( func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ + LOOP_R_2( func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_4( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[rank-1]); ++i3) { \ + LOOP_L_3( func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ + LOOP_R_3( func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_5( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[rank-1]); ++i4) { \ + LOOP_L_4( func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ + LOOP_R_4( func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_6( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[rank-1]); ++i5) { \ + LOOP_L_5( func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ + LOOP_R_5( func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_7( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < 
static_cast<type>(extent[rank-1]); ++i6) { \ + LOOP_L_6( func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ + LOOP_R_6( func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_8( func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[rank-1]); ++i7) { \ + LOOP_L_7( func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ + LOOP_R_7( func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TILE_LOOP_1( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_1( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_1( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_2( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_2( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_2( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_3( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_3( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_3( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_4( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_4( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_4( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_5( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_5( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_5( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_6( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_6( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_6( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_7( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_7( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_7( func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_8( func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_8( func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_8( func, type, is_left, m_offset, extent_partial, rank ) } + + +// parallel_reduce, non-tagged +// Reduction version +#define APPLY_REDUX( val, func, ... ) \ + func( __VA_ARGS__, val ); + +// LayoutRight +// d = 0 to start +#define LOOP_R_1_REDUX( val, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + APPLY_REDUX( val, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define LOOP_R_2_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + LOOP_R_1_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define LOOP_R_3_REDUX( val, func, type, m_offset, extent, d, ... 
) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + LOOP_R_2_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define LOOP_R_4_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + LOOP_R_3_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define LOOP_R_5_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + LOOP_R_4_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define LOOP_R_6_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + LOOP_R_5_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define LOOP_R_7_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + LOOP_R_6_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define LOOP_R_8_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + LOOP_R_7_REDUX( val, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define LOOP_L_1_REDUX( val, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + APPLY_REDUX( val, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_2_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + LOOP_L_1_REDUX( val, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_3_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + LOOP_L_2_REDUX( val, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_4_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + LOOP_L_3_REDUX( val, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_5_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + LOOP_L_4_REDUX( val, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_6_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + LOOP_L_5_REDUX( val, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_7_REDUX( val, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + LOOP_L_6_REDUX( val, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define LOOP_L_8_REDUX( val, func, type, m_offset, extent, d, ... 
) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + LOOP_L_7_REDUX( val, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +#define LOOP_LAYOUT_1_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ + APPLY_REDUX( val, func, i0 + m_offset[0] ) \ + } + +#define LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[rank-1]); ++i1) { \ + LOOP_L_1_REDUX( val, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ + LOOP_R_1_REDUX( val, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[rank-1]); ++i2) { \ + LOOP_L_2_REDUX( val, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ + LOOP_R_2_REDUX( val, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[rank-1]); ++i3) { \ + LOOP_L_3_REDUX( val, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ + LOOP_R_3_REDUX( val, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[rank-1]); ++i4) { \ + LOOP_L_4_REDUX( val, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ + LOOP_R_4_REDUX( val, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[rank-1]); ++i5) { \ + LOOP_L_5_REDUX( val, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ + LOOP_R_5_REDUX( val, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[rank-1]); ++i6) { \ + LOOP_L_6_REDUX( val, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ + LOOP_R_6_REDUX( val, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[rank-1]); ++i7) { \ + LOOP_L_7_REDUX( val, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ + LOOP_R_7_REDUX( val, func, type, m_offset, extent, 1 , i7 + 
m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TILE_LOOP_1_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_1_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_1_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_2_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_2_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_3_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_3_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_4_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_4_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_5_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_5_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_6_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_6_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_7_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_7_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TILE_LOOP_8_REDUX( val, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent_full, rank ) } \ + else { LOOP_LAYOUT_8_REDUX( val, func, type, is_left, m_offset, extent_partial, rank ) } +// end New Loop Macros + + +// tagged macros +#define TAGGED_APPLY( tag, func, ... ) \ + func( tag, __VA_ARGS__ ); + +// LayoutRight +// d = 0 to start +#define TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + TAGGED_APPLY( tag, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, d, ... 
) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_8( tag, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + TAGGED_APPLY( tag, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_8( tag, func, type, m_offset, extent, d, ... 
) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +// TODO: rank not necessary to pass through, can hardcode the values +#define TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ + TAGGED_APPLY( tag, func, i0 + m_offset[0] ) \ + } + +#define TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[rank-1]); ++i1) { \ + TAGGED_LOOP_L_1( tag, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ + TAGGED_LOOP_R_1( tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[rank-1]); ++i2) { \ + TAGGED_LOOP_L_2( tag, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ + TAGGED_LOOP_R_2( tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[rank-1]); ++i3) { \ + TAGGED_LOOP_L_3( tag, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ + TAGGED_LOOP_R_3( tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[rank-1]); ++i4) { \ + TAGGED_LOOP_L_4( tag, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ + TAGGED_LOOP_R_4( tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[rank-1]); ++i5) { \ + TAGGED_LOOP_L_5( tag, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ + TAGGED_LOOP_R_5( tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[rank-1]); ++i6) { \ + TAGGED_LOOP_L_6( tag, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ + TAGGED_LOOP_R_6( tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[rank-1]); ++i7) { \ + TAGGED_LOOP_L_7( tag, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < 
static_cast<type>(extent[0]); ++i7) { \ + TAGGED_LOOP_R_7( tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TAGGED_TILE_LOOP_1( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_1( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_2( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_2( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_3( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_3( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_4( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_4( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_5( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_5( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_6( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_6( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_7( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_7( tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_8( tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_8( tag, func, type, is_left, m_offset, extent_partial, rank ) } + + +// parallel_reduce, tagged +// Reduction version +#define TAGGED_APPLY_REDUX( val, tag, func, ... ) \ + func( tag, __VA_ARGS__, val ); + +// LayoutRight +// d = 0 to start +#define TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, __VA_ARGS__, i0 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i1 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, d, ... 
) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i2 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i3 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i4 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i5 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i6 + m_offset[d] ) \ + } + +#define TAGGED_LOOP_R_8_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, d+1 , __VA_ARGS__, i7 + m_offset[d] ) \ + } + +//LayoutLeft +// d = rank-1 to start +#define TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[d]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, i0 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[d]); ++i1) { \ + TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, d-1, i1 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[d]); ++i2) { \ + TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, d-1, i2 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[d]); ++i3) { \ + TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, d-1, i3 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[d]); ++i4) { \ + TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, d-1, i4 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[d]); ++i5) { \ + TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, d-1, i5 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, d, ... ) \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[d]); ++i6) { \ + TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, d-1, i6 + m_offset[d] , __VA_ARGS__ ) \ + } + +#define TAGGED_LOOP_L_8_REDUX( val, tag, func, type, m_offset, extent, d, ... 
) \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[d]); ++i7) { \ + TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, d-1, i7 + m_offset[d] , __VA_ARGS__ ) \ + } + +// Left vs Right +#define TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + KOKKOS_ENABLE_IVDEP_MDRANGE \ + for( type i0 = (type)0; i0 < static_cast<type>(extent[0]); ++i0) { \ + TAGGED_APPLY_REDUX( val, tag, func, i0 + m_offset[0] ) \ + } + +#define TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[rank-1]); ++i1) { \ + TAGGED_LOOP_L_1_REDUX( val, tag, func, type, m_offset, extent, rank-2, i1 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i1 = (type)0; i1 < static_cast<type>(extent[0]); ++i1) { \ + TAGGED_LOOP_R_1_REDUX( val, tag, func, type, m_offset, extent, 1 , i1 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[rank-1]); ++i2) { \ + TAGGED_LOOP_L_2_REDUX( val, tag, func, type, m_offset, extent, rank-2, i2 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i2 = (type)0; i2 < static_cast<type>(extent[0]); ++i2) { \ + TAGGED_LOOP_R_2_REDUX( val, tag, func, type, m_offset, extent, 1 , i2 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[rank-1]); ++i3) { \ + TAGGED_LOOP_L_3_REDUX( val, tag, func, type, m_offset, extent, rank-2, i3 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i3 = (type)0; i3 < static_cast<type>(extent[0]); ++i3) { \ + TAGGED_LOOP_R_3_REDUX( val, tag, func, type, m_offset, extent, 1 , i3 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[rank-1]); ++i4) { \ + TAGGED_LOOP_L_4_REDUX( val, tag, func, type, m_offset, extent, rank-2, i4 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i4 = (type)0; i4 < static_cast<type>(extent[0]); ++i4) { \ + TAGGED_LOOP_R_4_REDUX( val, tag, func, type, m_offset, extent, 1 , i4 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[rank-1]); ++i5) { \ + TAGGED_LOOP_L_5_REDUX( val, tag, func, type, m_offset, extent, rank-2, i5 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i5 = (type)0; i5 < static_cast<type>(extent[0]); ++i5) { \ + TAGGED_LOOP_R_5_REDUX( val, tag, func, type, m_offset, extent, 1 , i5 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[rank-1]); ++i6) { \ + TAGGED_LOOP_L_6_REDUX( val, tag, func, type, m_offset, extent, rank-2, i6 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i6 = (type)0; i6 < static_cast<type>(extent[0]); ++i6) { \ + TAGGED_LOOP_R_6_REDUX( val, tag, func, type, m_offset, extent, 1 , i6 + m_offset[0] ) \ + } \ + } + +#define TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent, rank ) \ + if (is_left) { \ + for( type i7 = (type)0; i7 < 
static_cast<type>(extent[rank-1]); ++i7) { \ + TAGGED_LOOP_L_7_REDUX( val, tag, func, type, m_offset, extent, rank-2, i7 + m_offset[rank-1] ) \ + } \ + } \ + else { \ + for( type i7 = (type)0; i7 < static_cast<type>(extent[0]); ++i7) { \ + TAGGED_LOOP_R_7_REDUX( val, tag, func, type, m_offset, extent, 1 , i7 + m_offset[0] ) \ + } \ + } + +// Partial vs Full Tile +#define TAGGED_TILE_LOOP_1_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_1_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_2_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_2_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_3_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_3_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_4_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_4_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_5_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_5_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_6_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_6_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_7_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_7_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +#define TAGGED_TILE_LOOP_8_REDUX( val, tag, func, type, is_left, cond, m_offset, extent_full, extent_partial, rank ) \ + if (cond) { TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent_full, rank ) } \ + else { TAGGED_LOOP_LAYOUT_8_REDUX( val, tag, func, type, is_left, m_offset, extent_partial, rank ) } + +// end tagged macros + + + + +// Structs for calling loops +template < int Rank, bool IsLeft, typename IType, typename Tagged, typename Enable = void > +struct Tile_Loop_Type; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<1, IsLeft, IType, void, void > +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_1( func, IType, IsLeft, cond, offset, a, b, 1 ); + 
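+    // TILE_LOOP_1 selects the full-tile extents (a) when cond is true, otherwise the
+    // partial-tile extents (b); for rank 1 either layout reduces to roughly:
+    //   for ( IType i0 = 0; i0 < static_cast<IType>(extent[0]); ++i0 )  // extent = a or b
+    //     func( i0 + offset[0] );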
} + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_1_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 1 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<2, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_2( func, IType, IsLeft, cond, offset, a, b, 2 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_2_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 2 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<3, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_3( func, IType, IsLeft, cond, offset, a, b, 3 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_3_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 3 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<4, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_4( func, IType, IsLeft, cond, offset, a, b, 4 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_4_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 4 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<5, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_5( func, IType, IsLeft, cond, offset, a, b, 5 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_5_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 5 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<6, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_6( func, IType, IsLeft, cond, offset, a, b, 6 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, 
ExtentB const& b) + { + TILE_LOOP_6_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 6 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<7, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_7( func, IType, IsLeft, cond, offset, a, b, 7 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_7_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 7 ); + } +}; + +template < bool IsLeft, typename IType > +struct Tile_Loop_Type<8, IsLeft, IType, void, void> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_8( func, IType, IsLeft, cond, offset, a, b, 8 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TILE_LOOP_8_REDUX( value, func, IType, IsLeft, cond, offset, a, b, 8 ); + } +}; + +// tagged versions + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<1, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type > +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_1( Tagged(), func, IType, IsLeft, cond, offset, a, b, 1 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_1_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 1 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<2, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_2( Tagged(), func, IType, IsLeft, cond, offset, a, b, 2 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_2_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 2 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<3, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_3( Tagged(), func, IType, IsLeft, cond, offset, a, b, 3 ); + } + + template < typename ValType, typename Func, typename Offset, 
typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_3_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 3 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<4, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_4( Tagged(), func, IType, IsLeft, cond, offset, a, b, 4 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_4_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 4 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<5, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_5( Tagged(), func, IType, IsLeft, cond, offset, a, b, 5 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_5_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 5 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<6, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_6( Tagged(), func, IType, IsLeft, cond, offset, a, b, 6 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_6_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 6 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<7, IsLeft, IType, Tagged, typename std::enable_if< !std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_7( Tagged(), func, IType, IsLeft, cond, offset, a, b, 7 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_7_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 7 ); + } +}; + +template < bool IsLeft, typename IType, typename Tagged > +struct Tile_Loop_Type<8, IsLeft, IType, Tagged, typename std::enable_if< 
!std::is_same<Tagged,void>::value>::type> +{ + template < typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_8( Tagged(), func, IType, IsLeft, cond, offset, a, b, 8 ); + } + + template < typename ValType, typename Func, typename Offset, typename ExtentA, typename ExtentB > + static void apply(ValType &value, Func const& func, bool cond, Offset const& offset, ExtentA const& a, ExtentB const& b) + { + TAGGED_TILE_LOOP_8_REDUX( value, Tagged(), func, IType, IsLeft, cond, offset, a, b, 8 ); + } +}; +// end Structs for calling loops + + +template <typename T> +using is_void = std::is_same< T , void >; + +template < typename RP + , typename Functor + , typename Tag = void + , typename ValueType = void + , typename Enable = void + > +struct HostIterateTile; + +//For ParallelFor +template < typename RP + , typename Functor + , typename Tag + , typename ValueType + > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< is_void<ValueType >::value >::type > +{ + using index_type = typename RP::index_type; + using point_type = typename RP::point_type; + + using value_type = ValueType; + + inline + HostIterateTile( RP const& rp, Functor const& func ) + : m_rp(rp) + , m_func(func) + { + } + + inline + bool check_iteration_bounds( point_type& partial_tile , point_type& offset ) const { + bool is_full_tile = true; + + for ( int i = 0; i < RP::rank; ++i ) { + if ((offset[i] + m_rp.m_tile[i]) <= m_rp.m_upper[i]) { + partial_tile[i] = m_rp.m_tile[i] ; + } + else { + is_full_tile = false ; + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? 
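+                         // tile smaller than the range: use the extent remaining past this offset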
(m_rp.m_upper[i] - offset[i]) + : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range + } + } + + return is_full_tile ; + } // end check bounds + + + template <int Rank> + struct RankTag + { + typedef RankTag type; + enum { value = (int)Rank }; + }; + +#if KOKKOS_ENABLE_NEW_LOOP_MACROS + template <typename IType> + inline + void + operator()(IType tile_idx) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); + + } + +#else + template <typename IType> + inline + void + operator()(IType tile_idx) const + { operator_impl( tile_idx , RankTag<RP::rank>() ); } + // added due to compiler error when using sfinae to choose operator based on rank w/ cuda+serial + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<2> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Right + + } //end op() rank == 2 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<3> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end 
RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Right + + } //end op() rank == 3 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<4> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Right + + } //end op() rank == 4 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<5> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Right + + } //end op() rank == 5 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<6> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + 
} else { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Right + + } //end op() rank == 6 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<7> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Right + + } //end op() rank == 7 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<8> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Right + + } //end op() rank == 8 +#endif + + + template <typename... Args> + typename std::enable_if<( sizeof...(Args) == RP::rank && std::is_same<Tag,void>::value), void>::type + apply(Args &&... args) const + { + m_func(args...); + } + + template <typename... Args> + typename std::enable_if<( sizeof...(Args) == RP::rank && !std::is_same<Tag,void>::value), void>::type + apply(Args &&... 
args) const + { + m_func( m_tag, args...); + } + + + RP const& m_rp; + Functor const& m_func; + typename std::conditional< std::is_same<Tag,void>::value,int,Tag>::type m_tag; +// value_type & m_v; + +}; + + +// ValueType: For reductions +template < typename RP + , typename Functor + , typename Tag + , typename ValueType + > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void<ValueType >::value >::type > +{ + using index_type = typename RP::index_type; + using point_type = typename RP::point_type; + + using value_type = ValueType; + + inline + HostIterateTile( RP const& rp, Functor const& func, value_type & v ) + : m_rp(rp) //Cuda 7.0 does not like braces... + , m_func(func) + , m_v(v) // use with non-void ValueType struct + { +// Errors due to braces rather than parenthesis for init (with cuda 7.0) +// /home/ndellin/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp:1216:98: error: too many braces around initializer for ‘int’ [-fpermissive] +// /home/ndellin/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp:1216:98: error: aggregate value used where an integer was expected + } + + inline + bool check_iteration_bounds( point_type& partial_tile , point_type& offset ) const { + bool is_full_tile = true; + + for ( int i = 0; i < RP::rank; ++i ) { + if ((offset[i] + m_rp.m_tile[i]) <= m_rp.m_upper[i]) { + partial_tile[i] = m_rp.m_tile[i] ; + } + else { + is_full_tile = false ; + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) + : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range + } + } + + return is_full_tile ; + } // end check bounds + + + template <int Rank> + struct RankTag + { + typedef RankTag type; + enum { value = (int)Rank }; + }; + + +#if KOKKOS_ENABLE_NEW_LOOP_MACROS + template <typename IType> + inline + void + operator()(IType tile_idx) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); + + } + +#else + template <typename IType> + inline + void + operator()(IType tile_idx) const + { operator_impl( tile_idx , RankTag<RP::rank>() ); } + // added due to compiler error when using sfinae to choose operator based on rank + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<2> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + 
const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Right + + } //end op() rank == 2 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<3> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Right + + } //end op() rank == 3 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<4> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Right + + } //end op() rank == 4 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<5> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; 
+ } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Right + + } //end op() rank == 5 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<6> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Right + + } //end op() rank == 6 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<7> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Right + + } //end op() rank == 7 + + + template <typename IType> + inline + void operator_impl( IType tile_idx , const RankTag<8> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i<RP::rank; ++i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + else { + for (int i=RP::rank-1; i>=0; --i) { + m_offset[i] 
= (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Right + + } //end op() rank == 8 +#endif + + + template <typename... Args> + typename std::enable_if<( sizeof...(Args) == RP::rank && std::is_same<Tag,void>::value), void>::type + apply(Args &&... args) const + { + m_func(args... , m_v); + } + + template <typename... Args> + typename std::enable_if<( sizeof...(Args) == RP::rank && !std::is_same<Tag,void>::value), void>::type + apply(Args &&... args) const + { + m_func( m_tag, args... , m_v); + } + + + RP const& m_rp; + Functor const& m_func; + value_type & m_v; + typename std::conditional< std::is_same<Tag,void>::value,int,Tag>::type m_tag; + +}; + + +// ------------------------------------------------------------------ // + +// MDFunctor - wraps the range_policy and functor to pass to IterateTile +// Serial, Threads, OpenMP +// Cuda uses DeviceIterateTile directly within md_parallel_for +// ParallelReduce +template < typename MDRange, typename Functor, typename ValueType = void > +struct MDFunctor +{ + using range_policy = MDRange; + using functor_type = Functor; + using value_type = ValueType; + using work_tag = typename range_policy::work_tag; + using index_type = typename range_policy::index_type; + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange + , Functor + , work_tag + , value_type + >; + + + inline + MDFunctor( MDRange const& range, Functor const& f, ValueType & v ) + : m_range( range ) + , m_func( f ) + {} + + inline + MDFunctor( MDFunctor const& ) = default; + + inline + MDFunctor& operator=( MDFunctor const& ) = default; + + inline + MDFunctor( MDFunctor && ) = default; + + inline + MDFunctor& operator=( MDFunctor && ) = default; + +// KOKKOS_FORCEINLINE_FUNCTION //Caused cuda warning - __host__ warning + inline + void operator()(index_type t, value_type & v) const + { + iterate_type(m_range, m_func, v)(t); + } + + MDRange m_range; + Functor m_func; +}; + +// ParallelFor +template < typename MDRange, typename Functor > +struct MDFunctor< MDRange, Functor, void > +{ + using range_policy = MDRange; + using functor_type = Functor; + using work_tag = typename range_policy::work_tag; + using index_type = typename range_policy::index_type; + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange + , Functor + , work_tag + , void + >; + + + inline + MDFunctor( MDRange const& range, Functor const& f ) + : m_range( range ) + , m_func( f ) + {} + + inline + MDFunctor( MDFunctor const& ) = default; + + inline + MDFunctor& operator=( MDFunctor const& ) = default; + + inline + MDFunctor( MDFunctor && ) = default; + + inline + MDFunctor& operator=( MDFunctor && ) = default; + + inline + void operator()(index_type t) const + { + iterate_type(m_range, m_func)(t); + } + + MDRange m_range; + Functor m_func; +}; + +#undef 
KOKKOS_ENABLE_NEW_LOOP_MACROS + +} } } //end namespace Kokkos::Experimental::Impl + + +#endif diff --git a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp index 0ffbc0548ab663c9b6afa8799f162e3c7bbd7510..7d7fd3d1334901f1cc57e554f6c46f7f17ca09c4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_BitOps.hpp @@ -56,12 +56,13 @@ int bit_scan_forward( unsigned i ) { #if defined( __CUDA_ARCH__ ) return __ffs(i) - 1; -#elif defined( __GNUC__ ) || defined( __GNUG__ ) - return __builtin_ffs(i) - 1; -#elif defined( __INTEL_COMPILER ) +#elif defined( KOKKOS_COMPILER_INTEL ) return _bit_scan_forward(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return __cnttz4(i); +#elif defined( KOKKOS_COMPILER_GNU ) || defined( __GNUC__ ) || defined( __GNUG__ ) + return __builtin_ffs(i) - 1; #else - unsigned t = 1u; int r = 0; while ( i && ( i & t == 0 ) ) @@ -79,10 +80,12 @@ int bit_scan_reverse( unsigned i ) enum { shift = static_cast<int>( sizeof(unsigned) * CHAR_BIT - 1 ) }; #if defined( __CUDA_ARCH__ ) return shift - __clz(i); +#elif defined( KOKKOS_COMPILER_INTEL ) + return _bit_scan_reverse(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return shift - __cntlz4(i); #elif defined( __GNUC__ ) || defined( __GNUG__ ) return shift - __builtin_clz(i); -#elif defined( __INTEL_COMPILER ) - return _bit_scan_reverse(i); #else unsigned t = 1u << shift; int r = 0; @@ -101,10 +104,12 @@ int bit_count( unsigned i ) { #if defined( __CUDA_ARCH__ ) return __popc(i); -#elif defined( __GNUC__ ) || defined( __GNUG__ ) - return __builtin_popcount(i); #elif defined ( __INTEL_COMPILER ) return _popcnt32(i); +#elif defined( KOKKOS_COMPILER_IBM ) + return __popcnt4(i); +#elif defined( __GNUC__ ) || defined( __GNUG__ ) + return __builtin_popcount(i); #else // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetNaive i = i - ( ( i >> 1 ) & ~0u / 3u ); // temp diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index cd38eaa9da867a31a9274684f235456b30590d92..7c38430c44986d5dcffad9c03c9f587ffdc91863 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -147,7 +147,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0); } #endif -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif } @@ -155,7 +155,7 @@ setenv("MEMKIND_HBW_NODES", "1", 0); void finalize_internal( const bool all_spaces = false ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::finalize(); #endif @@ -449,5 +449,323 @@ void fence() Impl::fence_internal(); } +void print_configuration( std::ostream & out , const bool detail ) +{ + std::ostringstream msg; + + msg << "Compiler:" << std::endl; +#ifdef KOKKOS_COMPILER_APPLECC + msg << " KOKKOS_COMPILER_APPLECC: " << KOKKOS_COMPILER_APPLECC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_CLANG + msg << " KOKKOS_COMPILER_CLANG: " << KOKKOS_COMPILER_CLANG << std::endl; +#endif +#ifdef KOKKOS_COMPILER_CRAYC + msg << " KOKKOS_COMPILER_CRAYC: " << KOKKOS_COMPILER_CRAYC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_GNU + msg << " KOKKOS_COMPILER_GNU: " << KOKKOS_COMPILER_GNU << std::endl; +#endif +#ifdef KOKKOS_COMPILER_IBM + msg << " KOKKOS_COMPILER_IBM: " << KOKKOS_COMPILER_IBM << std::endl; +#endif +#ifdef KOKKOS_COMPILER_INTEL + msg << " KOKKOS_COMPILER_INTEL: " << KOKKOS_COMPILER_INTEL << std::endl; +#endif +#ifdef KOKKOS_COMPILER_NVCC + msg << " KOKKOS_COMPILER_NVCC: " << KOKKOS_COMPILER_NVCC << std::endl; +#endif +#ifdef KOKKOS_COMPILER_PGI + msg << " KOKKOS_COMPILER_PGI: " << KOKKOS_COMPILER_PGI << std::endl; +#endif + + + msg << "Architecture:" << std::endl; +#ifdef KOKKOS_ENABLE_ISA_KNC + msg << " KOKKOS_ENABLE_ISA_KNC: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_KNC: no" << std::endl; +#endif +#ifdef KOKKOS_ENABLE_ISA_POWERPCLE + msg << " KOKKOS_ENABLE_ISA_POWERPCLE: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_POWERPCLE: no" << std::endl; +#endif +#ifdef KOKKOS_ENABLE_ISA_X86_64 + msg << " KOKKOS_ENABLE_ISA_X86_64: yes" << std::endl; +#else + msg << " KOKKOS_ENABLE_ISA_X86_64: no" << std::endl; +#endif + + + msg << "Devices:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA: "; +#ifdef KOKKOS_ENABLE_CUDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_OPENMP: "; +#ifdef KOKKOS_ENABLE_OPENMP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PTHREAD: "; +#ifdef KOKKOS_ENABLE_PTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_STDTHREAD: "; +#ifdef KOKKOS_ENABLE_STDTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_WINTHREAD: "; +#ifdef KOKKOS_ENABLE_WINTHREAD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_QTHREADS: "; +#ifdef KOKKOS_ENABLE_QTHREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_SERIAL: "; +#ifdef KOKKOS_ENABLE_SERIAL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Default Device:" << std::endl; + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " 
KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL: "; +#ifdef KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Atomics:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA_ATOMICS: "; +#ifdef KOKKOS_ENABLE_CUDA_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_GNU_ATOMICS: "; +#ifdef KOKKOS_ENABLE_GNU_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_INTEL_ATOMICS: "; +#ifdef KOKKOS_ENABLE_INTEL_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_OPENMP_ATOMICS: "; +#ifdef KOKKOS_ENABLE_OPENMP_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_WINDOWS_ATOMICS: "; +#ifdef KOKKOS_ENABLE_WINDOWS_ATOMICS + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Vectorization:" << std::endl; + msg << " KOKKOS_ENABLE_PRAGMA_IVDEP: "; +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_LOOPCOUNT: "; +#ifdef KOKKOS_ENABLE_PRAGMA_LOOPCOUNT + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_SIMD: "; +#ifdef KOKKOS_ENABLE_PRAGMA_SIMD + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_UNROLL: "; +#ifdef KOKKOS_ENABLE_PRAGMA_UNROLL + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PRAGMA_VECTOR: "; +#ifdef KOKKOS_ENABLE_PRAGMA_VECTOR + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + msg << "Memory:" << std::endl; + msg << " KOKKOS_ENABLE_HBWSPACE: "; +#ifdef KOKKOS_ENABLE_HBWSPACE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_INTEL_MM_ALLOC: "; +#ifdef KOKKOS_ENABLE_INTEL_MM_ALLOC + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_POSIX_MEMALIGN: "; +#ifdef KOKKOS_ENABLE_POSIX_MEMALIGN + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + + + msg << "Options:" << std::endl; + msg << " KOKKOS_ENABLE_ASM: "; +#ifdef KOKKOS_ENABLE_ASM + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CXX1Z: "; +#ifdef KOKKOS_ENABLE_CXX1Z + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK: "; +#ifdef KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_HWLOC: "; +#ifdef KOKKOS_ENABLE_HWLOC + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_LIBRT: "; +#ifdef KOKKOS_ENABLE_LIBRT + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_MPI: "; +#ifdef KOKKOS_ENABLE_MPI + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_PROFILING: "; +#ifdef KOKKOS_ENABLE_PROFILING + msg << "yes" << 
std::endl; +#else + msg << "no" << std::endl; +#endif + +#ifdef KOKKOS_ENABLE_CUDA + msg << "Cuda Options:" << std::endl; + msg << " KOKKOS_ENABLE_CUDA_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CUDA_LAMBDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC: "; +#ifdef KOKKOS_ENABLE_CUDA_LDG_INTRINSIC + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE: "; +#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUDA_UVM: "; +#ifdef KOKKOS_ENABLE_CUDA_UVM + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CUSPARSE: "; +#ifdef KOKKOS_ENABLE_CUSPARSE + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + msg << " KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA: "; +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + msg << "yes" << std::endl; +#else + msg << "no" << std::endl; +#endif + +#endif + + msg << "\nRuntime Configuration:" << std::endl; +#ifdef KOKKOS_ENABLE_CUDA + Cuda::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_OPENMP + OpenMP::print_configuration(msg, detail); +#endif +#if defined( KOKKOS_ENABLE_PTHREAD ) || defined( WINTHREAD ) + Threads::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_QTHREADS + Qthreads::print_configuration(msg, detail); +#endif +#ifdef KOKKOS_ENABLE_SERIAL + Serial::print_configuration(msg, detail); +#endif + + out << msg.str() << std::endl; +} + } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b425b3f19fa159925364d20ac6d5bc85b45bebae --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAnalysis.hpp @@ -0,0 +1,653 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_FUNCTORANALYSIS_HPP +#define KOKKOS_FUNCTORANALYSIS_HPP + +#include <cstddef> +#include <Kokkos_Core_fwd.hpp> +#include <impl/Kokkos_Traits.hpp> +#include <impl/Kokkos_Tags.hpp> +#include <impl/Kokkos_Reducer.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +struct FunctorPatternInterface { + struct FOR {}; + struct REDUCE {}; + struct SCAN {}; +}; + +/** \brief Query Functor and execution policy argument tag for value type. + * + * If 'value_type' is not explicitly declared in the functor + * then attempt to deduce the type from FunctorType::operator() + * interface used by the pattern and policy. + * + * For the REDUCE pattern generate a Reducer and finalization function + * derived from what is available within the functor. + */ +template< typename PatternInterface , class Policy , class Functor > +struct FunctorAnalysis { +private: + + using FOR = FunctorPatternInterface::FOR ; + using REDUCE = FunctorPatternInterface::REDUCE ; + using SCAN = FunctorPatternInterface::SCAN ; + + //---------------------------------------- + + struct VOID {}; + + template< typename P = Policy , typename = std::false_type > + struct has_work_tag + { + using type = void ; + using wtag = VOID ; + }; + + template< typename P > + struct has_work_tag + < P , typename std::is_same< typename P::work_tag , void >::type > + { + using type = typename P::work_tag ; + using wtag = typename P::work_tag ; + }; + + using Tag = typename has_work_tag<>::type ; + using WTag = typename has_work_tag<>::wtag ; + + //---------------------------------------- + // Check for Functor::value_type, which is either a simple type T or T[] + + template< typename F , typename = std::false_type > + struct has_value_type { using type = void ; }; + + template< typename F > + struct has_value_type + < F , typename std::is_same< typename F::value_type , void >::type > + { + using type = typename F::value_type ; + + static_assert( ! std::is_reference< type >::value && + std::rank< type >::value <= 1 && + std::extent< type >::value == 0 + , "Kokkos Functor::value_type is T or T[]" ); + }; + + //---------------------------------------- + // If Functor::value_type does not exist then evaluate operator(), + // depending upon the pattern and whether the policy has a work tag, + // to determine the reduction or scan value_type. 
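The doc comment above describes the deduction step: when the functor declares no value_type, FunctorAnalysis takes the address of Functor::operator() and matches it against overloaded deduce() declarations, so the reduction value type becomes whatever the trailing reference parameter is. A minimal standalone sketch of that technique follows; the functor and helper names are hypothetical and the snippet does not depend on Kokkos:

#include <type_traits>

struct SumFunctor {
  // No value_type member; the reduction type must be deduced from operator().
  void operator()( int i , double & update ) const { update += i ; }
};

// Matches 'void (F::*)( Index , Value & ) const' and reports Value.
template< class F , class Index , class Value >
Value deduce_value( void (F::*)( Index , Value & ) const );

using deduced = decltype( deduce_value( & SumFunctor::operator() ) );
static_assert( std::is_same< deduced , double >::value ,
               "value type is deduced from the 'double &' argument" );

int main() { return 0 ; }
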
+ + template< typename F + , typename P = PatternInterface + , typename V = typename has_value_type<F>::type + , bool T = std::is_same< Tag , void >::value + > + struct deduce_value_type { using type = V ; }; + + template< typename F > + struct deduce_value_type< F , REDUCE , void , true > { + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( M , A & ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , REDUCE , void , false > { + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag , M , A & ) const ); + + template< typename M , typename A > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag const & , M , A & ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , SCAN , void , true > { + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( M , A & , I ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + template< typename F > + struct deduce_value_type< F , SCAN , void , false > { + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag , M , A & , I ) const ); + + template< typename M , typename A , typename I > + KOKKOS_INLINE_FUNCTION static + A deduce( void (Functor::*)( WTag const & , M , A & , I ) const ); + + using type = decltype( deduce( & F::operator() ) ); + }; + + //---------------------------------------- + + using candidate_type = typename deduce_value_type< Functor >::type ; + + enum { candidate_is_void = std::is_same< candidate_type , void >::value + , candidate_is_array = std::rank< candidate_type >::value == 1 }; + + //---------------------------------------- + +public: + + using value_type = typename std::remove_extent< candidate_type >::type ; + + static_assert( ! std::is_const< value_type >::value + , "Kokkos functor operator reduce argument cannot be const" ); + +private: + + // Stub to avoid defining a type 'void &' + using ValueType = typename + std::conditional< candidate_is_void , VOID , value_type >::type ; + +public: + + using pointer_type = typename + std::conditional< candidate_is_void , void , ValueType * >::type ; + + using reference_type = typename + std::conditional< candidate_is_array , ValueType * , typename + std::conditional< ! candidate_is_void , ValueType & , void > + ::type >::type ; + +private: + + template< bool IsArray , class FF > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< IsArray , unsigned >::type + get_length( FF const & f ) { return f.value_count ; } + + template< bool IsArray , class FF > + KOKKOS_INLINE_FUNCTION static + typename std::enable_if< ! IsArray , unsigned >::type + get_length( FF const & ) { return 1 ; } + +public: + + enum { StaticValueSize = ! candidate_is_void && + ! candidate_is_array + ? 
sizeof(ValueType) : 0 }; + + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_count( const Functor & f ) + { return FunctorAnalysis::template get_length< candidate_is_array >(f); } + + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_size( const Functor & f ) + { return FunctorAnalysis::template get_length< candidate_is_array >(f) * sizeof(ValueType); } + + //---------------------------------------- + + template< class Unknown > + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_count( const Unknown & ) + { return 1 ; } + + template< class Unknown > + KOKKOS_FORCEINLINE_FUNCTION static + unsigned value_size( const Unknown & ) + { return sizeof(ValueType); } + +private: + + enum INTERFACE : int + { DISABLE = 0 + , NO_TAG_NOT_ARRAY = 1 + , NO_TAG_IS_ARRAY = 2 + , HAS_TAG_NOT_ARRAY = 3 + , HAS_TAG_IS_ARRAY = 4 + , DEDUCED = + ! std::is_same< PatternInterface , REDUCE >::value ? DISABLE : ( + std::is_same<Tag,void>::value + ? (candidate_is_array ? NO_TAG_IS_ARRAY : NO_TAG_NOT_ARRAY) + : (candidate_is_array ? HAS_TAG_IS_ARRAY : HAS_TAG_NOT_ARRAY) ) + }; + + //---------------------------------------- + // parallel_reduce join operator + + template< class F , INTERFACE > + struct has_join_function ; + + template< class F > + struct has_join_function< F , NO_TAG_NOT_ARRAY > + { + typedef volatile ValueType & vref_type ; + typedef volatile const ValueType & cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( *dst , *src ); } + }; + + template< class F > + struct has_join_function< F , NO_TAG_IS_ARRAY > + { + typedef volatile ValueType * vref_type ; + typedef volatile const ValueType * cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( dst , src ); } + }; + + template< class F > + struct has_join_function< F , HAS_TAG_NOT_ARRAY > + { + typedef volatile ValueType & vref_type ; + typedef volatile const ValueType & cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( WTag() , *dst , *src ); } + }; + + template< class F > + struct has_join_function< F , HAS_TAG_IS_ARRAY > + { + typedef volatile ValueType * vref_type ; + typedef volatile const ValueType * cvref_type ; + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , 
vref_type , cvref_type ) ); + + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { f.join( WTag() , dst , src ); } + }; + + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceJoin + { + KOKKOS_INLINE_FUNCTION static + void join( F const & f + , ValueType volatile * dst + , ValueType volatile const * src ) + { + const int n = FunctorAnalysis::value_count( f ); + for ( int i = 0 ; i < n ; ++i ) dst[i] += src[i]; + } + }; + + template< class F > + struct DeduceJoin< F , DISABLE , void > + { + KOKKOS_INLINE_FUNCTION static + void join( F const & + , ValueType volatile * + , ValueType volatile const * ) {} + }; + + template< class F , INTERFACE I > + struct DeduceJoin< F , I , + decltype( has_join_function<F,I>::enable_if( & F::join ) ) > + : public has_join_function<F,I> {}; + + //---------------------------------------- + + template< class , INTERFACE > + struct has_init_function ; + + template< class F > + struct has_init_function< F , NO_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( *dst ); } + }; + + template< class F > + struct has_init_function< F , NO_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( dst ); } + }; + + template< class F > + struct has_init_function< F , HAS_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( WTag(), *dst ); } + }; + + template< class F > + struct has_init_function< F , HAS_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void init( F const & f , ValueType * dst ) + { f.init( WTag(), dst ); } + }; + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceInit + { + KOKKOS_INLINE_FUNCTION static + void init( F const & , ValueType * dst ) { new(dst) ValueType(); } + }; + + template< class F > + struct DeduceInit< F , DISABLE , void > + { + KOKKOS_INLINE_FUNCTION static + void init( F const & , ValueType * ) {} + }; + + template< class F , INTERFACE I > + struct DeduceInit< F , I , + decltype( has_init_function<F,I>::enable_if( & F::init ) ) > + : public has_init_function<F,I> {}; + + //---------------------------------------- + +public: + + struct Reducer + { + private: + + Functor const & m_functor ; + ValueType * const m_result ; + int const m_length ; + + public: + + using reducer = Reducer ; + 
using value_type = FunctorAnalysis::value_type ; + using memory_space = void ; + using reference_type = FunctorAnalysis::reference_type ; + + KOKKOS_INLINE_FUNCTION + void join( ValueType volatile * dst + , ValueType volatile const * src ) const noexcept + { DeduceJoin<>::join( m_functor , dst , src ); } + + KOKKOS_INLINE_FUNCTION + void init( ValueType * dst ) const noexcept + { DeduceInit<>::init( m_functor , dst ); } + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( Functor const & arg_functor + , ValueType * arg_value = 0 + , int arg_length = 0 ) noexcept + : m_functor( arg_functor ), m_result(arg_value), m_length(arg_length) {} + + KOKKOS_INLINE_FUNCTION + constexpr int length() const noexcept { return m_length ; } + + KOKKOS_INLINE_FUNCTION + ValueType & operator[]( int i ) const noexcept + { return m_result[i]; } + + private: + + template< bool IsArray > + constexpr + typename std::enable_if< IsArray , ValueType * >::type + ref() const noexcept { return m_result ; } + + template< bool IsArray > + constexpr + typename std::enable_if< ! IsArray , ValueType & >::type + ref() const noexcept { return *m_result ; } + + public: + + KOKKOS_INLINE_FUNCTION + auto result() const noexcept + -> decltype( Reducer::template ref< candidate_is_array >() ) + { return Reducer::template ref< candidate_is_array >(); } + }; + + //---------------------------------------- + +private: + + template< class , INTERFACE > + struct has_final_function ; + + // No tag, not array + template< class F > + struct has_final_function< F , NO_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( *dst ); } + }; + + // No tag, is array + template< class F > + struct has_final_function< F , NO_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( dst ); } + }; + + // Has tag, not array + template< class F > + struct has_final_function< F , HAS_TAG_NOT_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType & ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( WTag(), *dst ); } + }; + + // Has tag, is array + template< class F > + struct has_final_function< F , HAS_TAG_IS_ARRAY > + { + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (F::*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void enable_if( void (*)( WTag const & , ValueType * ) ); + + KOKKOS_INLINE_FUNCTION static + void final( F const & f , ValueType * dst ) + { f.final( WTag(), dst ); } + }; + + template< class F = Functor + , INTERFACE = DEDUCED + , typename = void > + struct DeduceFinal + { + KOKKOS_INLINE_FUNCTION + static void final( F const & , ValueType * ) {} + }; + + 
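The primary DeduceFinal template above is the no-op fallback; the specialization that follows (like DeduceJoin and DeduceInit earlier) becomes viable only when decltype( has_final_function<F,I>::enable_if( &F::final ) ) is well formed, i.e. when the functor actually declares a compatible final() member. A minimal standalone sketch of that expression-SFINAE detection idiom, with hypothetical names and no Kokkos dependency:

#include <iostream>

struct WithFinal    { void final( double & dst ) const { dst *= 2.0 ; } };
struct WithoutFinal {};

// Declared only; used inside decltype to test that &F::final has this signature.
template< class F >
void enable_if_has_final( void (F::*)( double & ) const );

// Primary template: no usable final() detected -> no-op.
template< class F , typename = void >
struct DeduceFinalSketch {
  static void final( F const & , double & ) {}
};

// Viable only when the decltype expression below is well formed.
template< class F >
struct DeduceFinalSketch< F , decltype( enable_if_has_final<F>( & F::final ) ) > {
  static void final( F const & f , double & dst ) { f.final( dst ) ; }
};

int main() {
  double a = 1.0 , b = 1.0 ;
  DeduceFinalSketch< WithFinal    >::final( WithFinal()    , a );  // calls final(): a == 2.0
  DeduceFinalSketch< WithoutFinal >::final( WithoutFinal() , b );  // no-op fallback: b == 1.0
  std::cout << a << " " << b << std::endl ;
  return 0 ;
}
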
template< class F , INTERFACE I > + struct DeduceFinal< F , I , + decltype( has_final_function<F,I>::enable_if( & F::final ) ) > + : public has_init_function<F,I> {}; + +public: + + static void final( Functor const & f , ValueType * result ) + { DeduceFinal<>::final( f , result ); } + +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* KOKKOS_FUNCTORANALYSIS_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 96d30d0c4acac8af49f6b2c25ef2bb1c04508a28..eb1f5ce96c28fa05d70dd2bf840133688d82b247 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -62,7 +62,7 @@ #include <memkind.h> #endif -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #endif @@ -198,7 +198,7 @@ void * HBWSpace::allocate( const size_t arg_alloc_size ) const case STD_MALLOC: msg << "STD_MALLOC" ; break ; } msg << " ]( " << arg_alloc_size << " ) FAILED" ; - if ( ptr == NULL ) { msg << " NULL" ; } + if ( ptr == NULL ) { msg << " NULL" ; } else { msg << " NOT ALIGNED " << ptr ; } std::cerr << msg.str() << std::endl ; @@ -218,7 +218,7 @@ void HBWSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_s if ( m_alloc_mech == STD_MALLOC ) { void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1); memkind_free(MEMKIND_TYPE, alloc_ptr ); - } + } } } @@ -249,7 +249,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::Experimental::HBWSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -278,7 +278,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space ) , m_space( arg_space ) { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -297,7 +297,7 @@ SharedAllocationRecord( const Kokkos::Experimental::HBWSpace & arg_space void * SharedAllocationRecord< Kokkos::Experimental::HBWSpace , void >:: allocate_tracked( const Kokkos::Experimental::HBWSpace & arg_space - , const std::string & arg_alloc_label + , const std::string & arg_alloc_label , const size_t arg_alloc_size ) { if ( ! 
arg_alloc_size ) return (void *) 0 ; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 3cd603728e52f1b851219a01f91eb0d5358e4c86..67be86c9a3ed8595a35915f06a4b8e4ea5ded0b3 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,14 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include <algorithm> #include <Kokkos_Macros.hpp> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_Interface.hpp> #endif /*--------------------------------------------------------------------------*/ @@ -292,7 +292,7 @@ void * HostSpace::allocate( const size_t arg_alloc_size ) const case INTEL_MM_ALLOC: msg << "INTEL_MM_ALLOC" ; break ; } msg << " ]( " << arg_alloc_size << " ) FAILED" ; - if ( ptr == NULL ) { msg << " NULL" ; } + if ( ptr == NULL ) { msg << " NULL" ; } else { msg << " NOT ALIGNED " << ptr ; } std::cerr << msg.str() << std::endl ; @@ -312,7 +312,7 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_ if ( m_alloc_mech == STD_MALLOC ) { void * alloc_ptr = *(reinterpret_cast<void **>(arg_alloc_ptr) -1); free( alloc_ptr ); - } + } #if defined( KOKKOS_ENABLE_INTEL_MM_ALLOC ) else if ( m_alloc_mech == INTEL_MM_ALLOC ) { @@ -359,7 +359,7 @@ deallocate( SharedAllocationRecord< void , void > * arg_rec ) SharedAllocationRecord< Kokkos::HostSpace , void >:: ~SharedAllocationRecord() { - #if (KOKKOS_ENABLE_PROFILING) + #if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::deallocateData( Kokkos::Profiling::SpaceHandle(Kokkos::HostSpace::name()),RecordBase::m_alloc_ptr->m_label, @@ -388,7 +388,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space ) , m_space( arg_space ) { -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) if(Kokkos::Profiling::profileLibraryLoaded()) { Kokkos::Profiling::allocateData(Kokkos::Profiling::SpaceHandle(arg_space.name()),arg_label,data(),arg_alloc_size); } @@ -406,7 +406,7 @@ SharedAllocationRecord( const Kokkos::HostSpace & arg_space void * SharedAllocationRecord< Kokkos::HostSpace , void >:: allocate_tracked( const Kokkos::HostSpace & arg_space - , const std::string & arg_alloc_label + , const std::string & arg_alloc_label , const size_t arg_alloc_size ) { if ( ! arg_alloc_size ) return (void *) 0 ; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ac200209c72bca381f60b9564944bc444748f0fb --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -0,0 +1,463 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <limits> +#include <Kokkos_Macros.hpp> +#include <impl/Kokkos_HostThreadTeam.hpp> +#include <impl/Kokkos_Error.hpp> +#include <impl/Kokkos_spinwait.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +void HostThreadTeamData::organize_pool + ( HostThreadTeamData * members[] , const int size ) +{ + bool ok = true ; + + // Verify not already a member of a pool: + for ( int rank = 0 ; rank < size && ok ; ++rank ) { + ok = ( 0 != members[rank] ) && ( 0 == members[rank]->m_pool_scratch ); + } + + if ( ok ) { + + int64_t * const root_scratch = members[0]->m_scratch ; + + for ( int i = m_pool_rendezvous ; i < m_pool_reduce ; ++i ) { + root_scratch[i] = 0 ; + } + + { + HostThreadTeamData ** const pool = + (HostThreadTeamData **) (root_scratch + m_pool_members); + + // team size == 1, league size == pool_size + + for ( int rank = 0 ; rank < size ; ++rank ) { + HostThreadTeamData * const mem = members[ rank ] ; + mem->m_pool_scratch = root_scratch ; + mem->m_team_scratch = mem->m_scratch ; + mem->m_pool_rank = rank ; + mem->m_pool_size = size ; + mem->m_team_base = rank ; + mem->m_team_rank = 0 ; + mem->m_team_size = 1 ; + mem->m_team_alloc = 1 ; + mem->m_league_rank = rank ; + mem->m_league_size = size ; + mem->m_pool_rendezvous_step = 0 ; + mem->m_team_rendezvous_step = 0 ; + pool[ rank ] = mem ; + } + } + + Kokkos::memory_fence(); + } + else { + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_pool ERROR pool already exists"); + } +} + +void HostThreadTeamData::disband_pool() 
+{ + m_work_range.first = -1 ; + m_work_range.second = -1 ; + m_pool_scratch = 0 ; + m_team_scratch = 0 ; + m_pool_rank = 0 ; + m_pool_size = 1 ; + m_team_base = 0 ; + m_team_rank = 0 ; + m_team_size = 1 ; + m_team_alloc = 1 ; + m_league_rank = 0 ; + m_league_size = 1 ; + m_pool_rendezvous_step = 0 ; + m_team_rendezvous_step = 0 ; +} + +int HostThreadTeamData::organize_team( const int team_size ) +{ + // Pool is initialized + const bool ok_pool = 0 != m_pool_scratch ; + + // Team is not set + const bool ok_team = + m_team_scratch == m_scratch && + m_team_base == m_pool_rank && + m_team_rank == 0 && + m_team_size == 1 && + m_team_alloc == 1 && + m_league_rank == m_pool_rank && + m_league_size == m_pool_size ; + + if ( ok_pool && ok_team ) { + + if ( team_size <= 0 ) return 0 ; // No teams to organize + + if ( team_size == 1 ) return 1 ; // Already organized in teams of one + + HostThreadTeamData * const * const pool = + (HostThreadTeamData **) (m_pool_scratch + m_pool_members); + + // "league_size" in this context is the number of concurrent teams + // that the pool can accommodate. Excess threads are idle. + const int league_size = m_pool_size / team_size ; + const int team_alloc_size = m_pool_size / league_size ; + const int team_alloc_rank = m_pool_rank % team_alloc_size ; + const int league_rank = m_pool_rank / team_alloc_size ; + const int team_base_rank = league_rank * team_alloc_size ; + + m_team_scratch = pool[ team_base_rank ]->m_scratch ; + m_team_base = team_base_rank ; + // This needs to check for overflow: if m_pool_size % team_alloc_size != 0 + // there are two corner cases: + // (i) if team_alloc_size == team_size there might be a non-full + // zombie team around (for example m_pool_size = 5 and team_size = 2) + // (ii) if team_alloc > team_size then the last team might have fewer + // threads than the others + m_team_rank = ( team_base_rank + team_size <= m_pool_size ) && + ( team_alloc_rank < team_size ) ? + team_alloc_rank : -1; + m_team_size = team_size ; + m_team_alloc = team_alloc_size ; + m_league_rank = league_rank ; + m_league_size = league_size ; + m_team_rendezvous_step = 0 ; + + if ( team_base_rank == m_pool_rank ) { + // Initialize team's rendezvous memory + for ( int i = m_team_rendezvous ; i < m_pool_reduce ; ++i ) { + m_scratch[i] = 0 ; + } + // Make sure the team's rendezvous memory initialization + // is written out before proceeding. + Kokkos::memory_fence(); + } + + // Organizing threads into a team performs a barrier across the + // entire pool to ensure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + + if ( pool_rendezvous() ) { + pool_rendezvous_release(); + } + } + else { + Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::HostThreadTeamData::organize_team ERROR"); + } + + return 0 <= m_team_rank ; +} + +void HostThreadTeamData::disband_team() +{ + m_team_scratch = m_scratch ; + m_team_base = m_pool_rank ; + m_team_rank = 0 ; + m_team_size = 1 ; + m_team_alloc = 1 ; + m_league_rank = m_pool_rank ; + m_league_size = m_pool_size ; + m_team_rendezvous_step = 0 ; +} + +//---------------------------------------------------------------------------- +/* pattern for rendezvous + * + * if ( rendezvous() ) { + * ... all other threads are still in team_rendezvous() ... + * rendezvous_release(); + * ... all other threads are released from team_rendezvous() ...
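+ *
+ * (The minimal instance of this pattern is team_barrier() in
+ *  Kokkos_HostThreadTeam.hpp, which reduces to:
+ *  if ( team_rendezvous() ) team_rendezvous_release(); )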
+ * } + */ + +int HostThreadTeamData::rendezvous( int64_t * const buffer + , int & rendezvous_step + , int const size + , int const rank ) noexcept +{ + enum : int { shift_byte = 3 }; + enum : int { size_byte = ( 01 << shift_byte ) }; // == 8 + enum : int { mask_byte = size_byte - 1 }; + + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + + // Cycle step values: 1 <= step <= size_val_cycle + // An odd multiple of memory cycle so that when a memory location + // is reused it has a different value. + // Must be representable within a single byte: size_val_cycle < 16 + + enum : int { size_val_cycle = 3 * size_mem_cycle }; + + // Requires: + // Called by rank = [ 0 .. size ) + // buffer aligned to int64_t[4] + + // A sequence of rendezvous uses four cycled locations in memory + // and non-equal cycled synchronization values to + // 1) prevent rendezvous from overtaking one another and + // 2) give each spin wait location an int64_t[4] span + // so that it has its own cache line. + + const int step = ( rendezvous_step % size_val_cycle ) + 1 ; + + rendezvous_step = step ; + + // The leading int64_t[4] span is for thread 0 to write + // and all other threads to read spin-wait. + // sync_offset is the index into this array for this step. + + const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ; + + union { + int64_t full ; + int8_t byte[8] ; + } value ; + + if ( rank ) { + + const int group_begin = rank << shift_byte ; // == rank * size_byte + + if ( group_begin < size ) { + + // This thread waits for threads + // [ group_begin .. group_begin + 8 ) + // [ rank*8 .. rank*8 + 8 ) + // to write to their designated bytes. + + const int end = group_begin + size_byte < size + ? size_byte : size - group_begin ; + + value.full = 0 ; + for ( int i = 0 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + + store_fence(); // This should not be needed but fixes #742 + + spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] + , value.full ); + } + + { + // This thread sets its designated byte. + // ( rank % size_byte ) + + // ( ( rank / size_byte ) * size_byte * size_mem_cycle ) + + // ( sync_offset * size_byte ) + const int offset = ( rank & mask_byte ) + + ( ( rank & ~mask_byte ) << shift_mem_cycle ) + + ( sync_offset << shift_byte ); + + // All of this thread's previous memory stores must be complete before + // this thread stores the step value at this thread's designated byte + // in the shared synchronization array. + + Kokkos::memory_fence(); + + ((volatile int8_t*) buffer)[ offset ] = int8_t( step ); + + // Memory fence to push the previous store out + Kokkos::memory_fence(); + } + + // Wait for thread 0 to release all other threads + + spinwait_until_equal( buffer[ step & mask_mem_cycle ] , int64_t(step) ); + + } + else { + // Thread 0 waits for threads [1..7] + // to write to their designated bytes. + + const int end = size_byte < size ? 8 : size ; + + value.full = 0 ; + for ( int i = 1 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + + spinwait_until_equal( buffer[ sync_offset ], value.full ); + } + + return rank ? 
0 : 1 ; +} + +void HostThreadTeamData:: + rendezvous_release( int64_t * const buffer + , int const rendezvous_step ) noexcept +{ + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + + // Requires: + // Called after team_rendezvous + // Called only by true == team_rendezvous(root) + + // Memory fence to be sure all previous writes are complete: + Kokkos::memory_fence(); + + ((volatile int64_t*) buffer)[ rendezvous_step & mask_mem_cycle ] = + int64_t( rendezvous_step ); + + // Memory fence to push the store out + Kokkos::memory_fence(); +} + +//---------------------------------------------------------------------------- + +int HostThreadTeamData::get_work_stealing() noexcept +{ + pair_int_t w( -1 , -1 ); + + if ( 1 == m_team_size || team_rendezvous() ) { + + // Attempt first from beginning of my work range + for ( int attempt = m_work_range.first < m_work_range.second ; attempt ; ) { + + // Query and attempt to update m_work_range + // from: [ w.first , w.second ) + // to: [ w.first + 1 , w.second ) = w_new + // + // If w is invalid then is just a query. + + const pair_int_t w_new( w.first + 1 , w.second ); + + w = Kokkos::atomic_compare_exchange( & m_work_range, w, w_new ); + + if ( w.first < w.second ) { + // m_work_range is viable + + // If steal is successful then don't repeat attempt to steal + attempt = ! ( w_new.first == w.first + 1 && + w_new.second == w.second ); + } + else { + // m_work_range is not viable + w.first = -1 ; + w.second = -1 ; + + attempt = 0 ; + } + } + + if ( w.first == -1 && m_steal_rank != m_pool_rank ) { + + HostThreadTeamData * const * const pool = + (HostThreadTeamData**)( m_pool_scratch + m_pool_members ); + + // Attempt from begining failed, try to steal from end of neighbor + + pair_int_t volatile * steal_range = + & ( pool[ m_steal_rank ]->m_work_range ); + + for ( int attempt = true ; attempt ; ) { + + // Query and attempt to update steal_work_range + // from: [ w.first , w.second ) + // to: [ w.first , w.second - 1 ) = w_new + // + // If w is invalid then is just a query. + + const pair_int_t w_new( w.first , w.second - 1 ); + + w = Kokkos::atomic_compare_exchange( steal_range, w, w_new ); + + if ( w.first < w.second ) { + // steal_work_range is viable + + // If steal is successful then don't repeat attempt to steal + attempt = ! ( w_new.first == w.first && + w_new.second == w.second - 1 ); + } + else { + // steal_work_range is not viable, move to next member + w.first = -1 ; + w.second = -1 ; + + // We need to figure out whether the next team is active + // m_steal_rank + m_team_alloc could be the next base_rank to steal from + // but only if there are another m_team_size threads available so that that + // base rank has a full team. + m_steal_rank = m_steal_rank + m_team_alloc + m_team_size <= m_pool_size ? + m_steal_rank + m_team_alloc : 0; + + steal_range = & ( pool[ m_steal_rank ]->m_work_range ); + + // If tried all other members then don't repeat attempt to steal + attempt = m_steal_rank != m_pool_rank ; + } + } + + if ( w.first != -1 ) w.first = w.second - 1 ; + } + + if ( 1 < m_team_size ) { + // Must share the work index + *((int volatile *) team_reduce()) = w.first ; + + team_rendezvous_release(); + } + } + else if ( 1 < m_team_size ) { + w.first = *((int volatile *) team_reduce()); + } + + // May exit because successfully stole work and w is good. + // May exit because no work left to steal and w = (-1,-1). 
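+  //
+  // Callers normally consume this index through get_work_stealing_chunk(),
+  // which converts chunk index i into the half-open iteration range
+  // [ i * m_work_chunk , min( ( i + 1 ) * m_work_chunk , m_work_end ) ).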
+ +#if 0 +fprintf(stdout,"HostThreadTeamData::get_work_stealing() pool(%d of %d) %d\n" + , m_pool_rank , m_pool_size , w.first ); +fflush(stdout); +#endif + + return w.first ; +} + +} // namespace Impl +} // namespace Kokkos + diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b5918eaefc2ee74e951b8caabdeb0d4e8c488c0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -0,0 +1,1090 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_HOSTTHREADTEAM_HPP +#define KOKKOS_IMPL_HOSTTHREADTEAM_HPP + +#include <Kokkos_Core_fwd.hpp> +#include <Kokkos_Pair.hpp> +#include <Kokkos_Atomic.hpp> +#include <Kokkos_ExecPolicy.hpp> +#include <impl/Kokkos_FunctorAdapter.hpp> +#include <impl/Kokkos_Reducer.hpp> +#include <impl/Kokkos_FunctorAnalysis.hpp> + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< class HostExecSpace > +class HostThreadTeamMember ; + +class HostThreadTeamData { +public: + + template< class > friend class HostThreadTeamMember ; + + // Assume upper bounds on number of threads: + // pool size <= 1024 threads + // pool rendezvous <= ( 1024 / 8 ) * 4 + 4 = 516 + // team size <= 64 threads + // team rendezvous <= ( 64 / 8 ) * 4 + 4 = 36 + + enum : int { max_pool_members = 1024 }; + enum : int { max_team_members = 64 }; + enum : int { max_pool_rendezvous = ( max_pool_members / 8 ) * 4 + 4 }; + enum : int { max_team_rendezvous = ( max_team_members / 8 ) * 4 + 4 }; + +private: + + // per-thread scratch memory buffer chunks: + // + // [ pool_members ] = [ m_pool_members .. m_pool_rendezvous ) + // [ pool_rendezvous ] = [ m_pool_rendezvous .. m_team_rendezvous ) + // [ team_rendezvous ] = [ m_team_rendezvous .. m_pool_reduce ) + // [ pool_reduce ] = [ m_pool_reduce .. m_team_reduce ) + // [ team_reduce ] = [ m_team_reduce .. m_team_shared ) + // [ team_shared ] = [ m_team_shared .. m_thread_local ) + // [ thread_local ] = [ m_thread_local .. m_scratch_size ) + + enum : int { m_pool_members = 0 }; + enum : int { m_pool_rendezvous = m_pool_members + max_pool_members }; + enum : int { m_team_rendezvous = m_pool_rendezvous + max_pool_rendezvous }; + enum : int { m_pool_reduce = m_team_rendezvous + max_team_rendezvous }; + + using pair_int_t = Kokkos::pair<int,int> ; + + pair_int_t m_work_range ; + int64_t m_work_end ; + int64_t * m_scratch ; // per-thread buffer + int64_t * m_pool_scratch ; // == pool[0]->m_scratch + int64_t * m_team_scratch ; // == pool[ 0 + m_team_base ]->m_scratch + int m_pool_rank ; + int m_pool_size ; + int m_team_reduce ; + int m_team_shared ; + int m_thread_local ; + int m_scratch_size ; + int m_team_base ; + int m_team_rank ; + int m_team_size ; + int m_team_alloc ; + int m_league_rank ; + int m_league_size ; + int m_work_chunk ; + int m_steal_rank ; // work stealing rank + int mutable m_pool_rendezvous_step ; + int mutable m_team_rendezvous_step ; + + HostThreadTeamData * team_member( int r ) const noexcept + { return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[m_team_base+r]; } + + // Rendezvous pattern: + // if ( rendezvous(root) ) { + // ... only root thread here while all others wait ... + // rendezvous_release(); + // } + // else { + // ... all other threads release here ... + // } + // + // Requires: buffer[ ( max_threads / 8 ) * 4 + 4 ]; 0 == max_threads % 8 + // + static + int rendezvous( int64_t * const buffer + , int & rendezvous_step + , int const size + , int const rank ) noexcept ; + + static + void rendezvous_release( int64_t * const buffer + , int const rendezvous_step ) noexcept ; + +public: + + inline + int team_rendezvous( int const root ) const noexcept + { + return 1 == m_team_size ?
1 : + rendezvous( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step + , m_team_size + , ( m_team_rank + m_team_size - root ) % m_team_size ); + } + + inline + int team_rendezvous() const noexcept + { + return 1 == m_team_size ? 1 : + rendezvous( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step + , m_team_size + , m_team_rank ); + } + + inline + void team_rendezvous_release() const noexcept + { + if ( 1 < m_team_size ) { + rendezvous_release( m_team_scratch + m_team_rendezvous + , m_team_rendezvous_step ); + } + } + + inline + int pool_rendezvous() const noexcept + { + return 1 == m_pool_size ? 1 : + rendezvous( m_pool_scratch + m_pool_rendezvous + , m_pool_rendezvous_step + , m_pool_size + , m_pool_rank ); + } + + inline + void pool_rendezvous_release() const noexcept + { + if ( 1 < m_pool_size ) { + rendezvous_release( m_pool_scratch + m_pool_rendezvous + , m_pool_rendezvous_step ); + } + } + + //---------------------------------------- + + constexpr HostThreadTeamData() noexcept + : m_work_range(-1,-1) + , m_work_end(0) + , m_scratch(0) + , m_pool_scratch(0) + , m_team_scratch(0) + , m_pool_rank(0) + , m_pool_size(1) + , m_team_reduce(0) + , m_team_shared(0) + , m_thread_local(0) + , m_scratch_size(0) + , m_team_base(0) + , m_team_rank(0) + , m_team_size(1) + , m_team_alloc(1) + , m_league_rank(0) + , m_league_size(1) + , m_work_chunk(0) + , m_steal_rank(0) + , m_pool_rendezvous_step(0) + , m_team_rendezvous_step(0) + {} + + //---------------------------------------- + // Organize array of members into a pool. + // The 0th member is the root of the pool. + // Requires: members are not already in a pool. + // Requires: called by one thread. + // Pool members are ordered as "close" - sorted by NUMA and then CORE + // Each thread is its own team with team_size == 1. + static void organize_pool( HostThreadTeamData * members[] + , const int size ); + + // Called by each thread within the pool + void disband_pool(); + + //---------------------------------------- + // Each thread within a pool organizes itself into a team. + // Must be called by all threads of the pool. + // Organizing threads into a team performs a barrier across the + // entire pool to insure proper initialization of the team + // rendezvous mechanism before a team rendezvous can be performed. + // + // Return true if a valid member of a team. + // Return false if not a member and thread should be idled. + int organize_team( const int team_size ); + + // Each thread within a pool disbands itself from current team. + // Each thread becomes its own team with team_size == 1. + // Must be called by all threads of the pool. + void disband_team(); + + //---------------------------------------- + + constexpr int pool_rank() const { return m_pool_rank ; } + constexpr int pool_size() const { return m_pool_size ; } + + HostThreadTeamData * pool_member( int r ) const noexcept + { return ((HostThreadTeamData**)(m_pool_scratch+m_pool_members))[r]; } + + //---------------------------------------- + +private: + + enum : int { mask_to_16 = 0x0f }; // align to 16 bytes + enum : int { shift_to_8 = 3 }; // size to 8 bytes + +public: + + static constexpr int align_to_int64( int n ) + { return ( ( n + mask_to_16 ) & ~mask_to_16 ) >> shift_to_8 ; } + + constexpr int pool_reduce_bytes() const + { return m_scratch_size ? 
sizeof(int64_t) * ( m_team_reduce - m_pool_reduce ) : 0 ; } + + constexpr int team_reduce_bytes() const + { return sizeof(int64_t) * ( m_team_shared - m_team_reduce ); } + + constexpr int team_shared_bytes() const + { return sizeof(int64_t) * ( m_thread_local - m_team_shared ); } + + constexpr int thread_local_bytes() const + { return sizeof(int64_t) * ( m_scratch_size - m_thread_local ); } + + constexpr int scratch_bytes() const + { return sizeof(int64_t) * m_scratch_size ; } + + // Memory chunks: + + int64_t * scratch_buffer() const noexcept + { return m_scratch ; } + + int64_t * pool_reduce() const noexcept + { return m_pool_scratch + m_pool_reduce ; } + + int64_t * pool_reduce_local() const noexcept + { return m_scratch + m_pool_reduce ; } + + int64_t * team_reduce() const noexcept + { return m_team_scratch + m_team_reduce ; } + + int64_t * team_reduce_local() const noexcept + { return m_scratch + m_team_reduce ; } + + int64_t * team_shared() const noexcept + { return m_team_scratch + m_team_shared ; } + + int64_t * local_scratch() const noexcept + { return m_scratch + m_thread_local ; } + + // Given: + // pool_reduce_size = number of bytes for pool reduce + // team_reduce_size = number of bytes for team reduce + // team_shared_size = number of bytes for team shared memory + // thread_local_size = number of bytes for thread local memory + // Return: + // total number of bytes that must be allocated + static + size_t scratch_size( int pool_reduce_size + , int team_reduce_size + , int team_shared_size + , int thread_local_size ) + { + pool_reduce_size = align_to_int64( pool_reduce_size ); + team_reduce_size = align_to_int64( team_reduce_size ); + team_shared_size = align_to_int64( team_shared_size ); + thread_local_size = align_to_int64( thread_local_size ); + + const size_t total_bytes = ( + m_pool_reduce + + pool_reduce_size + + team_reduce_size + + team_shared_size + + thread_local_size ) * sizeof(int64_t); + + return total_bytes ; + } + + // Given: + // alloc_ptr = pointer to allocated memory + // alloc_size = number of bytes of allocated memory + // pool_reduce_size = number of bytes for pool reduce/scan operations + // team_reduce_size = number of bytes for team reduce/scan operations + // team_shared_size = number of bytes for team-shared memory + // thread_local_size = number of bytes for thread-local memory + // Effect: + // assign this thread's memory chunk offsets within the allocation + void scratch_assign( void * const alloc_ptr + , size_t const alloc_size + , int pool_reduce_size + , int team_reduce_size + , int team_shared_size + , int /* thread_local_size */ ) + { + pool_reduce_size = align_to_int64( pool_reduce_size ); + team_reduce_size = align_to_int64( team_reduce_size ); + team_shared_size = align_to_int64( team_shared_size ); + // thread_local_size = align_to_int64( thread_local_size ); + + m_scratch = (int64_t *) alloc_ptr ; + m_team_reduce = m_pool_reduce + pool_reduce_size ; + m_team_shared = m_team_reduce + team_reduce_size ; + m_thread_local = m_team_shared + team_shared_size ; + m_scratch_size = align_to_int64( alloc_size ); + +#if 0 +fprintf(stdout,"HostThreadTeamData::scratch_assign { %d %d %d %d %d %d %d }\n" + , int(m_pool_members) + , int(m_pool_rendezvous) + , int(m_pool_reduce) + , int(m_team_reduce) + , int(m_team_shared) + , int(m_thread_local) + , int(m_scratch_size) + ); +fflush(stdout); +#endif + + } + + //---------------------------------------- + // Get a work index within the range. + // First try to steal from beginning of own team's partition.
+ // If that fails then try to steal from end of another teams' partition. + int get_work_stealing() noexcept ; + + //---------------------------------------- + // Set the initial work partitioning of [ 0 .. length ) among the teams + // with granularity of chunk + + void set_work_partition( int64_t const length + , int const chunk ) noexcept + { + // Minimum chunk size to insure that + // m_work_end < std::numeric_limits<int>::max() * m_work_chunk + + int const chunk_min = ( length + std::numeric_limits<int>::max() ) + / std::numeric_limits<int>::max(); + + m_work_end = length ; + m_work_chunk = std::max( chunk , chunk_min ); + + // Number of work chunks and partitioning of that number: + int const num = ( m_work_end + m_work_chunk - 1 ) / m_work_chunk ; + int const part = ( num + m_league_size - 1 ) / m_league_size ; + + m_work_range.first = part * m_league_rank ; + m_work_range.second = m_work_range.first + part ; + + // Steal from next team, round robin + // The next team is offset by m_team_alloc if it fits in the pool. + + m_steal_rank = m_team_base + m_team_alloc + m_team_size <= m_pool_size ? + m_team_base + m_team_alloc : 0 ; + } + + std::pair<int64_t,int64_t> get_work_partition() noexcept + { + return std::pair<int64_t,int64_t> + ( m_work_range.first * m_work_chunk + , m_work_range.second * m_work_chunk < m_work_end + ? m_work_range.second * m_work_chunk : m_work_end ); + } + + std::pair<int64_t,int64_t> get_work_stealing_chunk() noexcept + { + std::pair<int64_t,int64_t> x(-1,-1); + + const int i = get_work_stealing(); + + if ( 0 <= i ) { + x.first = m_work_chunk * i ; + x.second = x.first + m_work_chunk < m_work_end + ? x.first + m_work_chunk : m_work_end ; + } + + return x ; + } +}; + +//---------------------------------------------------------------------------- + +template< class HostExecSpace > +class HostThreadTeamMember { +public: + + using scratch_memory_space = typename HostExecSpace::scratch_memory_space ; + +private: + + scratch_memory_space m_scratch ; + HostThreadTeamData & m_data ; + int const m_league_rank ; + int const m_league_size ; + +public: + + constexpr HostThreadTeamMember( HostThreadTeamData & arg_data ) noexcept + : m_scratch( arg_data.team_shared() , arg_data.team_shared_bytes() ) + , m_data( arg_data ) + , m_league_rank(0) + , m_league_size(1) + {} + + constexpr HostThreadTeamMember( HostThreadTeamData & arg_data + , int const arg_league_rank + , int const arg_league_size + ) noexcept + : m_scratch( arg_data.team_shared() + , arg_data.team_shared_bytes() + , arg_data.team_shared() + , arg_data.team_shared_bytes() ) + , m_data( arg_data ) + , m_league_rank( arg_league_rank ) + , m_league_size( arg_league_size ) + {} + + ~HostThreadTeamMember() = default ; + HostThreadTeamMember() = delete ; + HostThreadTeamMember( HostThreadTeamMember && ) = default ; + HostThreadTeamMember( HostThreadTeamMember const & ) = default ; + HostThreadTeamMember & operator = ( HostThreadTeamMember && ) = default ; + HostThreadTeamMember & operator = ( HostThreadTeamMember const & ) = default ; + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int team_rank() const noexcept { return m_data.m_team_rank ; } + + KOKKOS_INLINE_FUNCTION + int team_size() const noexcept { return m_data.m_team_size ; } + + KOKKOS_INLINE_FUNCTION + int league_rank() const noexcept { return m_league_rank ; } + + KOKKOS_INLINE_FUNCTION + int league_size() const noexcept { return m_league_size ; } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + const 
scratch_memory_space & team_shmem() const + { return m_scratch.set_team_thread_mode(0,1,0); } + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & team_scratch(int) const + { return m_scratch.set_team_thread_mode(0,1,0); } + + KOKKOS_INLINE_FUNCTION + const scratch_memory_space & thread_scratch(int) const + { return m_scratch.set_team_thread_mode(0,m_data.m_team_size,m_data.m_team_rank); } + + //---------------------------------------- + // Team collectives + + KOKKOS_INLINE_FUNCTION void team_barrier() const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( m_data.team_rendezvous() ) m_data.team_rendezvous_release(); + } +#else + {} +#endif + + template< class Closure > + KOKKOS_INLINE_FUNCTION + void team_barrier( Closure const & f ) const noexcept + { + if ( m_data.team_rendezvous() ) { + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + f(); + + m_data.team_rendezvous_release(); + } + } + + //-------------------------------------------------------------------------- + + template< typename T > + KOKKOS_INLINE_FUNCTION + void team_broadcast( T & value , const int source_team_rank ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 1 < m_data.m_team_size ) { + T volatile * const shared_value = (T*) m_data.team_reduce(); + + // Don't overwrite shared memory until all threads arrive + + if ( m_data.team_rendezvous( source_team_rank ) ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + *shared_value = value ; + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + value = *shared_value ; + } + } + } +#else + { Kokkos::abort("HostThreadTeamMember team_broadcast\n"); } +#endif + + //-------------------------------------------------------------------------- + + template< class Closure , typename T > + KOKKOS_INLINE_FUNCTION + void team_broadcast( Closure const & f , T & value , const int source_team_rank) const noexcept + { + T volatile * const shared_value = (T*) m_data.team_reduce(); + + // Don't overwrite shared memory until all threads arrive + + if ( m_data.team_rendezvous(source_team_rank) ) { + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + + f( value ); + + if ( 1 < m_data.m_team_size ) { *shared_value = value ; } + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + value = *shared_value ; + } + } + + //-------------------------------------------------------------------------- + // team_reduce( Sum(result) ); + // team_reduce( Min(result) ); + // team_reduce( Max(result) ); + + template< typename ReducerType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< is_reducer< ReducerType >::value >::type + team_reduce( ReducerType const & reducer ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 1 < m_data.m_team_size ) { + + using value_type = typename ReducerType::value_type ; + + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + reducer.copy( (value_type*) m_data.team_reduce_local() + , reducer.data() ); + } + + // Root does not overwrite shared memory until all threads arrive + // and 
copy to their local buffer. + + if ( m_data.team_rendezvous() ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread sums contributed values + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + value_type * const src = + (value_type*) m_data.team_member(i)->team_reduce_local(); + + reducer.join( reducer.data() , src ); + } + + // Copy result to root member's buffer: + reducer.copy( (value_type*) m_data.team_reduce() , reducer.data() ); + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + else { + // Copy from root member's buffer: + reducer.copy( reducer.data() , (value_type*) m_data.team_reduce() ); + } + } + } +#else + { Kokkos::abort("HostThreadTeamMember team_reduce\n"); } +#endif + + //-------------------------------------------------------------------------- + + template< typename ValueType , class JoinOp > + KOKKOS_INLINE_FUNCTION + ValueType + team_reduce( ValueType const & value + , JoinOp const & join ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + *((ValueType*) m_data.team_reduce_local()) = value ; + } + + // Root does not overwrite shared memory until all threads arrive + // and copy to their local buffer. + + if ( m_data.team_rendezvous() ) { + const Impl::Reducer< ValueType , JoinOp > reducer( join ); + + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread sums contributed values + + ValueType * const dst = (ValueType*) m_data.team_reduce_local(); + + *dst = value ; + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + ValueType * const src = + (ValueType*) m_data.team_member(i)->team_reduce_local(); + + reducer.join( dst , src ); + } + + m_data.team_rendezvous_release(); + // This thread released all other threads from 'team_rendezvous' + // with a return value of 'false' + } + + return *((ValueType*) m_data.team_reduce()); + } +#else + { Kokkos::abort("HostThreadTeamMember team_reduce\n"); return ValueType(); } +#endif + + + template< typename T > + KOKKOS_INLINE_FUNCTION + T team_scan( T const & value , T * const global = 0 ) const noexcept +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + { + if ( 0 != m_data.m_team_rank ) { + // Non-root copies to their local buffer: + ((T*) m_data.team_reduce_local())[1] = value ; + } + + // Root does not overwrite shared memory until all threads arrive + // and copy to their local buffer. + + if ( m_data.team_rendezvous() ) { + // All threads have entered 'team_rendezvous' + // only this thread returned from 'team_rendezvous' + // with a return value of 'true' + // + // This thread scans contributed values + + { + T * prev = (T*) m_data.team_reduce_local(); + + prev[0] = 0 ; + prev[1] = value ; + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + T * const ptr = (T*) m_data.team_member(i)->team_reduce_local(); + + ptr[0] = prev[0] + prev[1] ; + + prev = ptr ; + } + } + + // If adding to global value then atomic_fetch_add to that value + // and sum previous value to every entry of the scan. 
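+      //
+      // Worked example: with team_size == 3 and contributions v0, v1, v2 the
+      // exclusive prefix results are 0, v0, v0+v1 for team ranks 0, 1, 2.
+      // When 'global' is provided it is atomically increased by v0+v1+v2 and
+      // its previous value is added to every rank's result.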
+ if ( global ) { + T * prev = (T*) m_data.team_reduce_local(); + + { + T * ptr = (T*) m_data.team_member( m_data.m_team_size - 1 )->team_reduce_local(); + prev[0] = Kokkos::atomic_fetch_add( global , ptr[0] + ptr[1] ); + } + + for ( int i = 1 ; i < m_data.m_team_size ; ++i ) { + T * ptr = (T*) m_data.team_member(i)->team_reduce_local(); + ptr[0] += prev[0] ; + } + } + + m_data.team_rendezvous_release(); + } + + return ((T*) m_data.team_reduce_local())[0]; + } +#else + { Kokkos::abort("HostThreadTeamMember team_scan\n"); return T(); } +#endif + +}; + + +}} /* namespace Kokkos::Impl */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template<class Space,typename iType> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > +TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member + , iType const & count ) +{ + return + Impl::TeamThreadRangeBoundariesStruct + <iType,Impl::HostThreadTeamMember<Space> >(member,0,count); +} + +template<class Space, typename iType1, typename iType2> +KOKKOS_INLINE_FUNCTION +Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Impl::HostThreadTeamMember<Space> > +TeamThreadRange( Impl::HostThreadTeamMember<Space> const & member + , iType1 const & begin , iType2 const & end ) +{ + return + Impl::TeamThreadRangeBoundariesStruct + < typename std::common_type< iType1, iType2 >::type + , Impl::HostThreadTeamMember<Space> >( member , begin , end ); +} + +template<class Space, typename iType> +KOKKOS_INLINE_FUNCTION +Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > +ThreadVectorRange + ( Impl::HostThreadTeamMember<Space> const & member + , const iType & count ) +{ + return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >(member,count); +} + +//---------------------------------------------------------------------------- +/** \brief Inter-thread parallel_for. + * + * Executes lambda(iType i) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the the calling thread team. 
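+ *
+ * Illustrative usage (the views 'x', 'y' and the scalar 'a' are assumed to
+ * exist for the example; they are not part of this interface):
+ *   parallel_for( TeamThreadRange( member , N ),
+ *                 [&]( const int i ) { y[i] += a * x[i]; } );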
+*/ +template<typename iType, class Space, class Closure> +KOKKOS_INLINE_FUNCTION +void parallel_for + ( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries + , Closure const & closure + ) +{ + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure (i); + } +} + +template<typename iType, class Space, class Closure> +KOKKOS_INLINE_FUNCTION +void parallel_for + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries + , Closure const & closure + ) +{ + #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + #pragma ivdep + #endif + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure (i); + } +} + +//---------------------------------------------------------------------------- + +template< typename iType, class Space, class Closure, class Reducer > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< Kokkos::is_reducer< Reducer >::value >::type +parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > + const & loop_boundaries + , Closure const & closure + , Reducer const & reducer + ) +{ + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +template< typename iType, class Space, typename Closure, typename ValueType > +KOKKOS_INLINE_FUNCTION +typename std::enable_if< ! Kokkos::is_reducer<ValueType>::value >::type +parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > + const & loop_boundaries + , Closure const & closure + , ValueType & result + ) +{ + Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > reducer( & result ); + + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +template< typename iType, class Space + , class Closure, class Joiner , typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + ( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > + const & loop_boundaries + , Closure const & closure + , Joiner const & joiner + , ValueType & result + ) +{ + Impl::Reducer< ValueType , Joiner > reducer( joiner , & result ); + + reducer.init( reducer.data() ); + + for( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure( i , reducer.reference() ); + } + + loop_boundaries.thread.team_reduce( reducer ); +} + +//---------------------------------------------------------------------------- +/** \brief Inter-thread vector parallel_reduce. + * + * Executes lambda(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all threads of the + * calling thread team and a summation of val is + * performed and put into result. 
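+ *
+ * Illustrative usage (the view 'x' is assumed to exist for the example):
+ *   double sum = 0 ;
+ *   parallel_reduce( ThreadVectorRange( member , N ),
+ *                    [&]( const int i , double & val ) { val += x[i]; },
+ *                    sum );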
+ */ +template< typename iType, class Space , class Lambda, typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries, + const Lambda & lambda, + ValueType& result) +{ + result = ValueType(); +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start ; + i < loop_boundaries.end ; + i += loop_boundaries.increment) { + lambda(i,result); + } +} + +/** \brief Intra-thread vector parallel_reduce. + * + * Executes lambda(iType i, ValueType & val) for each i=[0..N) + * + * The range [0..N) is mapped to all vector lanes of the the + * calling thread and a reduction of val is performed using + * JoinType(ValueType& val, const ValueType& update) + * and put into init_result. + * The input value of init_result is used as initializer for + * temporary variables of ValueType. Therefore * the input + * value should be the neutral element with respect to the + * join operation (e.g. '0 for +-' or * '1 for *'). + */ +template< typename iType, class Space + , class Lambda, class JoinType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void parallel_reduce + (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> >& loop_boundaries, + const Lambda & lambda, + const JoinType & join, + ValueType& result) +{ +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for( iType i = loop_boundaries.start ; + i < loop_boundaries.end ; + i += loop_boundaries.increment ) { + lambda(i,result); + } +} + +//---------------------------------------------------------------------------- + +template< typename iType, class Space, class Closure > +KOKKOS_INLINE_FUNCTION +void parallel_scan + ( Impl::TeamThreadRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries + , Closure const & closure + ) +{ + // Extract ValueType from the closure + + using value_type = + typename Kokkos::Impl::FunctorAnalysis + < Kokkos::Impl::FunctorPatternInterface::SCAN + , void + , Closure >::value_type ; + + value_type accum = 0 ; + + // Intra-member scan + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,accum,false); + } + + // 'accum' output is the exclusive prefix sum + accum = loop_boundaries.thread.team_scan(accum); + + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,accum,true); + } +} + + +template< typename iType, class Space, class ClosureType > +KOKKOS_INLINE_FUNCTION +void parallel_scan + ( Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::HostThreadTeamMember<Space> > const & loop_boundaries + , ClosureType const & closure + ) +{ + using value_type = typename + Kokkos::Impl::FunctorAnalysis + < Impl::FunctorPatternInterface::SCAN + , void + , ClosureType >::value_type ; + + value_type scan_val = value_type(); + +#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP +#pragma ivdep +#endif + for ( iType i = loop_boundaries.start + ; i < loop_boundaries.end + ; i += loop_boundaries.increment ) { + closure(i,scan_val,true); + } +} + +//---------------------------------------------------------------------------- + +template< class Space > +KOKKOS_INLINE_FUNCTION +Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> > +PerTeam(const Impl::HostThreadTeamMember<Space> & member ) +{ + return Impl::ThreadSingleStruct<Impl::HostThreadTeamMember<Space> >(member); +} + +template< class Space > 
+KOKKOS_INLINE_FUNCTION +Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> > +PerThread(const Impl::HostThreadTeamMember<Space> & member) +{ + return Impl::VectorSingleStruct<Impl::HostThreadTeamMember<Space> >(member); +} + +template< class Space , class FunctorType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor ) +{ + if ( single.team_member.team_rank() == 0 ) functor(); + // 'single' does not perform a barrier. + // single.team_member.team_barrier( functor ); +} + +template< class Space , class FunctorType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::ThreadSingleStruct< Impl::HostThreadTeamMember<Space> > & single , const FunctorType & functor , ValueType & val ) +{ + single.team_member.team_broadcast( functor , val , 0 ); +} + +template< class Space , class FunctorType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor ) +{ + functor(); +} + +template< class Space , class FunctorType , typename ValueType > +KOKKOS_INLINE_FUNCTION +void single( const Impl::VectorSingleStruct< Impl::HostThreadTeamMember<Space> > & , const FunctorType & functor , ValueType & val ) +{ + functor(val); +} + +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +#endif /* #ifndef KOKKOS_IMPL_HOSTTHREADTEAM_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp index 84cf536bb7adf86be20459f36f64f4ced027188e..7489018ac641b70e97b6eba879d4c08aa0776fb9 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp @@ -52,6 +52,10 @@ void memory_fence() { #if defined( __CUDA_ARCH__ ) __threadfence(); +#elif defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) + asm volatile ( + "mfence" ::: "memory" + ); #elif defined( KOKKOS_ENABLE_GNU_ATOMICS ) || \ ( defined( KOKKOS_COMPILER_NVCC ) && defined( KOKKOS_ENABLE_INTEL_ATOMICS ) ) __sync_synchronize(); @@ -76,8 +80,8 @@ void store_fence() { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) asm volatile ( - "sfence" ::: "memory" - ); + "sfence" ::: "memory" + ); #else memory_fence(); #endif @@ -93,8 +97,8 @@ void load_fence() { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) asm volatile ( - "lfence" ::: "memory" - ); + "lfence" ::: "memory" + ); #else memory_fence(); #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp index da95c943fe96acbeda0a8d44525f9f9fd2d65076..5852efb011f357ace9df66c5d330f9e2a3f39dd1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp @@ -129,8 +129,8 @@ #endif #ifdef KOKKOS_HAVE_CUDA_RDC -#ifndef KOKKOS_ENABLE_CUDA_RDC -#define KOKKOS_ENABLE_CUDA_RDC KOKKOS_HAVE_CUDA_RDC +#ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE +#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE KOKKOS_HAVE_CUDA_RDC #endif #endif @@ -242,9 +242,9 @@ #endif #endif -#ifdef KOKKOS_HAVE_QTHREAD -#ifndef KOKKOS_ENABLE_QTHREAD -#define KOKKOS_ENABLE_QTHREAD KOKKOS_HAVE_QTHREAD +#ifdef KOKKOS_HAVE_QTHREADS +#ifndef KOKKOS_ENABLE_QTHREADS +#define KOKKOS_ENABLE_QTHREADS KOKKOS_HAVE_QTHREADS #endif #endif diff --git 
a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 99c5df4db31001b42f56337938f5a7ea73941157..0c006a8c008390e330f35d849f9b93facfeb1879 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -43,7 +43,7 @@ #include <impl/Kokkos_Profiling_Interface.hpp> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <string.h> namespace Kokkos { @@ -84,21 +84,21 @@ namespace Kokkos { (*endScanCallee)(kernelID); } } - + void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { if(NULL != beginReduceCallee) { Kokkos::fence(); (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); } } - + void endParallelReduce(const uint64_t kernelID) { if(NULL != endReduceCallee) { Kokkos::fence(); (*endReduceCallee)(kernelID); } } - + void pushRegion(const std::string& kName) { if( NULL != pushRegionCallee ) { diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index 3d6a3892524ee3234a33f14cf7727cac5512e455..139a20d8f9ea99b88d21436726fa9c55fe063622 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -50,7 +50,7 @@ #include <string> #include <cinttypes> -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) #include <impl/Kokkos_Profiling_DeviceInfo.hpp> #include <dlfcn.h> #include <iostream> @@ -59,7 +59,7 @@ #define KOKKOSP_INTERFACE_VERSION 20150628 -#if (KOKKOS_ENABLE_PROFILING) +#if defined(KOKKOS_ENABLE_PROFILING) namespace Kokkos { namespace Profiling { diff --git a/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp b/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b3ed5f151439c659305773f1cd997376300ccf3e --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Reducer.hpp @@ -0,0 +1,317 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_REDUCER_HPP +#define KOKKOS_IMPL_REDUCER_HPP + +#include <impl/Kokkos_Traits.hpp> + +//---------------------------------------------------------------------------- +/* Reducer abstraction: + * 1) Provides 'join' operation + * 2) Provides 'init' operation + * 3) Provides 'copy' operation + * 4) Optionally provides result value in a memory space + * + * Created from: + * 1) Functor::operator()( destination , source ) + * 2) Functor::{ join , init ) + */ +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template< typename value_type > +struct ReduceSum +{ + KOKKOS_INLINE_FUNCTION static + void copy( value_type & dest + , value_type const & src ) noexcept + { dest = src ; } + + KOKKOS_INLINE_FUNCTION static + void init( value_type & dest ) noexcept + { new( &dest ) value_type(); } + + KOKKOS_INLINE_FUNCTION static + void join( value_type volatile & dest + , value_type const volatile & src ) noexcept + { dest += src ; } + + KOKKOS_INLINE_FUNCTION static + void join( value_type & dest + , value_type const & src ) noexcept + { dest += src ; } +}; + +template< typename T + , class ReduceOp = ReduceSum< T > + , typename MemorySpace = void > +struct Reducer + : private ReduceOp + , private integral_nonzero_constant + < int , ( std::rank<T>::value == 1 ? std::extent<T>::value : 1 )> +{ +private: + + // Determine if T is simple array + + enum : int { rank = std::rank<T>::value }; + + static_assert( rank <= 1 , "Kokkos::Impl::Reducer type is at most rank-one" ); + + using length_t = + integral_nonzero_constant<int,( rank == 1 ? 
std::extent<T>::value : 1 )> ; + +public: + + using reducer = Reducer ; + using memory_space = MemorySpace ; + using value_type = typename std::remove_extent<T>::type ; + using reference_type = + typename std::conditional< ( rank != 0 ) + , value_type * + , value_type & + >::type ; +private: + + //-------------------------------------------------------------------------- + // Determine what functions 'ReduceOp' provides: + // copy( destination , source ) + // init( destination ) + // + // operator()( destination , source ) + // join( destination , source ) + // + // Provide defaults for missing optional operations + + template< class R , typename = void> + struct COPY { + KOKKOS_INLINE_FUNCTION static + void copy( R const & + , value_type * dst + , value_type const * src ) { *dst = *src ; } + }; + + template< class R > + struct COPY< R , decltype( ((R*)0)->copy( *((value_type*)0) + , *((value_type const *)0) ) ) > + { + KOKKOS_INLINE_FUNCTION static + void copy( R const & r + , value_type * dst + , value_type const * src ) { r.copy( *dst , *src ); } + }; + + template< class R , typename = void > + struct INIT { + KOKKOS_INLINE_FUNCTION static + void init( R const & , value_type * dst ) { new(dst) value_type(); } + }; + + template< class R > + struct INIT< R , decltype( ((R*)0)->init( *((value_type*)0 ) ) ) > + { + KOKKOS_INLINE_FUNCTION static + void init( R const & r , value_type * dst ) { r.init( *dst ); } + }; + + template< class R , typename V , typename = void > struct JOIN + { + // If no join function then try operator() + KOKKOS_INLINE_FUNCTION static + void join( R const & r , V * dst , V const * src ) + { r.operator()(*dst,*src); } + }; + + template< class R , typename V > + struct JOIN< R , V , decltype( ((R*)0)->join ( *((V *)0) , *((V const *)0) ) ) > + { + // If has join function use it + KOKKOS_INLINE_FUNCTION static + void join( R const & r , V * dst , V const * src ) + { r.join(*dst,*src); } + }; + + //-------------------------------------------------------------------------- + + value_type * const m_result ; + + template< int Rank > + KOKKOS_INLINE_FUNCTION + static constexpr + typename std::enable_if< ( 0 != Rank ) , reference_type >::type + ref( value_type * p ) noexcept { return p ; } + + template< int Rank > + KOKKOS_INLINE_FUNCTION + static constexpr + typename std::enable_if< ( 0 == Rank ) , reference_type >::type + ref( value_type * p ) noexcept { return *p ; } + +public: + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION + constexpr int length() const noexcept + { return length_t::value ; } + + KOKKOS_INLINE_FUNCTION + value_type * data() const noexcept + { return m_result ; } + + KOKKOS_INLINE_FUNCTION + reference_type reference() const noexcept + { return Reducer::template ref< rank >( m_result ); } + + //-------------------------------------------------------------------------- + + KOKKOS_INLINE_FUNCTION + void copy( value_type * const dest + , value_type const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template COPY<ReduceOp>::copy( (ReduceOp &) *this , dest + i , src + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void init( value_type * dest ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template INIT<ReduceOp>::init( (ReduceOp &) *this , dest + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void join( value_type * const dest + , value_type const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template 
JOIN<ReduceOp,value_type>::join( (ReduceOp &) *this , dest + i , src + i ); + } + } + + KOKKOS_INLINE_FUNCTION + void join( value_type volatile * const dest + , value_type volatile const * const src ) const noexcept + { + for ( int i = 0 ; i < length() ; ++i ) { + Reducer::template JOIN<ReduceOp,value_type volatile>::join( (ReduceOp &) *this , dest + i , src + i ); + } + } + + //-------------------------------------------------------------------------- + + template< typename ArgT > + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer + ( ArgT * arg_value + , typename std::enable_if + < std::is_same<ArgT,value_type>::value && + std::is_default_constructible< ReduceOp >::value + , int >::type arg_length = 1 + ) noexcept + : ReduceOp(), length_t( arg_length ), m_result( arg_value ) {} + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( ReduceOp const & arg_op + , value_type * arg_value = 0 + , int arg_length = 1 ) noexcept + : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} + + KOKKOS_INLINE_FUNCTION explicit + constexpr Reducer( ReduceOp && arg_op + , value_type * arg_value = 0 + , int arg_length = 1 ) noexcept + : ReduceOp( arg_op ), length_t( arg_length ), m_result( arg_value ) {} + + Reducer( Reducer const & ) = default ; + Reducer( Reducer && ) = default ; + Reducer & operator = ( Reducer const & ) = default ; + Reducer & operator = ( Reducer && ) = default ; +}; + +} // namespace Impl +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +template< typename ValueType > +constexpr +Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > > +Sum( ValueType & arg_value ) +{ + static_assert( std::is_trivial<ValueType>::value + , "Kokkos reducer requires trivial value type" ); + return Impl::Reducer< ValueType , Impl::ReduceSum< ValueType > >( & arg_value ); +} + +template< typename ValueType > +constexpr +Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > > +Sum( ValueType * arg_value , int arg_length ) +{ + static_assert( std::is_trivial<ValueType>::value + , "Kokkos reducer requires trivial value type" ); + return Impl::Reducer< ValueType[] , Impl::ReduceSum< ValueType > >( arg_value , arg_length ); +} + +//---------------------------------------------------------------------------- + +template< typename ValueType , class JoinType > +Impl::Reducer< ValueType , JoinType > +reducer( ValueType & value , JoinType const & lambda ) +{ + return Impl::Reducer< ValueType , JoinType >( lambda , & value ); +} + +} // namespace Kokkos + +#endif /* #ifndef KOKKOS_IMPL_REDUCER_HPP */ + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp index 76161c10f1a8b4ed493772a59e086362b9e2723c..79496133061145aee8786aecb21aa86117b1dbc4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -53,63 +53,126 @@ namespace Kokkos { namespace Impl { -namespace SerialImpl { +namespace { -Sentinel::Sentinel() : m_scratch(0), m_reduce_end(0), m_shared_end(0) {} +HostThreadTeamData g_serial_thread_team_data ; -Sentinel::~Sentinel() -{ - if ( m_scratch ) { free( m_scratch ); } - m_scratch = 0 ; - m_reduce_end = 0 ; - m_shared_end = 0 ; } -Sentinel & Sentinel::singleton() +// Resize thread team data scratch memory +void serial_resize_thread_team_data( size_t pool_reduce_bytes + , size_t team_reduce_bytes + , size_t team_shared_bytes + , size_t thread_local_bytes ) { - static Sentinel s ; return s ; + if ( pool_reduce_bytes < 512 ) pool_reduce_bytes = 512 ; + if ( team_reduce_bytes < 512 ) team_reduce_bytes = 512 ; + + const size_t old_pool_reduce = g_serial_thread_team_data.pool_reduce_bytes(); + const size_t old_team_reduce = g_serial_thread_team_data.team_reduce_bytes(); + const size_t old_team_shared = g_serial_thread_team_data.team_shared_bytes(); + const size_t old_thread_local = g_serial_thread_team_data.thread_local_bytes(); + const size_t old_alloc_bytes = g_serial_thread_team_data.scratch_bytes(); + + // Allocate if any of the old allocation is too small: + + const bool allocate = ( old_pool_reduce < pool_reduce_bytes ) || + ( old_team_reduce < team_reduce_bytes ) || + ( old_team_shared < team_shared_bytes ) || + ( old_thread_local < thread_local_bytes ); + + if ( allocate ) { + + Kokkos::HostSpace space ; + + if ( old_alloc_bytes ) { + g_serial_thread_team_data.disband_team(); + g_serial_thread_team_data.disband_pool(); + + space.deallocate( g_serial_thread_team_data.scratch_buffer() + , g_serial_thread_team_data.scratch_bytes() ); + } + + if ( pool_reduce_bytes < old_pool_reduce ) { pool_reduce_bytes = old_pool_reduce ; } + if ( team_reduce_bytes < old_team_reduce ) { team_reduce_bytes = old_team_reduce ; } + if ( team_shared_bytes < old_team_shared ) { team_shared_bytes = old_team_shared ; } + if ( thread_local_bytes < old_thread_local ) { thread_local_bytes = old_thread_local ; } + + const size_t alloc_bytes = + HostThreadTeamData::scratch_size( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + void * const ptr = space.allocate( alloc_bytes ); + + g_serial_thread_team_data.
+ scratch_assign( ((char *)ptr) + , alloc_bytes + , pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes ); + + HostThreadTeamData * pool[1] = { & g_serial_thread_team_data }; + + g_serial_thread_team_data.organize_pool( pool , 1 ); + g_serial_thread_team_data.organize_team(1); + } } -inline -unsigned align( unsigned n ) +// Get thread team data structure for omp_get_thread_num() +HostThreadTeamData * serial_get_thread_team_data() { - enum { ALIGN = 0x0100 /* 256 */ , MASK = ALIGN - 1 }; - return ( n + MASK ) & ~MASK ; + return & g_serial_thread_team_data ; } -} // namespace +} // namespace Impl +} // namespace Kokkos -SerialTeamMember::SerialTeamMember( int arg_league_rank - , int arg_league_size - , int arg_shared_size - ) - : m_space( ((char *) SerialImpl::Sentinel::singleton().m_scratch) + SerialImpl::Sentinel::singleton().m_reduce_end - , arg_shared_size ) - , m_league_rank( arg_league_rank ) - , m_league_size( arg_league_size ) -{} +/*--------------------------------------------------------------------------*/ -} // namespace Impl +namespace Kokkos { -void * Serial::scratch_memory_resize( unsigned reduce_size , unsigned shared_size ) +int Serial::is_initialized() { - static Impl::SerialImpl::Sentinel & s = Impl::SerialImpl::Sentinel::singleton(); + return 1 ; +} - reduce_size = Impl::SerialImpl::align( reduce_size ); - shared_size = Impl::SerialImpl::align( shared_size ); +void Serial::initialize( unsigned threads_count + , unsigned use_numa_count + , unsigned use_cores_per_numa + , bool allow_asynchronous_threadpool ) +{ + (void) threads_count; + (void) use_numa_count; + (void) use_cores_per_numa; + (void) allow_asynchronous_threadpool; + + // Init the array of locks used for arbitrarily sized atomics + Impl::init_lock_array_host_space(); + #if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::initialize(); + #endif +} - if ( ( s.m_reduce_end < reduce_size ) || - ( s.m_shared_end < s.m_reduce_end + shared_size ) ) { +void Serial::finalize() +{ + if ( Impl::g_serial_thread_team_data.scratch_buffer() ) { + Impl::g_serial_thread_team_data.disband_team(); + Impl::g_serial_thread_team_data.disband_pool(); - if ( s.m_scratch ) { free( s.m_scratch ); } + Kokkos::HostSpace space ; - if ( s.m_reduce_end < reduce_size ) s.m_reduce_end = reduce_size ; - if ( s.m_shared_end < s.m_reduce_end + shared_size ) s.m_shared_end = s.m_reduce_end + shared_size ; + space.deallocate( Impl::g_serial_thread_team_data.scratch_buffer() + , Impl::g_serial_thread_team_data.scratch_bytes() ); - s.m_scratch = malloc( s.m_shared_end ); + Impl::g_serial_thread_team_data.scratch_assign( (void*) 0, 0, 0, 0, 0, 0 ); } - return s.m_scratch ; + #if defined(KOKKOS_ENABLE_PROFILING) + Kokkos::Profiling::finalize(); + #endif } } // namespace Kokkos diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index 19f3abe71ae7049ce0c2674ee2638c07679aa5b0..d22d604fbc2f02e2f18c6c24d69840e7f33e7e98 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -62,11 +62,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute using execution_space = Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member exec ; + 
Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + Member exec( *data ); // Loop until all queues are empty while ( 0 < queue->m_ready_count ) { @@ -75,13 +77,13 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } if ( end != task ) { - // pop_task resulted in lock == task->m_next + // pop_ready_task resulted in lock == task->m_next // In the executing state (*task->m_apply)( task , & exec ); @@ -113,11 +115,13 @@ void TaskQueueSpecialization< Kokkos::Serial > :: using execution_space = Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; using task_root_type = TaskBase< execution_space , void , void > ; - using Member = TaskExec< execution_space > ; + using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - Member exec ; + Impl::HostThreadTeamData * const data = Impl::serial_get_thread_team_data(); + + Member exec( *data ); // Loop until no runnable task @@ -129,7 +133,7 @@ void TaskQueueSpecialization< Kokkos::Serial > :: for ( int i = 0 ; i < queue_type::NumQueue && end == task ; ++i ) { for ( int j = 0 ; j < 2 && end == task ; ++j ) { - task = queue_type::pop_task( & queue->m_ready[i][j] ); + task = queue_type::pop_ready_task( & queue->m_ready[i][j] ); } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 178305c5d3c97da52535324a14333e1878cea730..ac7f17c0ea9e314137560626e0b0467faf5ff90d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -65,6 +65,7 @@ public: using memory_space = Kokkos::HostSpace ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; static void iff_single_thread_recursive_execute( queue_type * const ); @@ -72,237 +73,19 @@ public: static void execute( queue_type * const ); - template< typename FunctorType > + template< typename TaskType > static - void proc_set_apply( task_base_type::function_type * ptr ) - { - using TaskType = TaskBase< Kokkos::Serial - , typename FunctorType::value_type - , FunctorType - > ; - *ptr = TaskType::apply ; - } + typename TaskType::function_type + get_function_pointer() { return TaskType::apply ; } }; extern template class TaskQueue< Kokkos::Serial > ; -//---------------------------------------------------------------------------- - -template<> -class TaskExec< Kokkos::Serial > -{ -public: - - KOKKOS_INLINE_FUNCTION void team_barrier() const {} - KOKKOS_INLINE_FUNCTION int team_rank() const { return 0 ; } - KOKKOS_INLINE_FUNCTION int team_size() const { return 1 ; } -}; - -template<typename iType> -struct TeamThreadRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > -{ - typedef iType index_type; - const iType start ; - const iType end ; - enum {increment = 1}; - //const TaskExec< Kokkos::Serial > & thread; - TaskExec< Kokkos::Serial > & thread; - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct - //( const TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - : start(0) - , 
end(arg_count) - , thread(arg_thread) - {} - - KOKKOS_INLINE_FUNCTION - TeamThreadRangeBoundariesStruct - //( const TaskExec< Kokkos::Serial > & arg_thread - ( TaskExec< Kokkos::Serial > & arg_thread - , const iType& arg_start - , const iType & arg_end - ) - : start( arg_start ) - , end( arg_end) - , thread( arg_thread ) - {} -}; - -//---------------------------------------------------------------------------- - -template<typename iType> -struct ThreadVectorRangeBoundariesStruct<iType, TaskExec< Kokkos::Serial > > -{ - typedef iType index_type; - const iType start ; - const iType end ; - enum {increment = 1}; - TaskExec< Kokkos::Serial > & thread; - - KOKKOS_INLINE_FUNCTION - ThreadVectorRangeBoundariesStruct - ( TaskExec< Kokkos::Serial > & arg_thread, const iType& arg_count) - : start( 0 ) - , end(arg_count) - , thread(arg_thread) - {} -}; - }} /* namespace Kokkos::Impl */ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { - -// OMP version needs non-const TaskExec -template< typename iType > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType & count ) -{ - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( thread, count ); -} - -// OMP version needs non-const TaskExec -template< typename iType1, typename iType2 > -KOKKOS_INLINE_FUNCTION -Impl::TeamThreadRangeBoundariesStruct< typename std::common_type< iType1, iType2 >::type, - Impl::TaskExec< Kokkos::Serial > > -TeamThreadRange( Impl::TaskExec< Kokkos::Serial > & thread, const iType1 & start, const iType2 & end ) -{ - typedef typename std::common_type< iType1, iType2 >::type iType; - return Impl::TeamThreadRangeBoundariesStruct< iType, Impl::TaskExec< Kokkos::Serial > >( - thread, iType(start), iType(end) ); -} - -// OMP version needs non-const TaskExec -template<typename iType> -KOKKOS_INLINE_FUNCTION -Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > > -ThreadVectorRange - ( Impl::TaskExec< Kokkos::Serial > & thread - , const iType & count ) -{ - return Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >(thread,count); -} - - /** \brief Inter-thread parallel_for. Executes lambda(iType i) for each i=0..N-1. - * - * The range i=0..N-1 is mapped to all threads of the the calling thread team. 
- * This functionality requires C++11 support.*/ -template<typename iType, class Lambda> -KOKKOS_INLINE_FUNCTION -void parallel_for(const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, const Lambda& lambda) { - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i); -} - -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ - - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i, result); - - initialized_result = result; -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ - ValueType result = initialized_result; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) - lambda(i, result); - - initialized_result = result; -} - -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda, - ValueType& initialized_result) -{ - initialized_result = ValueType(); -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - initialized_result+=tmp; - } -} - -template< typename iType, class Lambda, typename ValueType, class JoinType > -KOKKOS_INLINE_FUNCTION -void parallel_reduce - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda, - const JoinType & join, - ValueType& initialized_result) -{ - ValueType result = initialized_result; -#ifdef KOKKOS_ENABLE_PRAGMA_IVDEP -#pragma ivdep -#endif - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - ValueType tmp = ValueType(); - lambda(i,tmp); - join(result,tmp); - } - initialized_result = result; -} - -template< typename ValueType, typename iType, class Lambda > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::TeamThreadRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda) -{ - ValueType accum = 0 ; - ValueType val, local_total; - - for( iType i = loop_boundaries.start; i < loop_boundaries.end; i+=loop_boundaries.increment) { - local_total = 0; - lambda(i,local_total,false); - val = accum; - lambda(i,val,true); - accum += local_total; - } - -} - -// placeholder for future function -template< typename iType, class Lambda, typename ValueType > -KOKKOS_INLINE_FUNCTION -void parallel_scan - (const Impl::ThreadVectorRangeBoundariesStruct<iType,Impl::TaskExec< Kokkos::Serial > >& loop_boundaries, - const Lambda & lambda) -{ -} - -} /* namespace Kokkos */ - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - #endif /* #if defined( 
KOKKOS_ENABLE_TASKDAG ) */ #endif /* #ifndef KOKKOS_IMPL_SERIAL_TASK_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp deleted file mode 100644 index b2aea14df44ea55b8c86a70c9907792b51525918..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp +++ /dev/null @@ -1,693 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_HPP -#define KOKKOS_SYNCHRONIC_HPP - -#include <impl/Kokkos_Synchronic_Config.hpp> - -#include <atomic> -#include <chrono> -#include <thread> -#include <functional> -#include <algorithm> - -namespace Kokkos { -namespace Impl { - -enum notify_hint { - notify_all, - notify_one, - notify_none -}; -enum expect_hint { - expect_urgent, - expect_delay -}; - -namespace Details { - -template <class S, class T> -bool __synchronic_spin_wait_for_update(S const& arg, T const& nval, int attempts) noexcept { - int i = 0; - for(;i < __SYNCHRONIC_SPIN_RELAX(attempts); ++i) - if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) - return true; - else - __synchronic_relax(); - for(;i < attempts; ++i) - if(__builtin_expect(arg.load(std::memory_order_relaxed) != nval,1)) - return true; - else - __synchronic_yield(); - return false; -} - -struct __exponential_backoff { - __exponential_backoff(int arg_maximum=512) : maximum(arg_maximum), microseconds(8), x(123456789), y(362436069), z(521288629) { - } - static inline void sleep_for(std::chrono::microseconds const& time) { - auto t = time.count(); - if(__builtin_expect(t > 75,0)) { - portable_sleep(time); - } - else if(__builtin_expect(t > 25,0)) - __synchronic_yield(); - else - __synchronic_relax(); - } - void sleep_for_step() { - sleep_for(step()); - } - std::chrono::microseconds step() { - float const f = ranfu(); - int const t = int(microseconds * f); - if(__builtin_expect(f >= 0.95f,0)) - microseconds = 8; - else - microseconds = (std::min)(microseconds>>1,maximum); - return std::chrono::microseconds(t); - } -private : - int maximum, microseconds, x, y, z; - int xorshf96() { - int t; - x ^= x << 16; x ^= x >> 5; x ^= x << 1; - t = x; x = y; y = z; z = t ^ x ^ y; - return z; - } - float ranfu() { - return (float)(xorshf96()&(~0UL>>1)) 
/ (float)(~0UL>>1); - } -}; - -template <class T, class Enable = void> -struct __synchronic_base { - -protected: - std::atomic<T> atom; - - void notify(notify_hint = notify_all) noexcept { - } - void notify(notify_hint = notify_all) volatile noexcept { - } - -public : - __synchronic_base() noexcept = default; - constexpr __synchronic_base(T v) noexcept : atom(v) { } - __synchronic_base(const __synchronic_base&) = delete; - ~__synchronic_base() { } - __synchronic_base& operator=(const __synchronic_base&) = delete; - __synchronic_base& operator=(const __synchronic_base&) volatile = delete; - - void expect_update(T val, expect_hint = expect_urgent) const noexcept { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - while(atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - } - } - void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - while(atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - } - } - - template <class Clock, class Duration> - void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - remains = then - std::chrono::high_resolution_clock::now(); - } - } - template <class Clock, class Duration> - void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile { - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_A)) - return; - __exponential_backoff b; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val) { - __do_backoff(b); - if(__synchronic_spin_wait_for_update(atom, val, __SYNCHRONIC_SPIN_COUNT_B)) - return; - remains = then - std::chrono::high_resolution_clock::now(); - } - } -}; - -#ifdef __SYNCHRONIC_COMPATIBLE -template <class T> -struct __synchronic_base<T, typename std::enable_if<__SYNCHRONIC_COMPATIBLE(T)>::type> { - -public: - std::atomic<T> atom; - - void notify(notify_hint hint = notify_all) noexcept { - if(__builtin_expect(hint == notify_none,1)) - return; - auto const x = count.fetch_add(0,std::memory_order_acq_rel); - if(__builtin_expect(x,0)) { - if(__builtin_expect(hint == notify_all,1)) - __synchronic_wake_all(&atom); - else - __synchronic_wake_one(&atom); - } - } - void notify(notify_hint hint = notify_all) volatile noexcept { - if(__builtin_expect(hint == notify_none,1)) - return; - auto const x = count.fetch_add(0,std::memory_order_acq_rel); - if(__builtin_expect(x,0)) { - if(__builtin_expect(hint == notify_all,1)) - __synchronic_wake_all_volatile(&atom); - else - __synchronic_wake_one_volatile(&atom); - } - } - -public : - __synchronic_base() noexcept : count(0) { 
} - constexpr __synchronic_base(T v) noexcept : atom(v), count(0) { } - __synchronic_base(const __synchronic_base&) = delete; - ~__synchronic_base() { } - __synchronic_base& operator=(const __synchronic_base&) = delete; - __synchronic_base& operator=(const __synchronic_base&) volatile = delete; - - void expect_update(T val, expect_hint = expect_urgent) const noexcept { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait(&atom,val); - count.fetch_add(-1,std::memory_order_acquire); - } - } - void expect_update(T val, expect_hint = expect_urgent) const volatile noexcept { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - while(__builtin_expect(atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_volatile(&atom,val); - count.fetch_add(-1,std::memory_order_acquire); - } - } - - template <class Clock, class Duration> - void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_timed(&atom,val,remains); - count.fetch_add(-1,std::memory_order_acquire); - remains = then - std::chrono::high_resolution_clock::now(); - } - } - template <class Clock, class Duration> - void expect_update_until(T val, std::chrono::time_point<Clock,Duration> const& then, expect_hint = expect_urgent) const volatile { - if(__builtin_expect(__synchronic_spin_wait_for_update(atom, val,__SYNCHRONIC_SPIN_COUNT_A),1)) - return; - std::chrono::milliseconds remains = then - std::chrono::high_resolution_clock::now(); - while(__builtin_expect(remains > std::chrono::milliseconds::zero() && atom.load(std::memory_order_relaxed) == val,1)) { - count.fetch_add(1,std::memory_order_release); - __synchronic_wait_timed_volatile(&atom,val,remains); - count.fetch_add(-1,std::memory_order_acquire); - remains = then - std::chrono::high_resolution_clock::now(); - } - } -private: - mutable std::atomic<int> count; -}; -#endif - -template <class T, class Enable = void> -struct __synchronic : public __synchronic_base<T> { - - __synchronic() noexcept = default; - constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; -}; - -template <class T> -struct __synchronic<T,typename std::enable_if<std::is_integral<T>::value>::type> : public __synchronic_base<T> { - - T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T fetch_add(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = 
notify_all) volatile noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T fetch_sub(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_and(v,m); - this->notify(n); - return t; - } - T fetch_and(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_and(v,m); - this->notify(n); - return t; - } - T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_or(v,m); - this->notify(n); - return t; - } - T fetch_or(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_or(v,m); - this->notify(n); - return t; - } - T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_xor(v,m); - this->notify(n); - return t; - } - T fetch_xor(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_xor(v,m); - this->notify(n); - return t; - } - - __synchronic() noexcept = default; - constexpr __synchronic(T v) noexcept : __synchronic_base<T>(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; - - T operator=(T v) volatile noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T operator=(T v) noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T operator++(int) volatile noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T operator++(int) noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T operator--(int) volatile noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T operator--(int) noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T operator++() volatile noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T operator++() noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T operator--() volatile noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T operator--() noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T operator+=(T v) volatile noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T operator+=(T v) noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T operator-=(T v) volatile noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T operator-=(T v) noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T operator&=(T v) volatile noexcept { - auto const t = this->atom &= v; - this->notify(); - return t; - } - T operator&=(T v) noexcept { - auto const t = this->atom &= v; - this->notify(); - return t; - } - T operator|=(T v) volatile noexcept { - auto const t = this->atom |= v; - this->notify(); - return t; - } - T operator|=(T v) noexcept { - auto const t = this->atom |= v; - this->notify(); - return t; - } - T operator^=(T 
v) volatile noexcept { - auto const t = this->atom ^= v; - this->notify(); - return t; - } - T operator^=(T v) noexcept { - auto const t = this->atom ^= v; - this->notify(); - return t; - } -}; - -template <class T> -struct __synchronic<T*> : public __synchronic_base<T*> { - - T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T* fetch_add(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_add(v,m); - this->notify(n); - return t; - } - T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - T* fetch_sub(ptrdiff_t v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.fetch_sub(v,m); - this->notify(n); - return t; - } - - __synchronic() noexcept = default; - constexpr __synchronic(T* v) noexcept : __synchronic_base<T*>(v) { } - __synchronic(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) = delete; - __synchronic& operator=(const __synchronic&) volatile = delete; - - T* operator=(T* v) volatile noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T* operator=(T* v) noexcept { - auto const t = this->atom = v; - this->notify(); - return t; - } - T* operator++(int) volatile noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T* operator++(int) noexcept { - auto const t = ++this->atom; - this->notify(); - return t; - } - T* operator--(int) volatile noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T* operator--(int) noexcept { - auto const t = --this->atom; - this->notify(); - return t; - } - T* operator++() volatile noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T* operator++() noexcept { - auto const t = this->atom++; - this->notify(); - return t; - } - T* operator--() volatile noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T* operator--() noexcept { - auto const t = this->atom--; - this->notify(); - return t; - } - T* operator+=(ptrdiff_t v) volatile noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T* operator+=(ptrdiff_t v) noexcept { - auto const t = this->atom += v; - this->notify(); - return t; - } - T* operator-=(ptrdiff_t v) volatile noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } - T* operator-=(ptrdiff_t v) noexcept { - auto const t = this->atom -= v; - this->notify(); - return t; - } -}; - -} //namespace Details - -template <class T> -struct synchronic : public Details::__synchronic<T> { - - bool is_lock_free() const volatile noexcept { return this->atom.is_lock_free(); } - bool is_lock_free() const noexcept { return this->atom.is_lock_free(); } - void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - this->atom.store(v,m); - this->notify(n); - } - void store(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - this->atom.store(v,m); - this->notify(n); - } - T load(std::memory_order m = std::memory_order_seq_cst) const volatile noexcept { return this->atom.load(m); } - T load(std::memory_order m = 
std::memory_order_seq_cst) const noexcept { return this->atom.load(m); } - - operator T() const volatile noexcept { return (T)this->atom; } - operator T() const noexcept { return (T)this->atom; } - - T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.exchange(v,m); - this->notify(n); - return t; - } - T exchange(T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.exchange(v,m); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m1, m2); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m1, std::memory_order m2, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m1,m2); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_weak(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_weak(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) volatile noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m); - this->notify(n); - return t; - } - bool compare_exchange_strong(T& r, T v, std::memory_order m = std::memory_order_seq_cst, notify_hint n = notify_all) noexcept { - auto const t = this->atom.compare_exchange_strong(r,v,m); - this->notify(n); - return t; - } - - synchronic() noexcept = default; - constexpr synchronic(T val) noexcept : Details::__synchronic<T>(val) { } - synchronic(const synchronic&) = delete; - ~synchronic() { } - synchronic& operator=(const synchronic&) = delete; - synchronic& operator=(const synchronic&) volatile = delete; - T operator=(T val) noexcept { - return Details::__synchronic<T>::operator=(val); - } - T operator=(T val) volatile noexcept { - return Details::__synchronic<T>::operator=(val); - } - - T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept { - Details::__synchronic<T>::expect_update(val,h); - return load(order); - } - T load_when_not_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { - Details::__synchronic<T>::expect_update(val,h); - return load(order); - } - T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const noexcept { - for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) - 
Details::__synchronic<T>::expect_update(nval,h); - return load(order); - } - T load_when_equal(T val, std::memory_order order = std::memory_order_seq_cst, expect_hint h = expect_urgent) const volatile noexcept { - for(T nval = load(std::memory_order_relaxed); nval != val; nval = load(std::memory_order_relaxed)) - expect_update(nval,h); - return load(order); - } - template <class Rep, class Period> - void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const { - Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); - } - template < class Rep, class Period> - void expect_update_for(T val, std::chrono::duration<Rep,Period> const& delta, expect_hint h = expect_urgent) const volatile { - Details::__synchronic<T>::expect_update_until(val, std::chrono::high_resolution_clock::now() + delta,h); - } -}; - -#include <inttypes.h> - -typedef synchronic<char> synchronic_char; -typedef synchronic<char> synchronic_schar; -typedef synchronic<unsigned char> synchronic_uchar; -typedef synchronic<short> synchronic_short; -typedef synchronic<unsigned short> synchronic_ushort; -typedef synchronic<int> synchronic_int; -typedef synchronic<unsigned int> synchronic_uint; -typedef synchronic<long> synchronic_long; -typedef synchronic<unsigned long> synchronic_ulong; -typedef synchronic<long long> synchronic_llong; -typedef synchronic<unsigned long long> synchronic_ullong; -//typedef synchronic<char16_t> synchronic_char16_t; -//typedef synchronic<char32_t> synchronic_char32_t; -typedef synchronic<wchar_t> synchronic_wchar_t; - -typedef synchronic<int_least8_t> synchronic_int_least8_t; -typedef synchronic<uint_least8_t> synchronic_uint_least8_t; -typedef synchronic<int_least16_t> synchronic_int_least16_t; -typedef synchronic<uint_least16_t> synchronic_uint_least16_t; -typedef synchronic<int_least32_t> synchronic_int_least32_t; -typedef synchronic<uint_least32_t> synchronic_uint_least32_t; -//typedef synchronic<int_least_64_t> synchronic_int_least_64_t; -typedef synchronic<uint_least64_t> synchronic_uint_least64_t; -typedef synchronic<int_fast8_t> synchronic_int_fast8_t; -typedef synchronic<uint_fast8_t> synchronic_uint_fast8_t; -typedef synchronic<int_fast16_t> synchronic_int_fast16_t; -typedef synchronic<uint_fast16_t> synchronic_uint_fast16_t; -typedef synchronic<int_fast32_t> synchronic_int_fast32_t; -typedef synchronic<uint_fast32_t> synchronic_uint_fast32_t; -typedef synchronic<int_fast64_t> synchronic_int_fast64_t; -typedef synchronic<uint_fast64_t> synchronic_uint_fast64_t; -typedef synchronic<intptr_t> synchronic_intptr_t; -typedef synchronic<uintptr_t> synchronic_uintptr_t; -typedef synchronic<size_t> synchronic_size_t; -typedef synchronic<ptrdiff_t> synchronic_ptrdiff_t; -typedef synchronic<intmax_t> synchronic_intmax_t; -typedef synchronic<uintmax_t> synchronic_uintmax_t; - -} -} - -#endif //__SYNCHRONIC_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp deleted file mode 100644 index 0a6dd6e715edad752f56756ccdc6fba3d43e30fb..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp +++ /dev/null @@ -1,169 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. 
Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_CONFIG_H -#define KOKKOS_SYNCHRONIC_CONFIG_H - -#include <thread> -#include <chrono> - -namespace Kokkos { -namespace Impl { - -//the default yield function used inside the implementation is the Standard one -#define __synchronic_yield std::this_thread::yield -#define __synchronic_relax __synchronic_yield - -#if defined(_MSC_VER) - //this is a handy GCC optimization that I use inside the implementation - #define __builtin_expect(condition,common) condition - #if _MSC_VER <= 1800 - //using certain keywords that VC++ temporarily doesn't support - #define _ALLOW_KEYWORD_MACROS - #define noexcept - #define constexpr - #endif - //yes, I define multiple assignment operators - #pragma warning(disable:4522) - //I don't understand how Windows is so bad at timing functions, but is OK - //with straight-up yield loops - #define __do_backoff(b) __synchronic_yield() -#else -#define __do_backoff(b) b.sleep_for_step() -#endif - -//certain platforms have efficient support for spin-waiting built into the operating system -#if defined(__linux__) || (defined(_WIN32_WINNT) && _WIN32_WINNT >= 0x0602) -#if defined(_WIN32_WINNT) -#include <winsock2.h> -#include <Windows.h> - //the combination of WaitOnAddress and WakeByAddressAll is supported on Windows 8.1+ - #define __synchronic_wait(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) - #define __synchronic_wait_timed(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count()) - #define __synchronic_wake_one(x) WakeByAddressSingle((PVOID)x) - #define __synchronic_wake_all(x) WakeByAddressAll((PVOID)x) - #define __synchronic_wait_volatile(x,v) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),-1) - #define __synchronic_wait_timed_volatile(x,v,t) WaitOnAddress((PVOID)x,(PVOID)&v,sizeof(v),std::chrono::duration_cast<std::chrono::milliseconds>(t).count()) - #define __synchronic_wake_one_volatile(x) WakeByAddressSingle((PVOID)x) - #define __synchronic_wake_all_volatile(x) WakeByAddressAll((PVOID)x) - #define __SYNCHRONIC_COMPATIBLE(x) (std::is_pod<x>::value && (sizeof(x) <= 8)) - - inline void native_sleep(unsigned long microseconds) - { - // What to do if microseconds is < 1000? 
- Sleep(microseconds / 1000); - } - - inline void native_yield() - { - SwitchToThread(); - } -#elif defined(__linux__) - #include <chrono> - #include <time.h> - #include <unistd.h> - #include <pthread.h> - #include <linux/futex.h> - #include <sys/syscall.h> - #include <climits> - #include <cassert> - template < class Rep, class Period> - inline timespec to_timespec(std::chrono::duration<Rep,Period> const& delta) { - struct timespec ts; - ts.tv_sec = static_cast<long>(std::chrono::duration_cast<std::chrono::seconds>(delta).count()); - assert(!ts.tv_sec); - ts.tv_nsec = static_cast<long>(std::chrono::duration_cast<std::chrono::nanoseconds>(delta).count()); - return ts; - } - inline long futex(void const* addr1, int op, int val1) { - return syscall(SYS_futex, addr1, op, val1, 0, 0, 0); - } - inline long futex(void const* addr1, int op, int val1, struct timespec timeout) { - return syscall(SYS_futex, addr1, op, val1, &timeout, 0, 0); - } - inline void native_sleep(unsigned long microseconds) - { - usleep(microseconds); - } - inline void native_yield() - { - pthread_yield(); - } - - //the combination of SYS_futex(WAIT) and SYS_futex(WAKE) is supported on all recent Linux distributions - #define __synchronic_wait(x,v) futex(x, FUTEX_WAIT_PRIVATE, v) - #define __synchronic_wait_timed(x,v,t) futex(x, FUTEX_WAIT_PRIVATE, v, to_timespec(t)) - #define __synchronic_wake_one(x) futex(x, FUTEX_WAKE_PRIVATE, 1) - #define __synchronic_wake_all(x) futex(x, FUTEX_WAKE_PRIVATE, INT_MAX) - #define __synchronic_wait_volatile(x,v) futex(x, FUTEX_WAIT, v) - #define __synchronic_wait_volatile_timed(x,v,t) futex(x, FUTEX_WAIT, v, to_timespec(t)) - #define __synchronic_wake_one_volatile(x) futex(x, FUTEX_WAKE, 1) - #define __synchronic_wake_all_volatile(x) futex(x, FUTEX_WAKE, INT_MAX) - #define __SYNCHRONIC_COMPATIBLE(x) (std::is_integral<x>::value && (sizeof(x) <= 4)) - - //the yield function on Linux is better replaced by sched_yield, which is tuned for spin-waiting - #undef __synchronic_yield - #define __synchronic_yield sched_yield - - //for extremely short wait times, just let another hyper-thread run - #undef __synchronic_relax - #define __synchronic_relax() asm volatile("rep; nop" ::: "memory") - -#endif -#endif - -#ifdef _GLIBCXX_USE_NANOSLEEP -inline void portable_sleep(std::chrono::microseconds const& time) -{ std::this_thread::sleep_for(time); } -#else -inline void portable_sleep(std::chrono::microseconds const& time) -{ native_sleep(time.count()); } -#endif - -#ifdef _GLIBCXX_USE_SCHED_YIELD -inline void portable_yield() -{ std::this_thread::yield(); } -#else -inline void portable_yield() -{ native_yield(); } -#endif - -//this is the number of times we initially spin, on the first wait attempt -#define __SYNCHRONIC_SPIN_COUNT_A 16 - -//this is how decide to yield instead of just spinning, 'c' is the current trip count -//#define __SYNCHRONIC_SPIN_YIELD(c) true -#define __SYNCHRONIC_SPIN_RELAX(c) (c>>3) - -//this is the number of times we normally spin, on every subsequent wait attempt -#define __SYNCHRONIC_SPIN_COUNT_B 8 - -} -} - -#endif //__SYNCHRONIC_CONFIG_H diff --git a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp b/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp deleted file mode 100644 index facc8d6d8e67a4828aa94bd75fb7590f454b41f6..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp +++ /dev/null @@ -1,162 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef KOKKOS_SYNCHRONIC_N3998_HPP -#define KOKKOS_SYNCHRONIC_N3998_HPP - -#include <impl/Kokkos_Synchronic.hpp> -#include <functional> - -/* -In the section below, a synchronization point represents a point at which a -thread may block until a given synchronization condition has been reached or -at which it may notify other threads that a synchronization condition has -been achieved. -*/ -namespace Kokkos { namespace Impl { - - /* - A latch maintains an internal counter that is initialized when the latch - is created. The synchronization condition is reached when the counter is - decremented to 0. Threads may block at a synchronization point waiting - for the condition to be reached. When the condition is reached, any such - blocked threads will be released. - */ - struct latch { - latch(int val) : count(val), released(false) { } - latch(const latch&) = delete; - latch& operator=(const latch&) = delete; - ~latch( ) { } - void arrive( ) { - __arrive( ); - } - void arrive_and_wait( ) { - if(!__arrive( )) - wait( ); - } - void wait( ) { - while(!released.load_when_not_equal(false,std::memory_order_acquire)) - ; - } - bool try_wait( ) { - return released.load(std::memory_order_acquire); - } - private: - bool __arrive( ) { - if(count.fetch_add(-1,std::memory_order_release)!=1) - return false; - released.store(true,std::memory_order_release); - return true; - } - std::atomic<int> count; - synchronic<bool> released; - }; - - /* - A barrier is created with an initial value representing the number of threads - that can arrive at the synchronization point. When that many threads have - arrived, the synchronization condition is reached and the threads are - released. The barrier will then reset, and may be reused for a new cycle, in - which the same set of threads may arrive again at the synchronization point. - The same set of threads shall arrive at the barrier in each cycle, otherwise - the behaviour is undefined. 
- */ - struct barrier { - barrier(int val) : expected(val), arrived(0), nexpected(val), epoch(0) { } - barrier(const barrier&) = delete; - barrier& operator=(const barrier&) = delete; - ~barrier() { } - void arrive_and_wait() { - int const myepoch = epoch.load(std::memory_order_relaxed); - if(!__arrive(myepoch)) - while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) - ; - } - void arrive_and_drop() { - nexpected.fetch_add(-1,std::memory_order_relaxed); - __arrive(epoch.load(std::memory_order_relaxed)); - } - private: - bool __arrive(int const myepoch) { - int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; - if(__builtin_expect(myresult == expected,0)) { - expected = nexpected.load(std::memory_order_relaxed); - arrived.store(0,std::memory_order_relaxed); - epoch.store(myepoch+1,std::memory_order_release); - return true; - } - return false; - } - int expected; - std::atomic<int> arrived, nexpected; - synchronic<int> epoch; - }; - - /* - A notifying barrier behaves as a barrier, but is constructed with a callable - completion function that is invoked after all threads have arrived at the - synchronization point, and before the synchronization condition is reached. - The completion may modify the set of threads that arrives at the barrier in - each cycle. - */ - struct notifying_barrier { - template <typename T> - notifying_barrier(int val, T && f) : expected(val), arrived(0), nexpected(val), epoch(0), completion(std::forward<T>(f)) { } - notifying_barrier(const notifying_barrier&) = delete; - notifying_barrier& operator=(const notifying_barrier&) = delete; - ~notifying_barrier( ) { } - void arrive_and_wait() { - int const myepoch = epoch.load(std::memory_order_relaxed); - if(!__arrive(myepoch)) - while(epoch.load_when_not_equal(myepoch,std::memory_order_acquire) == myepoch) - ; - } - void arrive_and_drop() { - nexpected.fetch_add(-1,std::memory_order_relaxed); - __arrive(epoch.load(std::memory_order_relaxed)); - } - private: - bool __arrive(int const myepoch) { - int const myresult = arrived.fetch_add(1,std::memory_order_acq_rel) + 1; - if(__builtin_expect(myresult == expected,0)) { - int const newexpected = completion(); - expected = newexpected ? 
newexpected : nexpected.load(std::memory_order_relaxed); - arrived.store(0,std::memory_order_relaxed); - epoch.store(myepoch+1,std::memory_order_release); - return true; - } - return false; - } - int expected; - std::atomic<int> arrived, nexpected; - synchronic<int> epoch; - std::function<int()> completion; - }; -}} - -#endif //__N3998_H diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index afa01d0cde1f1253f216c415b81bf5c8fee1de2b..b514df351725ac55e88ea1c2e92eec4b1711e6b4 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -76,9 +76,6 @@ namespace Impl { template< typename Space , typename ResultType , typename FunctorType > class TaskBase ; -template< typename Space > -class TaskExec ; - } /* namespace Impl */ } /* namespace Kokkos */ @@ -149,8 +146,8 @@ private: // task->m_next is the dependence or zero // Postcondition: // task->m_next is linked list membership - KOKKOS_FUNCTION - void schedule( task_root_type * const ); + KOKKOS_FUNCTION void schedule_runnable( task_root_type * const ); + KOKKOS_FUNCTION void schedule_aggregate( task_root_type * const ); // Reschedule a task // Precondition: @@ -178,7 +175,7 @@ private: , task_root_type * const ); KOKKOS_FUNCTION - static task_root_type * pop_task( task_root_type * volatile * const ); + static task_root_type * pop_ready_task( task_root_type * volatile * const ); KOKKOS_FUNCTION static void decrement( task_root_type * task ); @@ -368,6 +365,7 @@ public: int16_t m_task_type ; ///< Type of task int16_t m_priority ; ///< Priority of runnable task + TaskBase() = delete ; TaskBase( TaskBase && ) = delete ; TaskBase( const TaskBase & ) = delete ; TaskBase & operator = ( TaskBase && ) = delete ; @@ -375,17 +373,43 @@ public: KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + // Constructor for a runnable task KOKKOS_INLINE_FUNCTION - constexpr TaskBase() noexcept - : m_apply(0) - , m_queue(0) - , m_wait(0) - , m_next(0) - , m_ref_count(0) - , m_alloc_size(0) - , m_dep_count(0) - , m_task_type( TaskSingle ) - , m_priority( 1 /* TaskRegularPriority */ ) + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , TaskBase * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + ) noexcept + : m_apply( arg_apply ) + , m_queue( arg_queue ) + , m_wait( 0 ) + , m_next( arg_dependence ) + , m_ref_count( arg_ref_count ) + , m_alloc_size( arg_alloc_size ) + , m_dep_count( 0 ) + , m_task_type( arg_task_type ) + , m_priority( arg_priority ) + {} + + // Constructor for an aggregate task + KOKKOS_INLINE_FUNCTION + constexpr TaskBase( queue_type * arg_queue + , int arg_ref_count + , int arg_alloc_size + , int arg_dep_count + ) noexcept + : m_apply( 0 ) + , m_queue( arg_queue ) + , m_wait( 0 ) + , m_next( 0 ) + , m_ref_count( arg_ref_count ) + , m_alloc_size( arg_alloc_size ) + , m_dep_count( arg_dep_count ) + , m_task_type( Aggregate ) + , m_priority( 0 ) {} //---------------------------------------- @@ -406,9 +430,13 @@ public: KOKKOS_INLINE_FUNCTION void add_dependence( TaskBase* dep ) { + // Precondition: lock == m_next + + TaskBase * const lock = (TaskBase *) LockTag ; + // Assign dependence to m_next. It will be processed in the subsequent // call to schedule. Error if the dependence is reset. 
- if ( 0 != Kokkos::atomic_exchange( & m_next, dep ) ) { + if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); } @@ -431,8 +459,13 @@ class TaskBase< ExecSpace , ResultType , void > { private: - static_assert( sizeof(TaskBase<ExecSpace,void,void>) == 48 , "" ); + using root_type = TaskBase<ExecSpace,void,void> ; + using function_type = typename root_type::function_type ; + using queue_type = typename root_type::queue_type ; + static_assert( sizeof(root_type) == 48 , "" ); + + TaskBase() = delete ; TaskBase( TaskBase && ) = delete ; TaskBase( const TaskBase & ) = delete ; TaskBase & operator = ( TaskBase && ) = delete ; @@ -444,9 +477,24 @@ public: KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + // Constructor for runnable task KOKKOS_INLINE_FUNCTION - TaskBase() - : TaskBase< ExecSpace , void , void >() + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , root_type * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + ) + : root_type( arg_apply + , arg_queue + , arg_dependence + , arg_ref_count + , arg_alloc_size + , arg_task_type + , arg_priority + ) , m_result() {} @@ -471,11 +519,14 @@ private: public: - using root_type = TaskBase< ExecSpace , void , void > ; - using base_type = TaskBase< ExecSpace , ResultType , void > ; - using member_type = TaskExec< ExecSpace > ; - using functor_type = FunctorType ; - using result_type = ResultType ; + using root_type = TaskBase< ExecSpace , void , void > ; + using base_type = TaskBase< ExecSpace , ResultType , void > ; + using specialization = TaskQueueSpecialization< ExecSpace > ; + using function_type = typename root_type::function_type ; + using queue_type = typename root_type::queue_type ; + using member_type = typename specialization::member_type ; + using functor_type = FunctorType ; + using result_type = ResultType ; template< typename Type > KOKKOS_INLINE_FUNCTION static @@ -522,13 +573,30 @@ public: if ( 0 == member->team_rank() && !(task->requested_respawn()) ) { // Did not respawn, destroy the functor to free memory. static_cast<functor_type*>(task)->~functor_type(); - // Cannot destroy the task until its dependences have been processed. + // Cannot destroy and deallocate the task until its dependences + // have been processed. } } + // Constructor for runnable task KOKKOS_INLINE_FUNCTION - TaskBase( functor_type const & arg_functor ) - : base_type() + constexpr TaskBase( function_type arg_apply + , queue_type * arg_queue + , root_type * arg_dependence + , int arg_ref_count + , int arg_alloc_size + , int arg_task_type + , int arg_priority + , FunctorType && arg_functor + ) + : base_type( arg_apply + , arg_queue + , arg_dependence + , arg_ref_count + , arg_alloc_size + , arg_task_type + , arg_priority + ) , functor_type( arg_functor ) {} diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index fefbbad8bde297ce94ad99058e6f25eca6046b7e..23f5d3cd30dbbf87c024af935356961c1642a022 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -170,6 +170,7 @@ bool TaskQueue< ExecSpace >::push_task ) { // Push task into a concurrently pushed and popped queue. + // The queue can be either a ready task queue or a waiting task queue. // The queue is a linked list where 'task->m_next' form the links. 
// Fail the push attempt if the queue is locked; // otherwise retry until the push succeeds. @@ -227,13 +228,12 @@ bool TaskQueue< ExecSpace >::push_task template< typename ExecSpace > KOKKOS_FUNCTION typename TaskQueue< ExecSpace >::task_root_type * -TaskQueue< ExecSpace >::pop_task +TaskQueue< ExecSpace >::pop_ready_task ( TaskQueue< ExecSpace >::task_root_type * volatile * const queue ) { - // Pop task from a concurrently pushed and popped queue. + // Pop task from a concurrently pushed and popped ready task queue. // The queue is a linked list where 'task->m_next' form the links. - task_root_type * const zero = (task_root_type *) 0 ; task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; @@ -252,85 +252,83 @@ TaskQueue< ExecSpace >::pop_task // (1) lock, (2) end, or (3) a valid task. // Thus zero will never appear in the queue. // - // If queue is locked then just read by guaranteeing - // the CAS will fail. + // If queue is locked then just read by guaranteeing the CAS will fail. if ( lock == task ) task = 0 ; task_root_type * const x = task ; - task = Kokkos::atomic_compare_exchange(queue,task,lock); - - if ( x == task ) break ; // CAS succeeded and queue is locked - } + task = Kokkos::atomic_compare_exchange(queue,x,lock); - if ( end != task ) { + if ( x == task ) { + // CAS succeeded and queue is locked + // + // This thread has locked the queue and removed 'task' from the queue. + // Extract the next entry of the queue from 'task->m_next' + // and mark 'task' as popped from a queue by setting + // 'task->m_next = lock'. + // + // Place the next entry in the head of the queue, + // which also unlocks the queue. + // + // This thread has exclusive access to + // the queue and the popped task's m_next. - // This thread has locked the queue and removed 'task' from the queue. - // Extract the next entry of the queue from 'task->m_next' - // and mark 'task' as popped from a queue by setting - // 'task->m_next = lock'. + *queue = task->m_next ; task->m_next = lock ; - task_root_type * const next = - Kokkos::atomic_exchange( & task->m_next , lock ); + Kokkos::memory_fence(); - // Place the next entry in the head of the queue, - // which also unlocks the queue. - - task_root_type * const unlock = - Kokkos::atomic_exchange( queue , next ); +#if 0 + printf( "pop_ready_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" + , uintptr_t(queue) + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , int(task->m_task_type) + , int(task->m_priority) + , int(task->m_ref_count) ); +#endif - if ( next == zero || next == lock || lock != unlock ) { - Kokkos::abort("TaskQueue::pop_task ERROR"); + return task ; } } -#if 0 - if ( end != task ) { - printf( "pop_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" - , uintptr_t(queue) - , uintptr_t(task) - , uintptr_t(task->m_wait) - , uintptr_t(task->m_next) - , int(task->m_task_type) - , int(task->m_priority) - , int(task->m_ref_count) ); - } -#endif - - return task ; + return end ; } //---------------------------------------------------------------------------- template< typename ExecSpace > KOKKOS_FUNCTION -void TaskQueue< ExecSpace >::schedule +void TaskQueue< ExecSpace >::schedule_runnable ( TaskQueue< ExecSpace >::task_root_type * const task ) { - // Schedule a runnable or when_all task upon construction / spawn + // Schedule a runnable task upon construction / spawn // and upon completion of other tasks that 'task' is waiting on. 
- - // Precondition on runnable task state: - // task is either constructing or executing + // + // Precondition: + // - called by a single thread for the input task + // - calling thread has exclusive access to the task + // - task is not a member of a queue + // - if runnable then task is either constructing or respawning // // Constructing state: // task->m_wait == 0 - // task->m_next == dependence - // Executing-respawn state: - // task->m_wait == head of linked list - // task->m_next == dependence + // task->m_next == dependence or 0 + // Respawn state: + // task->m_wait == head of linked list: 'end' or valid task + // task->m_next == dependence or 0 // // Task state transition: - // Constructing -> Waiting - // Executing-respawn -> Waiting + // Constructing -> Waiting + // Respawn -> Waiting // // Postcondition on task state: - // task->m_wait == head of linked list - // task->m_next == member of linked list + // task->m_wait == head of linked list (queue) + // task->m_next == member of linked list (queue) #if 0 - printf( "schedule( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + printf( "schedule_runnable( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" , uintptr_t(task) , uintptr_t(task->m_wait) , uintptr_t(task->m_next) @@ -343,135 +341,204 @@ void TaskQueue< ExecSpace >::schedule task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - //---------------------------------------- - { - // If Constructing then task->m_wait == 0 - // Change to waiting by task->m_wait = EndTag - - task_root_type * const init = - Kokkos::atomic_compare_exchange( & task->m_wait , zero , end ); + bool respawn = false ; - // Precondition + //---------------------------------------- - if ( lock == init ) { - Kokkos::abort("TaskQueue::schedule ERROR: task is complete"); - } + if ( zero == task->m_wait ) { + // Task in Constructing state + // - Transition to Waiting state + // Preconditions: + // - call occurs exclusively within a single thread - // if ( init == 0 ) Constructing -> Waiting - // else Executing-Respawn -> Waiting + task->m_wait = end ; + // Task in Waiting state } + else if ( lock != task->m_wait ) { + // Task in Executing state with Respawn request + // - Update dependence + // - Transition to Waiting state + respawn = true ; + } + else { + // Task in Complete state + Kokkos::abort("TaskQueue::schedule_runnable ERROR: task is complete"); + } + //---------------------------------------- + // Scheduling a runnable task which may have a depencency 'dep'. + // Extract dependence, if any, from task->m_next. + // If 'dep' is not null then attempt to push 'task' + // into the wait queue of 'dep'. + // If the push succeeds then 'task' may be + // processed or executed by another thread at any time. + // If the push fails then 'dep' is complete and 'task' + // is ready to execute. + + // Exclusive access so don't need an atomic exchange + // task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); + task_root_type * dep = task->m_next ; task->m_next = zero ; + + const bool is_ready = + ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); + + if ( ( 0 != dep ) && respawn ) { + // Reference count for dep was incremented when + // respawn assigned dependency to task->m_next + // so that if dep completed prior to the + // above push_task dep would not be destroyed. + // dep reference count can now be decremented, + // which may deallocate the task. 
+ TaskQueue::assign( & dep , (task_root_type *)0 ); + } - if ( task_root_type::Aggregate != task->m_task_type ) { + if ( is_ready ) { - // Scheduling a runnable task which may have a depencency 'dep'. - // Extract dependence, if any, from task->m_next. - // If 'dep' is not null then attempt to push 'task' - // into the wait queue of 'dep'. - // If the push succeeds then 'task' may be - // processed or executed by another thread at any time. - // If the push fails then 'dep' is complete and 'task' - // is ready to execute. + // No dependence or 'dep' is complete so push task into ready queue. + // Increment the ready count before pushing into ready queue + // to track number of ready + executing tasks. + // The ready count will be decremented when the task is complete. - task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); + Kokkos::atomic_increment( & m_ready_count ); - const bool is_ready = - ( 0 == dep ) || ( ! push_task( & dep->m_wait , task ) ); + task_root_type * volatile * const ready_queue = + & m_ready[ task->m_priority ][ task->m_task_type ]; - // Reference count for dep was incremented when assigned - // to task->m_next so that if it completed prior to the - // above push_task dep would not be destroyed. - // dep reference count can now be decremented, - // which may deallocate the task. - TaskQueue::assign( & dep , (task_root_type *)0 ); + // A push_task fails if the ready queue is locked. + // A ready queue is only locked during a push or pop; + // i.e., it is never permanently locked. + // Retry push to ready queue until it succeeds. + // When the push succeeds then 'task' may be + // processed or executed by another thread at any time. - if ( is_ready ) { + while ( ! push_task( ready_queue , task ) ); + } - // No dependence or 'dep' is complete so push task into ready queue. - // Increment the ready count before pushing into ready queue - // to track number of ready + executing tasks. - // The ready count will be decremented when the task is complete. + //---------------------------------------- + // Postcondition: + // - A runnable 'task' was pushed into a wait or ready queue. + // - Concurrent execution may have already popped 'task' + // from a queue and processed it as appropriate. +} - Kokkos::atomic_increment( & m_ready_count ); +template< typename ExecSpace > +KOKKOS_FUNCTION +void TaskQueue< ExecSpace >::schedule_aggregate + ( TaskQueue< ExecSpace >::task_root_type * const task ) +{ + // Schedule an aggregate task upon construction + // and upon completion of other tasks that 'task' is waiting on. 
+ // + // Precondition: + // - called by a single thread for the input task + // - calling thread has exclusive access to the task + // - task is not a member of a queue + // + // Constructing state: + // task->m_wait == 0 + // task->m_next == dependence or 0 + // + // Task state transition: + // Constructing -> Waiting + // + // Postcondition on task state: + // task->m_wait == head of linked list (queue) + // task->m_next == member of linked list (queue) + +#if 0 + printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" + , uintptr_t(task) + , uintptr_t(task->m_wait) + , uintptr_t(task->m_next) + , task->m_task_type + , task->m_priority + , task->m_ref_count ); +#endif - task_root_type * volatile * const queue = - & m_ready[ task->m_priority ][ task->m_task_type ]; + task_root_type * const zero = (task_root_type *) 0 ; + task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; + task_root_type * const end = (task_root_type *) task_root_type::EndTag ; - // A push_task fails if the ready queue is locked. - // A ready queue is only locked during a push or pop; - // i.e., it is never permanently locked. - // Retry push to ready queue until it succeeds. - // When the push succeeds then 'task' may be - // processed or executed by another thread at any time. + //---------------------------------------- - while ( ! push_task( queue , task ) ); - } + if ( zero == task->m_wait ) { + // Task in Constructing state + // - Transition to Waiting state + // Preconditions: + // - call occurs exclusively within a single thread + + task->m_wait = end ; + // Task in Waiting state + } + else if ( lock == task->m_wait ) { + // Task in Complete state + Kokkos::abort("TaskQueue::schedule_aggregate ERROR: task is complete"); } + //---------------------------------------- - else { - // Scheduling a 'when_all' task with multiple dependences. - // This scheduling may be called when the 'when_all' is - // (1) created or - // (2) being removed from a completed task's wait list. + // Scheduling a 'when_all' task with multiple dependences. + // This scheduling may be called when the 'when_all' is + // (1) created or + // (2) being removed from a completed task's wait list. - task_root_type ** const aggr = task->aggregate_dependences(); + task_root_type ** const aggr = task->aggregate_dependences(); - // Assume the 'when_all' is complete until a dependence is - // found that is not complete. + // Assume the 'when_all' is complete until a dependence is + // found that is not complete. - bool is_complete = true ; + bool is_complete = true ; - for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { + for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { - --i ; + --i ; - // Loop dependences looking for an incomplete task. - // Add this task to the incomplete task's wait queue. + // Loop dependences looking for an incomplete task. + // Add this task to the incomplete task's wait queue. - // Remove a task 'x' from the dependence list. - // The reference count of 'x' was incremented when - // it was assigned into the dependence list. + // Remove a task 'x' from the dependence list. + // The reference count of 'x' was incremented when + // it was assigned into the dependence list. 
- task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero ); + // Exclusive access so don't need an atomic exchange + // task_root_type * x = Kokkos::atomic_exchange( aggr + i , zero ); + task_root_type * x = aggr[i] ; aggr[i] = zero ; - if ( x ) { + if ( x ) { - // If x->m_wait is not locked then push succeeds - // and the aggregate is not complete. - // If the push succeeds then this when_all 'task' may be - // processed by another thread at any time. - // For example, 'x' may be completeed by another - // thread and then re-schedule this when_all 'task'. + // If x->m_wait is not locked then push succeeds + // and the aggregate is not complete. + // If the push succeeds then this when_all 'task' may be + // processed by another thread at any time. + // For example, 'x' may be completeed by another + // thread and then re-schedule this when_all 'task'. - is_complete = ! push_task( & x->m_wait , task ); + is_complete = ! push_task( & x->m_wait , task ); - // Decrement reference count which had been incremented - // when 'x' was added to the dependence list. + // Decrement reference count which had been incremented + // when 'x' was added to the dependence list. - TaskQueue::assign( & x , zero ); - } + TaskQueue::assign( & x , zero ); } + } - if ( is_complete ) { - // The when_all 'task' was not added to a wait queue because - // all dependences were complete so this aggregate is complete. - // Complete the when_all 'task' to schedule other tasks - // that are waiting for the when_all 'task' to complete. + if ( is_complete ) { + // The when_all 'task' was not added to a wait queue because + // all dependences were complete so this aggregate is complete. + // Complete the when_all 'task' to schedule other tasks + // that are waiting for the when_all 'task' to complete. - task->m_next = lock ; + task->m_next = lock ; - complete( task ); + complete( task ); - // '*task' may have been deleted upon completion - } + // '*task' may have been deleted upon completion } + //---------------------------------------- // Postcondition: - // A runnable 'task' was pushed into a wait or ready queue. - // An aggregate 'task' was either pushed to a wait queue - // or completed. - // Concurrent execution may have already popped 'task' - // from a queue and processed it as appropriate. + // - An aggregate 'task' was either pushed to a wait queue or completed. + // - Concurrent execution may have already popped 'task' + // from a queue and processed it as appropriate. } //---------------------------------------------------------------------------- @@ -529,7 +596,7 @@ void TaskQueue< ExecSpace >::complete // Is a runnable task has finished executing and requested respawn. // Schedule the task for subsequent execution. - schedule( task ); + schedule_runnable( task ); } //---------------------------------------- else { @@ -556,18 +623,22 @@ void TaskQueue< ExecSpace >::complete TaskQueue::assign( & task , zero ); // This thread has exclusive access to the wait list so - // the concurrency-safe pop_task function is not needed. + // the concurrency-safe pop_ready_task function is not needed. // Schedule the tasks that have been waiting on the input 'task', // which may have been deleted. 
while ( x != end ) { + // Have exclusive access to 'x' until it is scheduled + // Set x->m_next = zero <= no dependence, not a respawn - // Set x->m_next = zero <= no dependence - - task_root_type * const next = - (task_root_type *) Kokkos::atomic_exchange( & x->m_next , zero ); + task_root_type * const next = x->m_next ; x->m_next = 0 ; - schedule( x ); + if ( task_root_type::Aggregate != x->m_task_type ) { + schedule_runnable( x ); + } + else { + schedule_aggregate( x ); + } x = next ; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp index ff503cb27329c006aeb0b476c2dd54e09d43baa4..d72cde03fd2bb1ae40559c80d007f7a8836636c0 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Utilities.hpp @@ -45,6 +45,7 @@ #define KOKKOS_CORE_IMPL_UTILITIES_HPP #include <Kokkos_Macros.hpp> +#include <stdint.h> #include <type_traits> //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp index ad1b6dce39d03182d1187105d79a9cb8e239ac8e..93ff6c48a77d00e45e3028413d5c02f4020d65bc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,52 +36,144 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include <Kokkos_Macros.hpp> + #include <impl/Kokkos_spinwait.hpp> +#include <Kokkos_Atomic.hpp> +#include <impl/Kokkos_BitOps.hpp> + /*--------------------------------------------------------------------------*/ -#if ( KOKKOS_ENABLE_ASM ) - #if defined( __arm__ ) || defined( __aarch64__ ) - /* No-operation instruction to idle the thread. */ - #define YIELD asm volatile("nop") +#if !defined( _WIN32 ) + #if defined( KOKKOS_ENABLE_ASM ) + #if defined( __arm__ ) || defined( __aarch64__ ) + /* No-operation instruction to idle the thread. 
*/ + #define KOKKOS_INTERNAL_PAUSE + #else + /* Pause instruction to prevent excess processor bus usage */ + #define KOKKOS_INTERNAL_PAUSE asm volatile("pause\n":::"memory") + #endif + #define KOKKOS_INTERNAL_NOP2 asm volatile("nop\n" "nop\n") + #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 + #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; + #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; + #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; + namespace { + inline void kokkos_internal_yield( const unsigned i ) noexcept { + switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { + case 0u: KOKKOS_INTERNAL_NOP2; break; + case 1u: KOKKOS_INTERNAL_NOP4; break; + case 2u: KOKKOS_INTERNAL_NOP8; break; + case 3u: KOKKOS_INTERNAL_NOP16; break; + default: KOKKOS_INTERNAL_NOP32; + } + KOKKOS_INTERNAL_PAUSE; + } + } #else - /* Pause instruction to prevent excess processor bus usage */ - #define YIELD asm volatile("pause\n":::"memory") + #include <sched.h> + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + sched_yield(); + } + } + #endif +#else // defined( _WIN32 ) + #if defined ( KOKKOS_ENABLE_WINTHREAD ) + #include <process.h> + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + Sleep(0); + } + } + #elif defined( _MSC_VER ) + #define NOMINMAX + #include <winsock2.h> + #include <windows.h> + namespace { + inline void kokkos_internal_yield( const unsigned ) noexcept { + YieldProcessor(); + } + } + #else + #define KOKKOS_INTERNAL_PAUSE __asm__ __volatile__("pause\n":::"memory") + #define KOKKOS_INTERNAL_NOP2 __asm__ __volatile__("nop\n" "nop") + #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 + #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; + #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; + #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; + namespace { + inline void kokkos_internal_yield( const unsigned i ) noexcept { + switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { + case 0: KOKKOS_INTERNAL_NOP2; break; + case 1: KOKKOS_INTERNAL_NOP4; break; + case 2: KOKKOS_INTERNAL_NOP8; break; + case 3: KOKKOS_INTERNAL_NOP16; break; + default: KOKKOS_INTERNAL_NOP32; + } + KOKKOS_INTERNAL_PAUSE; + } + } #endif -#elif defined ( KOKKOS_ENABLE_WINTHREAD ) - #include <process.h> - #define YIELD Sleep(0) -#elif defined ( _WIN32) && defined (_MSC_VER) - /* Windows w/ Visual Studio */ - #define NOMINMAX - #include <winsock2.h> - #include <windows.h> -#define YIELD YieldProcessor(); -#elif defined ( _WIN32 ) - /* Windows w/ Intel*/ - #define YIELD __asm__ __volatile__("pause\n":::"memory") -#else - #include <sched.h> - #define YIELD sched_yield() #endif + /*--------------------------------------------------------------------------*/ namespace Kokkos { namespace Impl { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -void spinwait( volatile int & flag , const int value ) + +void spinwait_while_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value == flag ) { + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + +void spinwait_until_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value != flag ) { + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + +void spinwait_while_equal( volatile int64_t & flag 
, const int64_t value ) { + Kokkos::store_fence(); + unsigned i = 0; while ( value == flag ) { - YIELD ; + kokkos_internal_yield(i); + ++i; + } + Kokkos::load_fence(); +} + +void spinwait_until_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + unsigned i = 0; + while ( value != flag ) { + kokkos_internal_yield(i); + ++i; } + Kokkos::load_fence(); } + #endif } /* namespace Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp index cc87771faefcb8ad7716842890dbec4a9c1219a1..6e34b8a943d164eea1af317be66928a26a9e4ab2 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,14 +47,30 @@ #include <Kokkos_Macros.hpp> +#include <cstdint> + namespace Kokkos { namespace Impl { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) -void spinwait( volatile int & flag , const int value ); + +void spinwait_while_equal( volatile int32_t & flag , const int32_t value ); +void spinwait_until_equal( volatile int32_t & flag , const int32_t value ); + +void spinwait_while_equal( volatile int64_t & flag , const int64_t value ); +void spinwait_until_equal( volatile int64_t & flag , const int64_t value ); #else + +KOKKOS_INLINE_FUNCTION +void spinwait_while_equal( volatile int32_t & , const int32_t ) {} +KOKKOS_INLINE_FUNCTION +void spinwait_until_equal( volatile int32_t & , const int32_t ) {} + +KOKKOS_INLINE_FUNCTION +void spinwait_while_equal( volatile int64_t & , const int64_t ) {} KOKKOS_INLINE_FUNCTION -void spinwait( volatile int & , const int ) {} +void spinwait_until_equal( volatile int64_t & , const int64_t ) {} + #endif } /* namespace Impl */ diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 795657fe876233c8ef7f962bdce12be4d0452e2f..caf6c50129f090cd13cd92e67a79880949e821a1 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -115,10 +115,31 @@ IF(Kokkos_ENABLE_OpenMP) ) ENDIF() -IF(Kokkos_ENABLE_QTHREAD) +IF(Kokkos_ENABLE_Qthreads) TRIBITS_ADD_EXECUTABLE_AND_TEST( - UnitTest_Qthread - SOURCES UnitTestMain.cpp TestQthread.cpp + UnitTest_Qthreads + SOURCES + UnitTestMain.cpp + qthreads/TestQthreads_Atomics.cpp + qthreads/TestQthreads_Other.cpp + qthreads/TestQthreads_Reductions.cpp + qthreads/TestQthreads_SubView_a.cpp + qthreads/TestQthreads_SubView_b.cpp + qthreads/TestQthreads_SubView_c01.cpp + qthreads/TestQthreads_SubView_c02.cpp + qthreads/TestQthreads_SubView_c03.cpp + qthreads/TestQthreads_SubView_c04.cpp + qthreads/TestQthreads_SubView_c05.cpp + qthreads/TestQthreads_SubView_c06.cpp + qthreads/TestQthreads_SubView_c07.cpp + qthreads/TestQthreads_SubView_c08.cpp + qthreads/TestQthreads_SubView_c09.cpp + 
qthreads/TestQthreads_SubView_c10.cpp + qthreads/TestQthreads_SubView_c11.cpp + qthreads/TestQthreads_SubView_c12.cpp + qthreads/TestQthreads_Team.cpp + qthreads/TestQthreads_ViewAPI_a.cpp + qthreads/TestQthreads_ViewAPI_b.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -194,4 +215,3 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( FAIL_REGULAR_EXPRESSION " FAILED " TESTONLYLIBS kokkos_gtest ) - diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index cc59825fba85d17b67c0694de1198acd240587d9..d93830a28d9db5ae50306c70ae5187062a07c594 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -6,6 +6,7 @@ vpath %.cpp ${KOKKOS_PATH}/core/unit_test vpath %.cpp ${KOKKOS_PATH}/core/unit_test/serial vpath %.cpp ${KOKKOS_PATH}/core/unit_test/threads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/openmp +vpath %.cpp ${KOKKOS_PATH}/core/unit_test/qthreads vpath %.cpp ${KOKKOS_PATH}/core/unit_test/cuda TEST_HEADERS = $(wildcard $(KOKKOS_PATH)/core/unit_test/*.hpp) @@ -35,15 +36,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda_Other.o TestCuda_Reductions_a.o TestCuda_Reductions_b.o TestCuda_Atomics.o TestCuda_Team.o TestCuda_Spaces.o OBJ_CUDA += TestCuda_SubView_a.o TestCuda_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestCuda_SubView_c_all.o + OBJ_OPENMP += TestCuda_SubView_c_all.o else OBJ_CUDA += TestCuda_SubView_c01.o TestCuda_SubView_c02.o TestCuda_SubView_c03.o - OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o - OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o + OBJ_CUDA += TestCuda_SubView_c04.o TestCuda_SubView_c05.o TestCuda_SubView_c06.o + OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o endif - OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o - OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o + OBJ_CUDA += TestCuda_ViewAPI_a.o TestCuda_ViewAPI_b.o TestCuda_ViewAPI_c.o TestCuda_ViewAPI_d.o + OBJ_CUDA += TestCuda_ViewAPI_e.o TestCuda_ViewAPI_f.o TestCuda_ViewAPI_g.o TestCuda_ViewAPI_h.o OBJ_CUDA += TestCuda_ViewAPI_s.o OBJ_CUDA += UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Cuda @@ -51,13 +52,13 @@ endif endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) - OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o - OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o + OBJ_THREADS = TestThreads_Other.o TestThreads_Reductions.o TestThreads_Atomics.o TestThreads_Team.o + OBJ_THREADS += TestThreads_SubView_a.o TestThreads_SubView_b.o OBJ_THREADS += TestThreads_SubView_c01.o TestThreads_SubView_c02.o TestThreads_SubView_c03.o - OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o - OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o + OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o + OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o - OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_THREADS += TestThreads_ViewAPI_a.o TestThreads_ViewAPI_b.o 
UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Threads TEST_TARGETS += test-threads endif @@ -66,11 +67,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) OBJ_OPENMP = TestOpenMP_Other.o TestOpenMP_Reductions.o TestOpenMP_Atomics.o TestOpenMP_Team.o OBJ_OPENMP += TestOpenMP_SubView_a.o TestOpenMP_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestOpenMP_SubView_c_all.o + OBJ_OPENMP += TestOpenMP_SubView_c_all.o else OBJ_OPENMP += TestOpenMP_SubView_c01.o TestOpenMP_SubView_c02.o TestOpenMP_SubView_c03.o - OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o - OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o + OBJ_OPENMP += TestOpenMP_SubView_c04.o TestOpenMP_SubView_c05.o TestOpenMP_SubView_c06.o + OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o endif OBJ_OPENMP += TestOpenMP_ViewAPI_a.o TestOpenMP_ViewAPI_b.o UnitTestMain.o gtest-all.o @@ -78,28 +79,38 @@ endif TEST_TARGETS += test-openmp endif +ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1) + OBJ_QTHREADS = TestQthreads_Other.o TestQthreads_Reductions.o TestQthreads_Atomics.o TestQthreads_Team.o + OBJ_QTHREADS += TestQthreads_SubView_a.o TestQthreads_SubView_b.o +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + OBJ_QTHREADS += TestQthreads_SubView_c_all.o +else + OBJ_QTHREADS += TestQthreads_SubView_c01.o TestQthreads_SubView_c02.o TestQthreads_SubView_c03.o + OBJ_QTHREADS += TestQthreads_SubView_c04.o TestQthreads_SubView_c05.o TestQthreads_SubView_c06.o + OBJ_QTHREADS += TestQthreads_SubView_c07.o TestQthreads_SubView_c08.o TestQthreads_SubView_c09.o + OBJ_QTHREADS += TestQthreads_SubView_c10.o TestQthreads_SubView_c11.o TestQthreads_SubView_c12.o +endif + OBJ_QTHREADS += TestQthreads_ViewAPI_a.o TestQthreads_ViewAPI_b.o UnitTestMain.o gtest-all.o + TARGETS += KokkosCore_UnitTest_Qthreads + TEST_TARGETS += test-qthreads +endif + ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) - OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o - OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o + OBJ_SERIAL = TestSerial_Other.o TestSerial_Reductions.o TestSerial_Atomics.o TestSerial_Team.o + OBJ_SERIAL += TestSerial_SubView_a.o TestSerial_SubView_b.o ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - OBJ_OPENMP += TestSerial_SubView_c_all.o + OBJ_OPENMP += TestSerial_SubView_c_all.o else OBJ_SERIAL += TestSerial_SubView_c01.o TestSerial_SubView_c02.o TestSerial_SubView_c03.o - OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o - OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o + OBJ_SERIAL += TestSerial_SubView_c04.o TestSerial_SubView_c05.o TestSerial_SubView_c06.o + OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o endif - OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o + OBJ_SERIAL += TestSerial_ViewAPI_a.o TestSerial_ViewAPI_b.o UnitTestMain.o gtest-all.o TARGETS += KokkosCore_UnitTest_Serial TEST_TARGETS += test-serial endif -ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) - OBJ_QTHREAD = TestQthread.o UnitTestMain.o gtest-all.o - TARGETS += KokkosCore_UnitTest_Qthread - TEST_TARGETS += test-qthread -endif - OBJ_HWLOC = TestHWLOC.o UnitTestMain.o 
gtest-all.o TARGETS += KokkosCore_UnitTest_HWLOC TEST_TARGETS += test-hwloc @@ -115,10 +126,6 @@ TARGETS += ${INITTESTS_TARGETS} INITTESTS_TEST_TARGETS := $(addprefix test-default-init-,${INITTESTS_NUMBERS}) TEST_TARGETS += ${INITTESTS_TEST_TARGETS} -OBJ_SYNCHRONIC = TestSynchronic.o UnitTestMain.o gtest-all.o -TARGETS += KokkosCore_UnitTest_Synchronic -TEST_TARGETS += test-synchronic - KokkosCore_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Cuda @@ -131,8 +138,8 @@ KokkosCore_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) KokkosCore_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Serial -KokkosCore_UnitTest_Qthread: $(OBJ_QTHREAD) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREAD) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthread +KokkosCore_UnitTest_Qthreads: $(OBJ_QTHREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_QTHREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Qthreads KokkosCore_UnitTest_HWLOC: $(OBJ_HWLOC) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_HWLOC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_HWLOC @@ -146,9 +153,6 @@ KokkosCore_UnitTest_Default: $(OBJ_DEFAULT) $(KOKKOS_LINK_DEPENDS) ${INITTESTS_TARGETS}: KokkosCore_UnitTest_DefaultDeviceTypeInit_%: TestDefaultDeviceTypeInit_%.o UnitTestMain.o gtest-all.o $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) TestDefaultDeviceTypeInit_$*.o UnitTestMain.o gtest-all.o $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_DefaultDeviceTypeInit_$* -KokkosCore_UnitTest_Synchronic: $(OBJ_SYNCHRONIC) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SYNCHRONIC) $(KOKKOS_LIBS) $(LIB) -o KokkosCore_UnitTest_Synchronic - test-cuda: KokkosCore_UnitTest_Cuda ./KokkosCore_UnitTest_Cuda @@ -161,8 +165,8 @@ test-openmp: KokkosCore_UnitTest_OpenMP test-serial: KokkosCore_UnitTest_Serial ./KokkosCore_UnitTest_Serial -test-qthread: KokkosCore_UnitTest_Qthread - ./KokkosCore_UnitTest_Qthread +test-qthreads: KokkosCore_UnitTest_Qthreads + ./KokkosCore_UnitTest_Qthreads test-hwloc: KokkosCore_UnitTest_HWLOC ./KokkosCore_UnitTest_HWLOC @@ -176,9 +180,6 @@ test-default: KokkosCore_UnitTest_Default ${INITTESTS_TEST_TARGETS}: test-default-init-%: KokkosCore_UnitTest_DefaultDeviceTypeInit_% ./KokkosCore_UnitTest_DefaultDeviceTypeInit_$* -test-synchronic: KokkosCore_UnitTest_Synchronic - ./KokkosCore_UnitTest_Synchronic - build_all: $(TARGETS) test: $(TEST_TARGETS) @@ -193,4 +194,3 @@ clean: kokkos-clean gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp index d22837f3ed7b67bccecfbe11ba4d71266a094616..f09cc5018cb698ec033639a326a29d8fffacec3f 100644 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. 
Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,8 +50,6 @@ #include <sstream> #include <iostream> -/*--------------------------------------------------------------------------*/ - #include <impl/Kokkos_ViewArray.hpp> namespace Test { @@ -59,51 +57,68 @@ namespace Test { template< class DeviceType > void TestViewAggregate() { - typedef Kokkos::Array<double,32> value_type ; - - typedef Kokkos::Experimental::Impl:: - ViewDataAnalysis< value_type * , Kokkos::LayoutLeft , value_type > - analysis_1d ; + typedef Kokkos::Array< double, 32 > value_type; + typedef Kokkos::Experimental::Impl::ViewDataAnalysis< value_type *, Kokkos::LayoutLeft, value_type > analysis_1d; - static_assert( std::is_same< typename analysis_1d::specialize , Kokkos::Array<> >::value , "" ); + static_assert( std::is_same< typename analysis_1d::specialize, Kokkos::Array<> >::value, "" ); + typedef Kokkos::ViewTraits< value_type **, DeviceType > a32_traits; + typedef Kokkos::ViewTraits< typename a32_traits::scalar_array_type, DeviceType > flat_traits; - typedef Kokkos::ViewTraits< value_type ** , DeviceType > a32_traits ; - typedef Kokkos::ViewTraits< typename a32_traits::scalar_array_type , DeviceType > flat_traits ; + static_assert( std::is_same< typename a32_traits::specialize, Kokkos::Array<> >::value, "" ); + static_assert( std::is_same< typename a32_traits::value_type, value_type >::value, "" ); + static_assert( a32_traits::rank == 2, "" ); + static_assert( a32_traits::rank_dynamic == 2, "" ); - static_assert( std::is_same< typename a32_traits::specialize , Kokkos::Array<> >::value , "" ); - static_assert( std::is_same< typename a32_traits::value_type , value_type >::value , "" ); - static_assert( a32_traits::rank == 2 , "" ); - static_assert( a32_traits::rank_dynamic == 2 , "" ); + static_assert( std::is_same< typename flat_traits::specialize, void >::value, "" ); + static_assert( flat_traits::rank == 3, "" ); + static_assert( flat_traits::rank_dynamic == 2, "" ); + static_assert( flat_traits::dimension::N2 == 32, "" ); - static_assert( std::is_same< typename flat_traits::specialize , void >::value , "" ); - static_assert( flat_traits::rank == 3 , "" ); - static_assert( flat_traits::rank_dynamic == 2 , "" ); - static_assert( flat_traits::dimension::N2 == 32 , "" ); + typedef Kokkos::View< Kokkos::Array< double, 32 > **, DeviceType > a32_type; + typedef typename a32_type::array_type a32_flat_type; + static_assert( std::is_same< typename a32_type::value_type, value_type >::value, "" ); + static_assert( std::is_same< typename a32_type::pointer_type, double * >::value, "" ); + static_assert( a32_type::Rank == 2, "" ); + static_assert( a32_flat_type::Rank == 3, "" ); - typedef Kokkos::View< Kokkos::Array<double,32> ** , DeviceType > a32_type ; - - typedef typename a32_type::array_type a32_flat_type ; - - static_assert( std::is_same< typename a32_type::value_type , value_type >::value , "" ); - static_assert( std::is_same< typename a32_type::pointer_type , double * >::value , "" ); - static_assert( a32_type::Rank == 2 , "" ); - static_assert( a32_flat_type::Rank == 3 , "" ); - - a32_type x("test",4,5); + a32_type x( 
"test", 4, 5 ); a32_flat_type y( x ); - ASSERT_EQ( x.extent(0) , 4 ); - ASSERT_EQ( x.extent(1) , 5 ); - ASSERT_EQ( y.extent(0) , 4 ); - ASSERT_EQ( y.extent(1) , 5 ); - ASSERT_EQ( y.extent(2) , 32 ); -} - + ASSERT_EQ( x.extent( 0 ), 4 ); + ASSERT_EQ( x.extent( 1 ), 5 ); + ASSERT_EQ( y.extent( 0 ), 4 ); + ASSERT_EQ( y.extent( 1 ), 5 ); + ASSERT_EQ( y.extent( 2 ), 32 ); + + // Initialize arrays from brace-init-list as for std::array. + // + // Comment: Clang will issue the following warning if we don't use double + // braces here (one for initializing the Kokkos::Array and one for + // initializing the sub-aggreagate C-array data member), + // + // warning: suggest braces around initialization of subobject + // + // but single brace syntax would be valid as well. + Kokkos::Array< float, 2 > aggregate_initialization_syntax_1 = { { 1.41, 3.14 } }; + ASSERT_FLOAT_EQ( aggregate_initialization_syntax_1[0], 1.41 ); + ASSERT_FLOAT_EQ( aggregate_initialization_syntax_1[1], 3.14 ); + + Kokkos::Array< int, 3 > aggregate_initialization_syntax_2{ { 0, 1, 2 } }; // since C++11 + for ( int i = 0; i < 3; ++i ) { + ASSERT_EQ( aggregate_initialization_syntax_2[i], i ); + } + + // Note that this is a valid initialization. + Kokkos::Array< double, 3 > initialized_with_one_argument_missing = { { 255, 255 } }; + for (int i = 0; i < 2; ++i) { + ASSERT_DOUBLE_EQ( initialized_with_one_argument_missing[i], 255 ); + } + // But the following line would not compile +// Kokkos::Array< double, 3 > initialized_with_too_many{ { 1, 2, 3, 4 } }; } -/*--------------------------------------------------------------------------*/ -/*--------------------------------------------------------------------------*/ +} // namespace Test #endif /* #ifndef TEST_AGGREGATE_HPP */ diff --git a/lib/kokkos/core/unit_test/TestAtomic.hpp b/lib/kokkos/core/unit_test/TestAtomic.hpp index e948723574b48b2a64ee66c487062e34c0ccf29b..ff77b8dca6f0437393bacca9d42ed73d359e44d5 100644 --- a/lib/kokkos/core/unit_test/TestAtomic.hpp +++ b/lib/kokkos/core/unit_test/TestAtomic.hpp @@ -45,116 +45,129 @@ namespace TestAtomic { -// Struct for testing arbitrary size atomics +// Struct for testing arbitrary size atomics. 
-template<int N> +template< int N > struct SuperScalar { double val[N]; KOKKOS_INLINE_FUNCTION SuperScalar() { - for(int i=0; i<N; i++) + for ( int i = 0; i < N; i++ ) { val[i] = 0.0; + } } KOKKOS_INLINE_FUNCTION - SuperScalar(const SuperScalar& src) { - for(int i=0; i<N; i++) + SuperScalar( const SuperScalar & src ) { + for ( int i = 0; i < N; i++ ) { val[i] = src.val[i]; + } } KOKKOS_INLINE_FUNCTION - SuperScalar(const volatile SuperScalar& src) { - for(int i=0; i<N; i++) + SuperScalar( const volatile SuperScalar & src ) { + for ( int i = 0; i < N; i++ ) { val[i] = src.val[i]; + } } KOKKOS_INLINE_FUNCTION - SuperScalar& operator = (const SuperScalar& src) { - for(int i=0; i<N; i++) + SuperScalar& operator=( const SuperScalar & src ) { + for ( int i = 0; i < N; i++ ) { val[i] = src.val[i]; + } return *this; } KOKKOS_INLINE_FUNCTION - SuperScalar& operator = (const volatile SuperScalar& src) { - for(int i=0; i<N; i++) + SuperScalar& operator=( const volatile SuperScalar & src ) { + for ( int i = 0; i < N; i++ ) { val[i] = src.val[i]; + } return *this; } KOKKOS_INLINE_FUNCTION - void operator = (const SuperScalar& src) volatile { - for(int i=0; i<N; i++) + void operator=( const SuperScalar & src ) volatile { + for ( int i = 0; i < N; i++ ) { val[i] = src.val[i]; + } } KOKKOS_INLINE_FUNCTION - SuperScalar operator + (const SuperScalar& src) { + SuperScalar operator+( const SuperScalar & src ) { SuperScalar tmp = *this; - for(int i=0; i<N; i++) + for ( int i = 0; i < N; i++ ) { tmp.val[i] += src.val[i]; + } return tmp; } KOKKOS_INLINE_FUNCTION - SuperScalar& operator += (const double& src) { - for(int i=0; i<N; i++) - val[i] += 1.0*(i+1)*src; + SuperScalar& operator+=( const double & src ) { + for ( int i = 0; i < N; i++ ) { + val[i] += 1.0 * ( i + 1 ) * src; + } return *this; } KOKKOS_INLINE_FUNCTION - SuperScalar& operator += (const SuperScalar& src) { - for(int i=0; i<N; i++) + SuperScalar& operator+=( const SuperScalar & src ) { + for ( int i = 0; i < N; i++ ) { val[i] += src.val[i]; + } return *this; } KOKKOS_INLINE_FUNCTION - bool operator == (const SuperScalar& src) { + bool operator==( const SuperScalar & src ) { bool compare = true; - for(int i=0; i<N; i++) - compare = compare && ( val[i] == src.val[i]); + for( int i = 0; i < N; i++ ) { + compare = compare && ( val[i] == src.val[i] ); + } return compare; } KOKKOS_INLINE_FUNCTION - bool operator != (const SuperScalar& src) { + bool operator!=( const SuperScalar & src ) { bool compare = true; - for(int i=0; i<N; i++) - compare = compare && ( val[i] == src.val[i]); + for ( int i = 0; i < N; i++ ) { + compare = compare && ( val[i] == src.val[i] ); + } return !compare; } - - KOKKOS_INLINE_FUNCTION - SuperScalar(const double& src) { - for(int i=0; i<N; i++) - val[i] = 1.0 * (i+1) * src; + SuperScalar( const double & src ) { + for ( int i = 0; i < N; i++ ) { + val[i] = 1.0 * ( i + 1 ) * src; + } } - }; -template<int N> -std::ostream& operator<<(std::ostream& os, const SuperScalar<N>& dt) +template< int N > +std::ostream & operator<<( std::ostream & os, const SuperScalar< N > & dt ) { - os << "{ "; - for(int i=0;i<N-1;i++) - os << dt.val[i] << ", "; - os << dt.val[N-1] << "}"; - return os; + os << "{ "; + for ( int i = 0; i < N - 1; i++ ) { + os << dt.val[i] << ", "; + } + os << dt.val[N-1] << "}"; + + return os; } -template<class T,class DEVICE_TYPE> +template< class T, class DEVICE_TYPE > struct ZeroFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View<T,execution_space> type; - typedef typename 
Kokkos::View<T,execution_space>::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = 0; } }; @@ -163,47 +176,53 @@ struct ZeroFunctor { //--------------atomic_fetch_add--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct AddFunctor{ +template< class T, class DEVICE_TYPE > +struct AddFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_add(&data(),(T)1); + void operator()( int ) const { + Kokkos::atomic_fetch_add( &data(), (T) 1 ); } }; -template<class T, class execution_space > -T AddLoop(int loop) { - struct ZeroFunctor<T,execution_space> f_zero; - typename ZeroFunctor<T,execution_space>::type data("Data"); - typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T AddLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct AddFunctor<T,execution_space> f_add; + struct AddFunctor< T, execution_space > f_add; + f_add.data = data; - Kokkos::parallel_for(loop,f_add); + Kokkos::parallel_for( loop, f_add ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T AddLoopSerial(int loop) { +template< class T > +T AddLoopSerial( int loop ) { T* data = new T[1]; data[0] = 0; - for(int i=0;i<loop;i++) - *data+=(T)1; + for ( int i = 0; i < loop; i++ ) { + *data += (T) 1; + } T val = *data; delete [] data; + return val; } @@ -211,65 +230,69 @@ T AddLoopSerial(int loop) { //--------------atomic_compare_exchange----------------- //------------------------------------------------------ -template<class T,class DEVICE_TYPE> -struct CASFunctor{ +template< class T, class DEVICE_TYPE > +struct CASFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - T old = data(); - T newval, assumed; - do { - assumed = old; - newval = assumed + (T)1; - old = Kokkos::atomic_compare_exchange(&data(), assumed, newval); - } - while( old != assumed ); + void operator()( int ) const { + T old = data(); + T newval, assumed; + + do { + assumed = old; + newval = assumed + (T) 1; + old = Kokkos::atomic_compare_exchange( &data(), assumed, newval ); + } while( old != assumed ); } }; -template<class T, class execution_space > -T CASLoop(int loop) { - struct ZeroFunctor<T,execution_space> f_zero; - typename ZeroFunctor<T,execution_space>::type data("Data"); - typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T CASLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - 
Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct CASFunctor<T,execution_space> f_cas; + struct CASFunctor< T, execution_space > f_cas; + f_cas.data = data; - Kokkos::parallel_for(loop,f_cas); + Kokkos::parallel_for( loop, f_cas ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); return val; } -template<class T> -T CASLoopSerial(int loop) { +template< class T > +T CASLoopSerial( int loop ) { T* data = new T[1]; data[0] = 0; - for(int i=0;i<loop;i++) { - T assumed; - T newval; - T old; - do { - assumed = *data; - newval = assumed + (T)1; - old = *data; - *data = newval; - } - while(!(assumed==old)); + for ( int i = 0; i < loop; i++ ) { + T assumed; + T newval; + T old; + + do { + assumed = *data; + newval = assumed + (T) 1; + old = *data; + *data = newval; + } while( !( assumed == old ) ); } T val = *data; delete [] data; + return val; } @@ -277,109 +300,119 @@ T CASLoopSerial(int loop) { //--------------atomic_exchange----------------- //---------------------------------------------- -template<class T,class DEVICE_TYPE> -struct ExchFunctor{ +template< class T, class DEVICE_TYPE > +struct ExchFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data, data2; KOKKOS_INLINE_FUNCTION - void operator()(int i) const { - T old = Kokkos::atomic_exchange(&data(),(T)i); - Kokkos::atomic_fetch_add(&data2(),old); + void operator()( int i ) const { + T old = Kokkos::atomic_exchange( &data(), (T) i ); + Kokkos::atomic_fetch_add( &data2(), old ); } }; -template<class T, class execution_space > -T ExchLoop(int loop) { - struct ZeroFunctor<T,execution_space> f_zero; - typename ZeroFunctor<T,execution_space>::type data("Data"); - typename ZeroFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T ExchLoop( int loop ) { + struct ZeroFunctor< T, execution_space > f_zero; + typename ZeroFunctor< T, execution_space >::type data( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data( "HData" ); + f_zero.data = data; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - typename ZeroFunctor<T,execution_space>::type data2("Data"); - typename ZeroFunctor<T,execution_space>::h_type h_data2("HData"); + typename ZeroFunctor< T, execution_space >::type data2( "Data" ); + typename ZeroFunctor< T, execution_space >::h_type h_data2( "HData" ); + f_zero.data = data2; - Kokkos::parallel_for(1,f_zero); + Kokkos::parallel_for( 1, f_zero ); execution_space::fence(); - struct ExchFunctor<T,execution_space> f_exch; + struct ExchFunctor< T, execution_space > f_exch; + f_exch.data = data; f_exch.data2 = data2; - Kokkos::parallel_for(loop,f_exch); + Kokkos::parallel_for( loop, f_exch ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); - Kokkos::deep_copy(h_data2,data2); + Kokkos::deep_copy( h_data, data ); + Kokkos::deep_copy( h_data2, data2 ); T val = h_data() + h_data2(); return val; } -template<class T> -T ExchLoopSerial(typename std::conditional<!std::is_same<T,Kokkos::complex<double> >::value,int,void>::type loop) { +template< class T > +T ExchLoopSerial( typename std::conditional< !std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; data2[0] = 0; - for(int i=0;i<loop;i++) { - T old = *data; - *data=(T) i; - 
*data2+=old; + + for ( int i = 0; i < loop; i++ ) { + T old = *data; + *data = (T) i; + *data2 += old; } T val = *data2 + *data; delete [] data; delete [] data2; + return val; } -template<class T> -T ExchLoopSerial(typename std::conditional<std::is_same<T,Kokkos::complex<double> >::value,int,void>::type loop) { +template< class T > +T ExchLoopSerial( typename std::conditional< std::is_same< T, Kokkos::complex<double> >::value, int, void >::type loop ) { T* data = new T[1]; T* data2 = new T[1]; data[0] = 0; data2[0] = 0; - for(int i=0;i<loop;i++) { - T old = *data; - data->real() = (static_cast<double>(i)); - data->imag() = 0; - *data2+=old; + + for ( int i = 0; i < loop; i++ ) { + T old = *data; + data->real() = ( static_cast<double>( i ) ); + data->imag() = 0; + *data2 += old; } T val = *data2 + *data; delete [] data; delete [] data2; + return val; } -template<class T, class DeviceType > -T LoopVariant(int loop, int test) { - switch (test) { - case 1: return AddLoop<T,DeviceType>(loop); - case 2: return CASLoop<T,DeviceType>(loop); - case 3: return ExchLoop<T,DeviceType>(loop); +template< class T, class DeviceType > +T LoopVariant( int loop, int test ) { + switch ( test ) { + case 1: return AddLoop< T, DeviceType >( loop ); + case 2: return CASLoop< T, DeviceType >( loop ); + case 3: return ExchLoop< T, DeviceType >( loop ); } + return 0; } -template<class T> -T LoopVariantSerial(int loop, int test) { - switch (test) { - case 1: return AddLoopSerial<T>(loop); - case 2: return CASLoopSerial<T>(loop); - case 3: return ExchLoopSerial<T>(loop); +template< class T > +T LoopVariantSerial( int loop, int test ) { + switch ( test ) { + case 1: return AddLoopSerial< T >( loop ); + case 2: return CASLoopSerial< T >( loop ); + case 3: return ExchLoopSerial< T >( loop ); } + return 0; } -template<class T,class DeviceType> -bool Loop(int loop, int test) +template< class T, class DeviceType > +bool Loop( int loop, int test ) { - T res = LoopVariant<T,DeviceType>(loop,test); - T resSerial = LoopVariantSerial<T>(loop,test); + T res = LoopVariant< T, DeviceType >( loop, test ); + T resSerial = LoopVariantSerial< T >( loop, test ); bool passed = true; @@ -387,16 +420,14 @@ bool Loop(int loop, int test) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = " << test << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - - return passed ; -} - + return passed; } +} // namespace TestAtomic diff --git a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp index 7f1519045187c535c586659e757eeb24609ccb50..e3ceca404ff12c1c9e5da04bf70d183fee87dfdd 100644 --- a/lib/kokkos/core/unit_test/TestAtomicOperations.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicOperations.hpp @@ -49,14 +49,16 @@ namespace TestAtomicOperations { //--------------zero_functor--------------------- //----------------------------------------------- -template<class T,class DEVICE_TYPE> +template< class T, class DEVICE_TYPE > struct ZeroFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View<T,execution_space> type; - typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = 0; } }; @@ -65,78 +67,84 @@ struct ZeroFunctor { 
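// In this header every atomic operation is exercised the same way: InitFunctor
// (below) seeds a rank-0 device View with i0, a per-operation functor (Max, Min,
// Mul, Div, Mod, And, Or, Xor, LShift, RShift, Inc, Dec) applies the matching
// Kokkos::atomic_* update exactly once, and a serial *AtomicCheck helper
// recomputes the expected value on the host for comparison. A minimal driver
// sketch, illustrative only and assuming a Serial execution space is enabled:
//
//   bool ok = TestAtomicOperations::MaxAtomicTest< long, Kokkos::Serial >( 10, 20 )
//          && TestAtomicOperations::AndAtomicTest< long, Kokkos::Serial >( 0xFF, 0x0F );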
//--------------init_functor--------------------- //----------------------------------------------- -template<class T,class DEVICE_TYPE> +template< class T, class DEVICE_TYPE > struct InitFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View<T,execution_space> type; - typedef typename Kokkos::View<T,execution_space>::HostMirror h_type; + typedef typename Kokkos::View< T, execution_space > type; + typedef typename Kokkos::View< T, execution_space >::HostMirror h_type; + type data; - T init_value ; + T init_value; + KOKKOS_INLINE_FUNCTION - void operator()(int) const { + void operator()( int ) const { data() = init_value; } - InitFunctor(T _init_value) : init_value(_init_value) {} + InitFunctor( T _init_value ) : init_value( _init_value ) {} }; - //--------------------------------------------------- //--------------atomic_fetch_max--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct MaxFunctor{ +template< class T, class DEVICE_TYPE > +struct MaxFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - //Kokkos::atomic_fetch_max(&data(),(T)1); - Kokkos::atomic_fetch_max(&data(),(T)i1); + void operator()( int ) const { + //Kokkos::atomic_fetch_max( &data(), (T) 1 ); + Kokkos::atomic_fetch_max( &data(), (T) i1 ); } - MaxFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + MaxFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T MaxAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T MaxAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MaxFunctor<T,execution_space> f(i0,i1); + struct MaxFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T MaxAtomicCheck(T i0 , T i1) { +template< class T > +T MaxAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = (i0 > i1 ? i0 : i1) ; + *data = ( i0 > i1 ? 
i0 : i1 ); T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool MaxAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MaxAtomicTest( T i0, T i1 ) { - T res = MaxAtomic<T,DeviceType>(i0,i1); - T resSerial = MaxAtomicCheck<T>(i0,i1); + T res = MaxAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MaxAtomicCheck<T>( i0, i1 ); bool passed = true; @@ -144,71 +152,77 @@ bool MaxAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MaxAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_min--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct MinFunctor{ +template< class T, class DEVICE_TYPE > +struct MinFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_min(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_min( &data(), (T) i1 ); } - MinFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + MinFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T MinAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T MinAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MinFunctor<T,execution_space> f(i0,i1); + struct MinFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T MinAtomicCheck(T i0 , T i1) { +template< class T > +T MinAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = (i0 < i1 ? i0 : i1) ; + *data = ( i0 < i1 ? 
i0 : i1 ); T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool MinAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MinAtomicTest( T i0, T i1 ) { - T res = MinAtomic<T,DeviceType>(i0,i1); - T resSerial = MinAtomicCheck<T>(i0,i1); + T res = MinAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MinAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -216,55 +230,60 @@ bool MinAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MinAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_increment--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct IncFunctor{ +template< class T, class DEVICE_TYPE > +struct IncFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_increment(&data()); + void operator()( int ) const { + Kokkos::atomic_increment( &data() ); } - IncFunctor( T _i0 ) : i0(_i0) {} + + IncFunctor( T _i0 ) : i0( _i0 ) {} }; -template<class T, class execution_space > -T IncAtomic(T i0) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T IncAtomic( T i0 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct IncFunctor<T,execution_space> f(i0); + struct IncFunctor< T, execution_space > f( i0 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T IncAtomicCheck(T i0) { +template< class T > +T IncAtomicCheck( T i0 ) { T* data = new T[1]; data[0] = 0; @@ -272,14 +291,15 @@ T IncAtomicCheck(T i0) { T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool IncAtomicTest(T i0) +template< class T, class DeviceType > +bool IncAtomicTest( T i0 ) { - T res = IncAtomic<T,DeviceType>(i0); - T resSerial = IncAtomicCheck<T>(i0); + T res = IncAtomic< T, DeviceType >( i0 ); + T resSerial = IncAtomicCheck< T >( i0 ); bool passed = true; @@ -287,55 +307,60 @@ bool IncAtomicTest(T i0) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = IncAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_decrement--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct DecFunctor{ +template< class T, class DEVICE_TYPE > +struct DecFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; 
KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_decrement(&data()); + void operator()( int ) const { + Kokkos::atomic_decrement( &data() ); } - DecFunctor( T _i0 ) : i0(_i0) {} + + DecFunctor( T _i0 ) : i0( _i0 ) {} }; -template<class T, class execution_space > -T DecAtomic(T i0) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T DecAtomic( T i0 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct DecFunctor<T,execution_space> f(i0); + struct DecFunctor< T, execution_space > f( i0 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T DecAtomicCheck(T i0) { +template< class T > +T DecAtomicCheck( T i0 ) { T* data = new T[1]; data[0] = 0; @@ -343,14 +368,15 @@ T DecAtomicCheck(T i0) { T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool DecAtomicTest(T i0) +template< class T, class DeviceType > +bool DecAtomicTest( T i0 ) { - T res = DecAtomic<T,DeviceType>(i0); - T resSerial = DecAtomicCheck<T>(i0); + T res = DecAtomic< T, DeviceType >( i0 ); + T resSerial = DecAtomicCheck< T >( i0 ); bool passed = true; @@ -358,71 +384,77 @@ bool DecAtomicTest(T i0) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = DecAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_mul--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct MulFunctor{ +template< class T, class DEVICE_TYPE > +struct MulFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_mul(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_mul( &data(), (T) i1 ); } - MulFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + MulFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T MulAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T MulAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct MulFunctor<T,execution_space> f(i0,i1); + struct MulFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); 
execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T MulAtomicCheck(T i0 , T i1) { +template< class T > +T MulAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0*i1 ; + *data = i0*i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool MulAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool MulAtomicTest( T i0, T i1 ) { - T res = MulAtomic<T,DeviceType>(i0,i1); - T resSerial = MulAtomicCheck<T>(i0,i1); + T res = MulAtomic< T, DeviceType >( i0, i1 ); + T resSerial = MulAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -430,71 +462,77 @@ bool MulAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MulAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_div--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct DivFunctor{ +template< class T, class DEVICE_TYPE > +struct DivFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_div(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_div( &data(), (T) i1 ); } - DivFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + DivFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T DivAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T DivAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct DivFunctor<T,execution_space> f(i0,i1); + struct DivFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T DivAtomicCheck(T i0 , T i1) { +template< class T > +T DivAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0/i1 ; + *data = i0 / i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool DivAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool DivAtomicTest( T i0, T i1 ) { - T res = DivAtomic<T,DeviceType>(i0,i1); - T resSerial = DivAtomicCheck<T>(i0,i1); + T res = DivAtomic< T, DeviceType >( i0, i1 ); + T resSerial = DivAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -502,71 +540,77 @@ bool DivAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = DivAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } 
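// Division is the last operation here that is also meaningful for
// floating-point types; the modulo, bitwise, and shift tests that follow apply
// only to integral T (compare AtomicOperationsTestIntegralType with
// AtomicOperationsTestNonIntegralType near the end of this header). A hedged
// example of the non-integral dispatch path, assuming a Serial execution space:
//
//   // Test id 4 dispatches to DivAtomicTest< double, Kokkos::Serial >( 100, 8 ).
//   bool ok = TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( 100, 8, 4 );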
//--------------------------------------------------- //--------------atomic_fetch_mod--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct ModFunctor{ +template< class T, class DEVICE_TYPE > +struct ModFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_mod(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_mod( &data(), (T) i1 ); } - ModFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + ModFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T ModAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T ModAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct ModFunctor<T,execution_space> f(i0,i1); + struct ModFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T ModAtomicCheck(T i0 , T i1) { +template< class T > +T ModAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0%i1 ; + *data = i0 % i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool ModAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool ModAtomicTest( T i0, T i1 ) { - T res = ModAtomic<T,DeviceType>(i0,i1); - T resSerial = ModAtomicCheck<T>(i0,i1); + T res = ModAtomic< T, DeviceType >( i0, i1 ); + T resSerial = ModAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -574,71 +618,77 @@ bool ModAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = ModAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_and--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct AndFunctor{ +template< class T, class DEVICE_TYPE > +struct AndFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_and(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_and( &data(), (T) i1 ); } - AndFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + AndFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T AndAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class 
execution_space > +T AndAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct AndFunctor<T,execution_space> f(i0,i1); + struct AndFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T AndAtomicCheck(T i0 , T i1) { +template< class T > +T AndAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0&i1 ; + *data = i0 & i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool AndAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool AndAtomicTest( T i0, T i1 ) { - T res = AndAtomic<T,DeviceType>(i0,i1); - T resSerial = AndAtomicCheck<T>(i0,i1); + T res = AndAtomic< T, DeviceType >( i0, i1 ); + T resSerial = AndAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -646,71 +696,77 @@ bool AndAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = AndAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_or---------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct OrFunctor{ +template< class T, class DEVICE_TYPE > +struct OrFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_or(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_or( &data(), (T) i1 ); } - OrFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + OrFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T OrAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T OrAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct OrFunctor<T,execution_space> f(i0,i1); + struct OrFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T OrAtomicCheck(T i0 , T i1) { +template< class T > +T OrAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0|i1 ; + *data = i0 | i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool OrAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool OrAtomicTest( T i0, 
T i1 ) { - T res = OrAtomic<T,DeviceType>(i0,i1); - T resSerial = OrAtomicCheck<T>(i0,i1); + T res = OrAtomic< T, DeviceType >( i0, i1 ); + T resSerial = OrAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -718,71 +774,77 @@ bool OrAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = OrAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_xor--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct XorFunctor{ +template< class T, class DEVICE_TYPE > +struct XorFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_xor(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_xor( &data(), (T) i1 ); } - XorFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + XorFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T XorAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T XorAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct XorFunctor<T,execution_space> f(i0,i1); + struct XorFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T XorAtomicCheck(T i0 , T i1) { +template< class T > +T XorAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0^i1 ; + *data = i0 ^ i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool XorAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool XorAtomicTest( T i0, T i1 ) { - T res = XorAtomic<T,DeviceType>(i0,i1); - T resSerial = XorAtomicCheck<T>(i0,i1); + T res = XorAtomic< T, DeviceType >( i0, i1 ); + T resSerial = XorAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -790,71 +852,77 @@ bool XorAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = XorAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_lshift--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct LShiftFunctor{ +template< class T, class DEVICE_TYPE > +struct LShiftFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - 
Kokkos::atomic_fetch_lshift(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_lshift( &data(), (T) i1 ); } - LShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + LShiftFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T LShiftAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T LShiftAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct LShiftFunctor<T,execution_space> f(i0,i1); + struct LShiftFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T LShiftAtomicCheck(T i0 , T i1) { +template< class T > +T LShiftAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0<<i1 ; + *data = i0 << i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool LShiftAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool LShiftAtomicTest( T i0, T i1 ) { - T res = LShiftAtomic<T,DeviceType>(i0,i1); - T resSerial = LShiftAtomicCheck<T>(i0,i1); + T res = LShiftAtomic< T, DeviceType >( i0, i1 ); + T resSerial = LShiftAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -862,71 +930,77 @@ bool LShiftAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = LShiftAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } //--------------------------------------------------- //--------------atomic_fetch_rshift--------------------- //--------------------------------------------------- -template<class T,class DEVICE_TYPE> -struct RShiftFunctor{ +template< class T, class DEVICE_TYPE > +struct RShiftFunctor { typedef DEVICE_TYPE execution_space; - typedef Kokkos::View<T,execution_space> type; + typedef Kokkos::View< T, execution_space > type; + type data; T i0; T i1; KOKKOS_INLINE_FUNCTION - void operator()(int) const { - Kokkos::atomic_fetch_rshift(&data(),(T)i1); + void operator()( int ) const { + Kokkos::atomic_fetch_rshift( &data(), (T) i1 ); } - RShiftFunctor( T _i0 , T _i1 ) : i0(_i0) , i1(_i1) {} + + RShiftFunctor( T _i0, T _i1 ) : i0( _i0 ), i1( _i1 ) {} }; -template<class T, class execution_space > -T RShiftAtomic(T i0 , T i1) { - struct InitFunctor<T,execution_space> f_init(i0); - typename InitFunctor<T,execution_space>::type data("Data"); - typename InitFunctor<T,execution_space>::h_type h_data("HData"); +template< class T, class execution_space > +T RShiftAtomic( T i0, T i1 ) { + struct InitFunctor< T, execution_space > f_init( i0 ); + typename InitFunctor< T, execution_space >::type data( "Data" ); + typename InitFunctor< T, execution_space >::h_type h_data( "HData" ); + f_init.data = data; - Kokkos::parallel_for(1,f_init); + Kokkos::parallel_for( 1, f_init ); execution_space::fence(); - struct RShiftFunctor<T,execution_space> f(i0,i1); + struct 
RShiftFunctor< T, execution_space > f( i0, i1 ); + f.data = data; - Kokkos::parallel_for(1,f); + Kokkos::parallel_for( 1, f ); execution_space::fence(); - Kokkos::deep_copy(h_data,data); + Kokkos::deep_copy( h_data, data ); T val = h_data(); + return val; } -template<class T> -T RShiftAtomicCheck(T i0 , T i1) { +template< class T > +T RShiftAtomicCheck( T i0, T i1 ) { T* data = new T[1]; data[0] = 0; - *data = i0>>i1 ; + *data = i0 >> i1; T val = *data; delete [] data; + return val; } -template<class T,class DeviceType> -bool RShiftAtomicTest(T i0, T i1) +template< class T, class DeviceType > +bool RShiftAtomicTest( T i0, T i1 ) { - T res = RShiftAtomic<T,DeviceType>(i0,i1); - T resSerial = RShiftAtomicCheck<T>(i0,i1); + T res = RShiftAtomic< T, DeviceType >( i0, i1 ); + T resSerial = RShiftAtomicCheck< T >( i0, i1 ); bool passed = true; @@ -934,52 +1008,52 @@ bool RShiftAtomicTest(T i0, T i1) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = RShiftAtomicTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //--------------atomic_test_control------------------ //--------------------------------------------------- -template<class T,class DeviceType> -bool AtomicOperationsTestIntegralType( int i0 , int i1 , int test ) +template< class T, class DeviceType > +bool AtomicOperationsTestIntegralType( int i0, int i1, int test ) { - switch (test) { - case 1: return MaxAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 2: return MinAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 3: return MulAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 4: return DivAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 5: return ModAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 6: return AndAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 7: return OrAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 8: return XorAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 9: return LShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 10: return RShiftAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 11: return IncAtomicTest<T,DeviceType>( (T)i0 ); - case 12: return DecAtomicTest<T,DeviceType>( (T)i0 ); + switch ( test ) { + case 1: return MaxAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 2: return MinAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 3: return MulAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 4: return DivAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 5: return ModAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 6: return AndAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 7: return OrAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 8: return XorAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 9: return LShiftAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 10: return RShiftAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 11: return IncAtomicTest< T, DeviceType >( (T) i0 ); + case 12: return DecAtomicTest< T, DeviceType >( (T) i0 ); } + return 0; } -template<class T,class DeviceType> -bool AtomicOperationsTestNonIntegralType( int i0 , int i1 , int test ) +template< class T, class DeviceType > +bool AtomicOperationsTestNonIntegralType( int i0, int i1, int test ) { - switch (test) { - case 1: return MaxAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 2: return MinAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 3: return MulAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); - case 4: 
return DivAtomicTest<T,DeviceType>( (T)i0 , (T)i1 ); + switch ( test ) { + case 1: return MaxAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 2: return MinAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 3: return MulAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); + case 4: return DivAtomicTest< T, DeviceType >( (T) i0, (T) i1 ); } + return 0; } -} // namespace - +} // namespace TestAtomicOperations diff --git a/lib/kokkos/core/unit_test/TestAtomicViews.hpp b/lib/kokkos/core/unit_test/TestAtomicViews.hpp index 739492d32f806a80d1b64f10e3d0ba887f627acd..71080e5c8216aecd01985139c37bb68931139929 100644 --- a/lib/kokkos/core/unit_test/TestAtomicViews.hpp +++ b/lib/kokkos/core/unit_test/TestAtomicViews.hpp @@ -49,56 +49,52 @@ namespace TestAtomicViews { //-----------atomic view api tests----------------- //------------------------------------------------- -template< class T , class ... P > -size_t allocation_count( const Kokkos::View<T,P...> & view ) +template< class T, class ... P > +size_t allocation_count( const Kokkos::View< T, P... > & view ) { const size_t card = view.size(); const size_t alloc = view.span(); - const int memory_span = Kokkos::View<int*>::required_allocation_size(100); + const int memory_span = Kokkos::View< int* >::required_allocation_size( 100 ); - return (card <= alloc && memory_span == 400) ? alloc : 0 ; + return ( card <= alloc && memory_span == 400 ) ? alloc : 0; } -template< class DataType , - class DeviceType , +template< class DataType, + class DeviceType, unsigned Rank = Kokkos::ViewTraits< DataType >::rank > -struct TestViewOperator_LeftAndRight ; +struct TestViewOperator_LeftAndRight; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 1 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } + { update = 0; } + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > left_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic > > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic > > right_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space, Kokkos::MemoryTraits< Kokkos::Atomic >> stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view 
left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -111,357 +107,338 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { - // below checks that values match, but unable to check the references - // - should this be able to be checked? - if ( left(i0) != left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( right(i0) != right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( left(i0) != left_stride(i0) ) { update |= 4 ; } - if ( right(i0) != right_stride(i0) ) { update |= 8 ; } - /* - if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } - if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } - */ + // Below checks that values match, but unable to check the references. + // Should this be able to be checked? + if ( left( i0 ) != left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( right( i0 ) != right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( left( i0 ) != left_stride( i0 ) ) { update |= 4; } + if ( right( i0 ) != right_stride( i0 ) ) { update |= 8; } +/* + if ( &left( i0 ) != &left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( &right( i0 ) != &right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( &left( i0 ) != &left_stride( i0 ) ) { update |= 4; } + if ( &right( i0 ) != &right_stride( i0 ) ) { update |= 8; } +*/ } } }; - template< typename T, class DeviceType > class TestAtomicViewAPI { public: - typedef DeviceType device ; + typedef DeviceType device; - enum { N0 = 1000 , - N1 = 3 , - N2 = 5 , + enum { N0 = 1000, + N1 = 3, + N2 = 5, N3 = 7 }; - typedef Kokkos::View< T , device > dView0 ; - typedef Kokkos::View< T* , device > dView1 ; - typedef Kokkos::View< T*[N1] , device > dView2 ; - typedef Kokkos::View< T*[N1][N2] , device > dView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ; - typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ; - typedef typename dView0::host_mirror_space host ; + typedef Kokkos::View< T, device > dView0; + typedef Kokkos::View< T*, device > dView1; + typedef Kokkos::View< T*[N1], device > dView2; + typedef Kokkos::View< T*[N1][N2], device > dView3; + typedef Kokkos::View< T*[N1][N2][N3], device > dView4; + typedef Kokkos::View< const T*[N1][N2][N3], device > const_dView4; + typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged; + typedef typename dView0::host_mirror_space host; - typedef Kokkos::View< T , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView0 ; - typedef Kokkos::View< T* , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView1 ; - typedef Kokkos::View< T*[N1] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView2 ; - typedef Kokkos::View< 
T*[N1][N2] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > aView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device , Kokkos::MemoryTraits< Kokkos::Atomic > > const_aView4 ; + typedef Kokkos::View< T, device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView0; + typedef Kokkos::View< T*, device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView1; + typedef Kokkos::View< T*[N1], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView2; + typedef Kokkos::View< T*[N1][N2], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView3; + typedef Kokkos::View< T*[N1][N2][N3], device, Kokkos::MemoryTraits< Kokkos::Atomic > > aView4; + typedef Kokkos::View< const T*[N1][N2][N3], device, Kokkos::MemoryTraits< Kokkos::Atomic > > const_aView4; - typedef Kokkos::View< T****, device, Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::Atomic > > aView4_unmanaged ; + typedef Kokkos::View< T****, device, Kokkos::MemoryTraits< Kokkos::Unmanaged | Kokkos::Atomic > > aView4_unmanaged; - typedef typename aView0::host_mirror_space host_atomic ; + typedef typename aView0::host_mirror_space host_atomic; TestAtomicViewAPI() { - TestViewOperator_LeftAndRight< int[2] , device >::testit(); + TestViewOperator_LeftAndRight< int[2], device >::testit(); run_test_rank0(); run_test_rank4(); run_test_const(); } - static void run_test_rank0() { - dView0 dx , dy ; - aView0 ax , ay , az ; + dView0 dx, dy; + aView0 ax, ay, az; dx = dView0( "dx" ); dy = dView0( "dy" ); - ASSERT_EQ( dx.use_count() , size_t(1) ); - ASSERT_EQ( dy.use_count() , size_t(1) ); - - ax = dx ; - ay = dy ; - ASSERT_EQ( dx.use_count() , size_t(2) ); - ASSERT_EQ( dy.use_count() , size_t(2) ); - ASSERT_EQ( dx.use_count() , ax.use_count() ); - - az = ax ; - ASSERT_EQ( dx.use_count() , size_t(3) ); - ASSERT_EQ( ax.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , ax.use_count() ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); + ASSERT_EQ( dy.use_count(), size_t( 1 ) ); + + ax = dx; + ay = dy; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); + ASSERT_EQ( dy.use_count(), size_t( 2 ) ); + ASSERT_EQ( dx.use_count(), ax.use_count() ); + + az = ax; + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), ax.use_count() ); } static void run_test_rank4() { - dView4 dx , dy ; - aView4 ax , ay , az ; + dView4 dx, dy; + aView4 ax, ay, az; - dx = dView4( "dx" , N0 ); - dy = dView4( "dy" , N0 ); - ASSERT_EQ( dx.use_count() , size_t(1) ); - ASSERT_EQ( dy.use_count() , size_t(1) ); + dx = dView4( "dx", N0 ); + dy = dView4( "dy", N0 ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); + ASSERT_EQ( dy.use_count(), size_t( 1 ) ); - ax = dx ; - ay = dy ; - ASSERT_EQ( dx.use_count() , size_t(2) ); - ASSERT_EQ( dy.use_count() , size_t(2) ); - ASSERT_EQ( dx.use_count() , ax.use_count() ); + ax = dx; + ay = dy; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); + ASSERT_EQ( dy.use_count(), size_t( 2 ) ); + ASSERT_EQ( dx.use_count(), ax.use_count() ); dView4_unmanaged unmanaged_dx = dx; - ASSERT_EQ( dx.use_count() , size_t(2) ); + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); - az = ax ; - ASSERT_EQ( dx.use_count() , size_t(3) ); - ASSERT_EQ( ax.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , size_t(3) ); - ASSERT_EQ( az.use_count() , ax.use_count() ); + az = ax; + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); + 
ASSERT_EQ( az.use_count(), size_t( 3 ) ); + ASSERT_EQ( az.use_count(), ax.use_count() ); aView4_unmanaged unmanaged_ax = ax; - ASSERT_EQ( ax.use_count() , size_t(3) ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); - aView4_unmanaged unmanaged_ax_from_ptr_dx = aView4_unmanaged(dx.data(), - dx.dimension_0(), - dx.dimension_1(), - dx.dimension_2(), - dx.dimension_3()); - ASSERT_EQ( ax.use_count() , size_t(3) ); + aView4_unmanaged unmanaged_ax_from_ptr_dx = + aView4_unmanaged( dx.data(), dx.dimension_0(), dx.dimension_1(), dx.dimension_2(), dx.dimension_3() ); + ASSERT_EQ( ax.use_count(), size_t( 3 ) ); - const_aView4 const_ax = ax ; - ASSERT_EQ( ax.use_count() , size_t(4) ); - ASSERT_EQ( const_ax.use_count() , ax.use_count() ); + const_aView4 const_ax = ax; + ASSERT_EQ( ax.use_count(), size_t( 4 ) ); + ASSERT_EQ( const_ax.use_count(), ax.use_count() ); ASSERT_FALSE( ax.data() == 0 ); ASSERT_FALSE( const_ax.data() == 0 ); // referenceable ptr ASSERT_FALSE( unmanaged_ax.data() == 0 ); ASSERT_FALSE( unmanaged_ax_from_ptr_dx.data() == 0 ); ASSERT_FALSE( ay.data() == 0 ); -// ASSERT_NE( ax , ay ); +// ASSERT_NE( ax, ay ); // Above test results in following runtime error from gtest: // Expected: (ax) != (ay), actual: 32-byte object <30-01 D0-A0 D8-7F 00-00 00-31 44-0C 01-00 00-00 E8-03 00-00 00-00 00-00 69-00 00-00 00-00 00-00> vs 32-byte object <80-01 D0-A0 D8-7F 00-00 00-A1 4A-0C 01-00 00-00 E8-03 00-00 00-00 00-00 69-00 00-00 00-00 00-00> - ASSERT_EQ( ax.dimension_0() , unsigned(N0) ); - ASSERT_EQ( ax.dimension_1() , unsigned(N1) ); - ASSERT_EQ( ax.dimension_2() , unsigned(N2) ); - ASSERT_EQ( ax.dimension_3() , unsigned(N3) ); + ASSERT_EQ( ax.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( ax.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( ax.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( ax.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( ay.dimension_0() , unsigned(N0) ); - ASSERT_EQ( ay.dimension_1() , unsigned(N1) ); - ASSERT_EQ( ay.dimension_2() , unsigned(N2) ); - ASSERT_EQ( ay.dimension_3() , unsigned(N3) ); + ASSERT_EQ( ay.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( ay.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( ay.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( ay.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( unmanaged_ax_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + ASSERT_EQ( unmanaged_ax_from_ptr_dx.capacity(), unsigned( N0 ) * unsigned( N1 ) * unsigned( N2 ) * unsigned( N3 ) ); } - typedef T DataType[2] ; + typedef T DataType[2]; static void check_auto_conversion_to_const( - const Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > & arg_const , - const Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > & arg ) + const Kokkos::View< const DataType, device, Kokkos::MemoryTraits<Kokkos::Atomic> > & arg_const, + const Kokkos::View< const DataType, device, Kokkos::MemoryTraits<Kokkos::Atomic> > & arg ) { ASSERT_TRUE( arg_const == arg ); } static void run_test_const() { - typedef Kokkos::View< DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > typeX ; - typedef Kokkos::View< const DataType , device , Kokkos::MemoryTraits< Kokkos::Atomic> > const_typeX ; + typedef Kokkos::View< DataType, device, Kokkos::MemoryTraits<Kokkos::Atomic> > typeX; + typedef Kokkos::View< const DataType, device, Kokkos::MemoryTraits<Kokkos::Atomic> > const_typeX; typeX x( "X" ); - const_typeX xc = x ; + const_typeX xc = x; //ASSERT_TRUE( xc == x ); // const xc is referenceable, non-const x is not //ASSERT_TRUE( x == xc ); 
- check_auto_conversion_to_const( x , xc ); + check_auto_conversion_to_const( x, xc ); } - }; - //--------------------------------------------------- //-----------initialization functors----------------- //--------------------------------------------------- template<class T, class execution_space > struct InitFunctor_Seq { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , execution_space > view_type ; - - view_type input ; - const long length ; + view_type input; + const long length; - InitFunctor_Seq( view_type & input_ , const long length_ ) - : input(input_) - , length(length_) + InitFunctor_Seq( view_type & input_, const long length_ ) + : input( input_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - input(i) = (T) i ; + input( i ) = (T) i; } } - }; - template<class T, class execution_space > struct InitFunctor_ModTimes { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , execution_space > view_type ; - - view_type input ; - const long length ; - const long remainder ; + view_type input; + const long length; + const long remainder; - InitFunctor_ModTimes( view_type & input_ , const long length_ , const long remainder_ ) - : input(input_) - , length(length_) - , remainder(remainder_) + InitFunctor_ModTimes( view_type & input_, const long length_, const long remainder_ ) + : input( input_ ) + , length( length_ ) + , remainder( remainder_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - if ( i % (remainder+1) == remainder ) { - input(i) = (T)2 ; + if ( i % ( remainder + 1 ) == remainder ) { + input( i ) = (T) 2; } else { - input(i) = (T)1 ; + input( i ) = (T) 1; } } } }; - template<class T, class execution_space > struct InitFunctor_ModShift { + typedef Kokkos::View< T*, execution_space > view_type; - typedef Kokkos::View< T* , execution_space > view_type ; - - view_type input ; - const long length ; - const long remainder ; + view_type input; + const long length; + const long remainder; - InitFunctor_ModShift( view_type & input_ , const long length_ , const long remainder_ ) - : input(input_) - , length(length_) - , remainder(remainder_) + InitFunctor_ModShift( view_type & input_, const long length_, const long remainder_ ) + : input( input_ ) + , length( length_ ) + , remainder( remainder_ ) {} KOKKOS_INLINE_FUNCTION void operator()( const long i ) const { if ( i < length ) { - if ( i % (remainder+1) == remainder ) { - input(i) = 1 ; + if ( i % ( remainder + 1 ) == remainder ) { + input( i ) = 1; } } } }; - //--------------------------------------------------- //-----------atomic view plus-equal------------------ //--------------------------------------------------- template<class T, class execution_space > struct PlusEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; // Wrap the result view in an atomic view, use this for operator - PlusEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + PlusEqualAtomicViewFunctor( const 
view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) += input(i); + even_odd_result( 0 ) += input( i ); } else { - even_odd_result(1) += input(i); + even_odd_result( 1 ) += input( i ); } } } - }; - -template<class T, class execution_space > -T PlusEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T PlusEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq<T, execution_space> init_f( input , length ) ; - Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>(0, length) , init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - PlusEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + PlusEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0) + h_result_view(1) ) ; + return (T) ( h_result_view( 0 ) + h_result_view( 1 ) ); } -template<class T> +template< class T > T PlusEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; T result[2]; + if ( N % 2 == 0 ) { - const long half_sum_end = (N/2) - 1; + const long half_sum_end = ( N / 2 ) - 1; const long full_sum_end = N - 1; - result[0] = half_sum_end*(half_sum_end + 1)/2 ; //even sum - result[1] = ( full_sum_end*(full_sum_end + 1)/2 ) - result[0] ; // odd sum + result[0] = half_sum_end * ( half_sum_end + 1 ) / 2; // Even sum. + result[1] = ( full_sum_end * ( full_sum_end + 1 ) / 2 ) - result[0]; // Odd sum. } else { - const long half_sum_end = (T)(N/2) ; + const long half_sum_end = (T) ( N / 2 ); const long full_sum_end = N - 2; - result[0] = half_sum_end*(half_sum_end - 1)/2 ; //even sum - result[1] = ( full_sum_end*(full_sum_end - 1)/2 ) - result[0] ; // odd sum + result[0] = half_sum_end * ( half_sum_end - 1 ) / 2; // Even sum. + result[1] = ( full_sum_end * ( full_sum_end - 1 ) / 2 ) - result[0]; // Odd sum. 
} - return (T)(result[0] + result[1]); + return (T) ( result[0] + result[1] ); } -template<class T,class DeviceType> -bool PlusEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool PlusEqualAtomicViewTest( long input_length ) { - T res = PlusEqualAtomicView<T,DeviceType>(input_length); - T resSerial = PlusEqualAtomicViewCheck<T>(input_length); + T res = PlusEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = PlusEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -469,104 +446,98 @@ bool PlusEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = PlusEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view minus-equal----------------- //--------------------------------------------------- template<class T, class execution_space > struct MinusEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - MinusEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ MinusEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) -= input(i); + even_odd_result( 0 ) -= input( i ); } else { - even_odd_result(1) -= input(i); + even_odd_result( 1 ) -= input( i ); } } } - }; - -template<class T, class execution_space > -T MinusEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T MinusEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq<T, execution_space> init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - MinusEqualAtomicViewFunctor<T,execution_space> functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + MinusEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0) + h_result_view(1) ) ; + return (T) ( h_result_view( 0 ) + h_result_view( 1 ) ); } -template<class T> +template< class T > T MinusEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; T result[2]; + if ( N % 2 == 0 ) { - const long half_sum_end = (N/2) - 1; + const long half_sum_end = ( N / 2 ) - 1; const long full_sum_end = N - 1; - result[0] = -1*( half_sum_end*(half_sum_end + 1)/2 ) ; //even sum - result[1] = -1*( ( full_sum_end*(full_sum_end + 1)/2 ) + result[0] ) ; // odd sum + result[0] = -1 * ( half_sum_end * ( half_sum_end + 1 ) / 2 ); // Even sum. + result[1] = -1 * ( ( full_sum_end * ( full_sum_end + 1 ) / 2 ) + result[0] ); // Odd sum. } else { - const long half_sum_end = (long)(N/2) ; + const long half_sum_end = (long) ( N / 2 ); const long full_sum_end = N - 2; - result[0] = -1*( half_sum_end*(half_sum_end - 1)/2 ) ; //even sum - result[1] = -1*( ( full_sum_end*(full_sum_end - 1)/2 ) + result[0] ) ; // odd sum + result[0] = -1 * ( half_sum_end * ( half_sum_end - 1 ) / 2 ); // Even sum. + result[1] = -1 * ( ( full_sum_end * ( full_sum_end - 1 ) / 2 ) + result[0] ); // Odd sum. 
} - return (result[0] + result[1]); + return ( result[0] + result[1] ); } -template<class T,class DeviceType> -bool MinusEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool MinusEqualAtomicViewTest( long input_length ) { - T res = MinusEqualAtomicView<T,DeviceType>(input_length); - T resSerial = MinusEqualAtomicViewCheck<T>(input_length); + T res = MinusEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = MinusEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -574,83 +545,76 @@ bool MinusEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = MinusEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view times-equal----------------- //--------------------------------------------------- template<class T, class execution_space > struct TimesEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type result; const long length; // Wrap the result view in an atomic view, use this for operator - TimesEqualAtomicViewFunctor( const view_type & input_ , view_type & result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + TimesEqualAtomicViewFunctor( const view_type & input_, view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result(0) *= (double)input(i); + result( 0 ) *= (double) input( i ); } } - }; - -template<class T, class execution_space > -T TimesEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T TimesEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",1) ; - deep_copy(result_view, 1.0); + view_type input( "input_view", length ); + view_type result_view( "result_view", 1 ); + deep_copy( result_view, 1.0 ); - InitFunctor_ModTimes<T, execution_space> init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - TimesEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + TimesEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = 
Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template<class T> +template< class T > T TimesEqualAtomicViewCheck( const long input_length, const long remainder ) { - - //Analytical result + // Analytical result. const long N = input_length; T result = 1.0; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result *= 2.0; } else { @@ -658,15 +622,15 @@ T TimesEqualAtomicViewCheck( const long input_length, const long remainder ) { } } - return (T)result; + return (T) result; } -template<class T, class DeviceType> -bool TimesEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType> +bool TimesEqualAtomicViewTest( const long input_length ) { const long remainder = 23; - T res = TimesEqualAtomicView<T,DeviceType>(input_length, remainder); - T resSerial = TimesEqualAtomicViewCheck<T>(input_length, remainder); + T res = TimesEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = TimesEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -674,101 +638,93 @@ bool TimesEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = TimesEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view div-equal------------------ //--------------------------------------------------- template<class T, class execution_space > struct DivEqualAtomicViewFunctor { - - typedef Kokkos::View< T , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; + typedef Kokkos::View< T, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; view_type input; atomic_view_type result; const long length; - // Wrap the result view in an atomic view, use this for operator - DivEqualAtomicViewFunctor( const view_type & input_ , scalar_view_type & result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ DivEqualAtomicViewFunctor( const view_type & input_, scalar_view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result() /= (double)(input(i)); + result() /= (double) ( input( i ) ); } } - }; - -template<class T, class execution_space > -T DivEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; - typedef typename scalar_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T DivEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; + typedef typename scalar_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - scalar_view_type result_view("result_view") ; - Kokkos::deep_copy(result_view, 12121212121); + view_type input( "input_view", length ); + scalar_view_type result_view( "result_view" ); + Kokkos::deep_copy( result_view, 12121212121 ); - InitFunctor_ModTimes<T, execution_space> init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - DivEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + DivEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view()) ; + return (T) ( h_result_view() ); } -template<class T> -T DivEqualAtomicViewCheck( const long input_length , const long remainder ) { - +template< class T > +T DivEqualAtomicViewCheck( const long input_length, const long remainder ) { const long N = input_length; T result = 12121212121.0; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result /= 1.0; } else { result /= 2.0; } - } - return (T)result; + return (T) result; } -template<class T, class DeviceType> -bool DivEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool DivEqualAtomicViewTest( const long input_length ) { const long remainder = 23; - T res = DivEqualAtomicView<T,DeviceType>(input_length, remainder); - T resSerial = DivEqualAtomicViewCheck<T>(input_length, remainder); + T res = DivEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = DivEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -776,83 +732,76 @@ bool DivEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = DivEqualAtomicViewTest" << " FAILED : " << resSerial 
<< " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view mod-equal------------------ //--------------------------------------------------- -template<class T, class execution_space > +template< class T, class execution_space > struct ModEqualAtomicViewFunctor { - - typedef Kokkos::View< T , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; + typedef Kokkos::View< T, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; view_type input; atomic_view_type result; const long length; - // Wrap the result view in an atomic view, use this for operator - ModEqualAtomicViewFunctor( const view_type & input_ , scalar_view_type & result_ , const long length_) - : input(input_) - , result(result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + ModEqualAtomicViewFunctor( const view_type & input_, scalar_view_type & result_, const long length_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length && i > 0 ) { - result() %= (double)(input(i)); + result() %= (double) ( input( i ) ); } } - }; - -template<class T, class execution_space > -T ModEqualAtomicView(const long input_length, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T , execution_space > scalar_view_type ; - typedef typename scalar_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T ModEqualAtomicView( const long input_length, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T, execution_space > scalar_view_type; + typedef typename scalar_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - scalar_view_type result_view("result_view") ; - Kokkos::deep_copy(result_view, 12121212121); + view_type input( "input_view", length ); + scalar_view_type result_view( "result_view" ); + Kokkos::deep_copy( result_view, 12121212121 ); - InitFunctor_ModTimes<T, execution_space> init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_ModTimes< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - ModEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + ModEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view()) ; + return (T) ( h_result_view() ); } -template<class T> 
-T ModEqualAtomicViewCheck( const long input_length , const long remainder ) { - +template< class T > +T ModEqualAtomicViewCheck( const long input_length, const long remainder ) { const long N = input_length; T result = 12121212121; for ( long i = 2; i < N; ++i ) { - if ( i % (remainder+1) == remainder ) { + if ( i % ( remainder + 1 ) == remainder ) { result %= 1; } else { @@ -860,19 +809,18 @@ T ModEqualAtomicViewCheck( const long input_length , const long remainder ) { } } - return (T)result; + return (T) result; } -template<class T, class DeviceType> -bool ModEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool ModEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral<T>::value, "ModEqualAtomicView Error: Type must be integral type for this unit test"); + static_assert( std::is_integral< T >::value, "ModEqualAtomicView Error: Type must be integral type for this unit test" ); const long remainder = 23; - T res = ModEqualAtomicView<T,DeviceType>(input_length, remainder); - T resSerial = ModEqualAtomicViewCheck<T>(input_length, remainder); + T res = ModEqualAtomicView< T, DeviceType >( input_length, remainder ); + T resSerial = ModEqualAtomicViewCheck< T >( input_length, remainder ); bool passed = true; @@ -880,142 +828,134 @@ bool ModEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = ModEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view rs-equal------------------ //--------------------------------------------------- -template<class T, class execution_space > +template< class T, class execution_space > struct RSEqualAtomicViewFunctor { - - typedef Kokkos::View< T**** , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; + typedef Kokkos::View< T****, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; const view_type input; atomic_view_type result; const long length; const long value; - // Wrap the result view in an atomic view, use this for operator - RSEqualAtomicViewFunctor( const view_type & input_ , result_view_type & result_ , const long & length_ , const long & value_ ) - : input(input_) - , result(result_) - , length(length_) - , value(value_) + // Wrap the result view in an atomic view, use this for operator. 
+ RSEqualAtomicViewFunctor( const view_type & input_, result_view_type & result_, const long & length_, const long & value_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) + , value( value_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 4 == 0 ) { - result(1,0,0,0) >>= input(i); + result( 1, 0, 0, 0 ) >>= input( i ); } else if ( i % 4 == 1 ) { - result(0,1,0,0) >>= input(i); + result( 0, 1, 0, 0 ) >>= input( i ); } else if ( i % 4 == 2 ) { - result(0,0,1,0) >>= input(i); + result( 0, 0, 1, 0 ) >>= input( i ); } else if ( i % 4 == 3 ) { - result(0,0,0,1) >>= input(i); + result( 0, 0, 0, 1 ) >>= input( i ); } } } - }; - -template<class T, class execution_space > -T RSEqualAtomicView(const long input_length, const long value, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; - typedef typename result_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T RSEqualAtomicView( const long input_length, const long value, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; + typedef typename result_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - result_view_type result_view("result_view",2,2,2,2) ; - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - h_result_view(1,0,0,0) = value; - h_result_view(0,1,0,0) = value; - h_result_view(0,0,1,0) = value; - h_result_view(0,0,0,1) = value; - Kokkos::deep_copy( result_view , h_result_view ); + view_type input( "input_view", length ); + result_view_type result_view( "result_view", 2, 2, 2, 2 ); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + h_result_view( 1, 0, 0, 0 ) = value; + h_result_view( 0, 1, 0, 0 ) = value; + h_result_view( 0, 0, 1, 0 ) = value; + h_result_view( 0, 0, 0, 1 ) = value; + Kokkos::deep_copy( result_view, h_result_view ); + InitFunctor_ModShift< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - InitFunctor_ModShift<T, execution_space> init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); - - RSEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length, value); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + RSEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length, value ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - Kokkos::deep_copy(h_result_view, result_view); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(1,0,0,0)) ; + return (T) ( h_result_view( 1, 0, 0, 0 ) ); } -template<class T> +template< class T > T RSEqualAtomicViewCheck( const long input_length, const long value, const long remainder ) { - - T result[4] ; - result[0] = value ; - result[1] = value ; - result[2] = value ; - result[3] = value ; + T result[4]; + result[0] = value; + result[1] = value; + result[2] = value; + result[3] = value; T * input = new T[input_length]; for ( long i = 0; i < input_length; ++i ) { - if ( i % (remainder+1) == remainder ) { - 
input[i] = 1; - } - else { - input[i] = 0; - } + if ( i % ( remainder + 1 ) == remainder ) { + input[i] = 1; + } + else { + input[i] = 0; + } } for ( long i = 0; i < input_length; ++i ) { - if ( i % 4 == 0 ) { - result[0] >>= input[i]; - } - else if ( i % 4 == 1 ) { - result[1] >>= input[i]; - } - else if ( i % 4 == 2 ) { - result[2] >>= input[i]; - } - else if ( i % 4 == 3 ) { - result[3] >>= input[i]; - } + if ( i % 4 == 0 ) { + result[0] >>= input[i]; + } + else if ( i % 4 == 1 ) { + result[1] >>= input[i]; + } + else if ( i % 4 == 2 ) { + result[2] >>= input[i]; + } + else if ( i % 4 == 3 ) { + result[3] >>= input[i]; + } } + delete [] input; - return (T)result[0]; + return (T) result[0]; } -template<class T, class DeviceType> -bool RSEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool RSEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral<T>::value, "RSEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "RSEqualAtomicViewTest: Must be integral type for test" ); const long remainder = 61042; //prime - 1 - const long value = 1073741825; // 2^30+1 - T res = RSEqualAtomicView<T,DeviceType>(input_length, value, remainder); - T resSerial = RSEqualAtomicViewCheck<T>(input_length, value, remainder); + const long value = 1073741825; // 2^30+1 + T res = RSEqualAtomicView< T, DeviceType >( input_length, value, remainder ); + T resSerial = RSEqualAtomicViewCheck< T >( input_length, value, remainder ); bool passed = true; @@ -1023,142 +963,134 @@ bool RSEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = RSEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //------------atomic view ls-equal------------------ //--------------------------------------------------- template<class T, class execution_space > struct LSEqualAtomicViewFunctor { - - typedef Kokkos::View< T**** , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; + typedef Kokkos::View< T****, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; view_type input; atomic_view_type result; const long length; const long value; - // Wrap the result view in an atomic view, use this for operator - LSEqualAtomicViewFunctor( const view_type & input_ , result_view_type & result_ , const long & length_ , const long & value_ ) - : input(input_) - , result(result_) - , length(length_) - , value(value_) + // Wrap the result view in an atomic view, use this for operator. 
+ LSEqualAtomicViewFunctor( const view_type & input_, result_view_type & result_, const long & length_, const long & value_ ) + : input( input_ ) + , result( result_ ) + , length( length_ ) + , value( value_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 4 == 0 ) { - result(1,0,0,0) <<= input(i); + result( 1, 0, 0, 0 ) <<= input( i ); } else if ( i % 4 == 1 ) { - result(0,1,0,0) <<= input(i); + result( 0, 1, 0, 0 ) <<= input( i ); } else if ( i % 4 == 2 ) { - result(0,0,1,0) <<= input(i); + result( 0, 0, 1, 0 ) <<= input( i ); } else if ( i % 4 == 3 ) { - result(0,0,0,1) <<= input(i); + result( 0, 0, 0, 1 ) <<= input( i ); } } } - }; - -template<class T, class execution_space > -T LSEqualAtomicView(const long input_length, const long value, const long remainder) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef Kokkos::View< T**** , execution_space > result_view_type ; - typedef typename result_view_type::HostMirror host_scalar_view_type ; +template< class T, class execution_space > +T LSEqualAtomicView( const long input_length, const long value, const long remainder ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef Kokkos::View< T****, execution_space > result_view_type; + typedef typename result_view_type::HostMirror host_scalar_view_type; const long length = input_length; - view_type input("input_view",length) ; - result_view_type result_view("result_view",2,2,2,2) ; - host_scalar_view_type h_result_view = Kokkos::create_mirror_view(result_view); - h_result_view(1,0,0,0) = value; - h_result_view(0,1,0,0) = value; - h_result_view(0,0,1,0) = value; - h_result_view(0,0,0,1) = value; - Kokkos::deep_copy( result_view , h_result_view ); + view_type input( "input_view", length ); + result_view_type result_view( "result_view", 2, 2, 2, 2 ); + host_scalar_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + h_result_view( 1, 0, 0, 0 ) = value; + h_result_view( 0, 1, 0, 0 ) = value; + h_result_view( 0, 0, 1, 0 ) = value; + h_result_view( 0, 0, 0, 1 ) = value; + Kokkos::deep_copy( result_view, h_result_view ); - InitFunctor_ModShift<T, execution_space> init_f( input , length , remainder ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_ModShift< T, execution_space > init_f( input, length, remainder ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - LSEqualAtomicViewFunctor<T,execution_space> functor(input, result_view, length, value); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + LSEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length, value ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - Kokkos::deep_copy(h_result_view, result_view); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(1,0,0,0)) ; + return (T) ( h_result_view( 1, 0, 0, 0 ) ); } -template<class T> +template< class T > T LSEqualAtomicViewCheck( const long input_length, const long value, const long remainder ) { - - T result[4] ; - result[0] = value ; - result[1] = value ; - result[2] = value ; - result[3] = value ; + T result[4]; + result[0] = value; + result[1] = value; + result[2] = value; + result[3] = value; T * input = new T[input_length]; for ( long i = 0; i < input_length; ++i ) { - if ( i % (remainder+1) == remainder ) { - input[i] 
= 1; - } - else { - input[i] = 0; - } + if ( i % ( remainder + 1 ) == remainder ) { + input[i] = 1; + } + else { + input[i] = 0; + } } for ( long i = 0; i < input_length; ++i ) { - if ( i % 4 == 0 ) { - result[0] <<= input[i]; - } - else if ( i % 4 == 1 ) { - result[1] <<= input[i]; - } - else if ( i % 4 == 2 ) { - result[2] <<= input[i]; - } - else if ( i % 4 == 3 ) { - result[3] <<= input[i]; - } + if ( i % 4 == 0 ) { + result[0] <<= input[i]; + } + else if ( i % 4 == 1 ) { + result[1] <<= input[i]; + } + else if ( i % 4 == 2 ) { + result[2] <<= input[i]; + } + else if ( i % 4 == 3 ) { + result[3] <<= input[i]; + } } delete [] input; - return (T)result[0]; + return (T) result[0]; } -template<class T, class DeviceType> -bool LSEqualAtomicViewTest(const long input_length) +template< class T, class DeviceType > +bool LSEqualAtomicViewTest( const long input_length ) { - - static_assert( std::is_integral<T>::value, "LSEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "LSEqualAtomicViewTest: Must be integral type for test" ); const long remainder = 61042; //prime - 1 - const long value = 1; // 2^30+1 - T res = LSEqualAtomicView<T,DeviceType>(input_length, value, remainder); - T resSerial = LSEqualAtomicViewCheck<T>(input_length, value, remainder); + const long value = 1; // 2^30+1 + T res = LSEqualAtomicView< T, DeviceType >( input_length, value, remainder ); + T resSerial = LSEqualAtomicViewCheck< T >( input_length, value, remainder ); bool passed = true; @@ -1166,104 +1098,96 @@ bool LSEqualAtomicViewTest(const long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = RSEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view and-equal----------------- //--------------------------------------------------- -template<class T, class execution_space > +template< class T, class execution_space > struct AndEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - AndEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ AndEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) &= input(i); + even_odd_result( 0 ) &= input( i ); } else { - even_odd_result(1) &= input(i); + even_odd_result( 1 ) &= input( i ); } } } - }; - -template<class T, class execution_space > -T AndEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T AndEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; - Kokkos::deep_copy(result_view, 1); + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); + Kokkos::deep_copy( result_view, 1 ); - InitFunctor_Seq<T, execution_space> init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - AndEqualAtomicViewFunctor<T,execution_space> functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + AndEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template<class T> +template< class T > T AndEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; - T result[2] = {1}; + T result[2] = { 1 }; for ( long i = 0; i < N; ++i ) { if ( N % 2 == 0 ) { - result[0] &= (T)i; + result[0] &= (T) i; } else { - result[1] &= (T)i; + result[1] &= (T) i; } } - return (result[0]); + return ( result[0] ); } -template<class T,class DeviceType> -bool AndEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool AndEqualAtomicViewTest( long input_length ) { + static_assert( std::is_integral< T >::value, "AndEqualAtomicViewTest: Must be integral type for test" ); - static_assert( std::is_integral<T>::value, "AndEqualAtomicViewTest: Must be integral type for test"); - - T res = AndEqualAtomicView<T,DeviceType>(input_length); - T resSerial = AndEqualAtomicViewCheck<T>(input_length); + T res = AndEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = AndEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1271,103 +1195,96 @@ bool AndEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = AndEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - 
//--------------------------------------------------- //-----------atomic view or-equal----------------- //--------------------------------------------------- -template<class T, class execution_space > +template< class T, class execution_space > struct OrEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - OrEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. + OrEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) |= input(i); + even_odd_result( 0 ) |= input( i ); } else { - even_odd_result(1) |= input(i); + even_odd_result( 1 ) |= input( i ); } } } - }; - -template<class T, class execution_space > -T OrEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T OrEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq<T, execution_space> init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - OrEqualAtomicViewFunctor<T,execution_space> functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + OrEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template<class T> +template< class T > T OrEqualAtomicViewCheck( const long input_length ) { const long N = input_length; - T result[2] = {0}; + T result[2] = { 0 }; for ( long i = 0; i < N; ++i ) { if ( i % 2 == 0 ) { - result[0] |= (T)i; + result[0] |= (T) i; } else { - result[1] |= (T)i; + result[1] |= (T) i; } } - return (T)(result[0]); + return (T) ( result[0] ); } -template<class T,class DeviceType> -bool OrEqualAtomicViewTest(long 
input_length) +template< class T, class DeviceType > +bool OrEqualAtomicViewTest( long input_length ) { - - static_assert( std::is_integral<T>::value, "OrEqualAtomicViewTest: Must be integral type for test"); + static_assert( std::is_integral< T >::value, "OrEqualAtomicViewTest: Must be integral type for test" ); - T res = OrEqualAtomicView<T,DeviceType>(input_length); - T resSerial = OrEqualAtomicViewCheck<T>(input_length); + T res = OrEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = OrEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1375,103 +1292,95 @@ bool OrEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = OrEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - //--------------------------------------------------- //-----------atomic view xor-equal----------------- //--------------------------------------------------- -template<class T, class execution_space > +template< class T, class execution_space > struct XOrEqualAtomicViewFunctor { - - typedef Kokkos::View< T* , execution_space , Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_view_type ; - - typedef Kokkos::View< T* , execution_space > view_type ; + typedef Kokkos::View< T*, execution_space, Kokkos::MemoryTraits<Kokkos::Atomic> > atomic_view_type; + typedef Kokkos::View< T*, execution_space > view_type; view_type input; atomic_view_type even_odd_result; const long length; - // Wrap the result view in an atomic view, use this for operator - XOrEqualAtomicViewFunctor( const view_type & input_ , view_type & even_odd_result_ , const long length_) - : input(input_) - , even_odd_result(even_odd_result_) - , length(length_) + // Wrap the result view in an atomic view, use this for operator. 
+ XOrEqualAtomicViewFunctor( const view_type & input_, view_type & even_odd_result_, const long length_ ) + : input( input_ ) + , even_odd_result( even_odd_result_ ) + , length( length_ ) {} KOKKOS_INLINE_FUNCTION - void operator()(const long i) const { + void operator()( const long i ) const { if ( i < length ) { if ( i % 2 == 0 ) { - even_odd_result(0) ^= input(i); + even_odd_result( 0 ) ^= input( i ); } else { - even_odd_result(1) ^= input(i); + even_odd_result( 1 ) ^= input( i ); } } } - }; - -template<class T, class execution_space > -T XOrEqualAtomicView(const long input_length) { - - typedef Kokkos::View< T* , execution_space > view_type ; - typedef typename view_type::HostMirror host_view_type ; +template< class T, class execution_space > +T XOrEqualAtomicView( const long input_length ) { + typedef Kokkos::View< T*, execution_space > view_type; + typedef typename view_type::HostMirror host_view_type; const long length = input_length; - view_type input("input_view",length) ; - view_type result_view("result_view",2) ; + view_type input( "input_view", length ); + view_type result_view( "result_view", 2 ); - InitFunctor_Seq<T, execution_space> init_f( input , length ) ; - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), init_f ); + InitFunctor_Seq< T, execution_space > init_f( input, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), init_f ); - XOrEqualAtomicViewFunctor<T,execution_space> functor(input, result_view,length); - Kokkos::parallel_for( Kokkos::RangePolicy<execution_space>(0, length), functor); + XOrEqualAtomicViewFunctor< T, execution_space > functor( input, result_view, length ); + Kokkos::parallel_for( Kokkos::RangePolicy< execution_space >( 0, length ), functor ); Kokkos::fence(); - host_view_type h_result_view = Kokkos::create_mirror_view(result_view); - Kokkos::deep_copy(h_result_view, result_view); + host_view_type h_result_view = Kokkos::create_mirror_view( result_view ); + Kokkos::deep_copy( h_result_view, result_view ); - return (T) (h_result_view(0)) ; + return (T) ( h_result_view( 0 ) ); } -template<class T> +template< class T > T XOrEqualAtomicViewCheck( const long input_length ) { - const long N = input_length; - T result[2] = {0}; + T result[2] = { 0 }; for ( long i = 0; i < N; ++i ) { if ( i % 2 == 0 ) { - result[0] ^= (T)i; + result[0] ^= (T) i; } else { - result[1] ^= (T)i; + result[1] ^= (T) i; } } - return (T)(result[0]); + return (T) ( result[0] ); } -template<class T,class DeviceType> -bool XOrEqualAtomicViewTest(long input_length) +template< class T, class DeviceType > +bool XOrEqualAtomicViewTest( long input_length ) { + static_assert( std::is_integral< T >::value, "XOrEqualAtomicViewTest: Must be integral type for test" ); - static_assert( std::is_integral<T>::value, "XOrEqualAtomicViewTest: Must be integral type for test"); - - T res = XOrEqualAtomicView<T,DeviceType>(input_length); - T resSerial = XOrEqualAtomicViewCheck<T>(input_length); + T res = XOrEqualAtomicView< T, DeviceType >( input_length ); + T resSerial = XOrEqualAtomicViewCheck< T >( input_length ); bool passed = true; @@ -1479,54 +1388,52 @@ bool XOrEqualAtomicViewTest(long input_length) passed = false; std::cout << "Loop<" - << typeid(T).name() + << typeid( T ).name() << ">( test = XOrEqualAtomicViewTest" << " FAILED : " << resSerial << " != " << res - << std::endl ; + << std::endl; } - return passed ; + return passed; } - // inc/dec? 
- //--------------------------------------------------- //--------------atomic_test_control------------------ //--------------------------------------------------- -template<class T,class DeviceType> -bool AtomicViewsTestIntegralType( const int length , int test ) +template< class T, class DeviceType > +bool AtomicViewsTestIntegralType( const int length, int test ) { - static_assert( std::is_integral<T>::value, "TestAtomicViews Error: Non-integral type passed into IntegralType tests"); - - switch (test) { - case 1: return PlusEqualAtomicViewTest<T,DeviceType>( length ); - case 2: return MinusEqualAtomicViewTest<T,DeviceType>( length ); - case 3: return RSEqualAtomicViewTest<T,DeviceType>( length ); - case 4: return LSEqualAtomicViewTest<T,DeviceType>( length ); - case 5: return ModEqualAtomicViewTest<T,DeviceType>( length ); - case 6: return AndEqualAtomicViewTest<T,DeviceType>( length ); - case 7: return OrEqualAtomicViewTest<T,DeviceType>( length ); - case 8: return XOrEqualAtomicViewTest<T,DeviceType>( length ); + static_assert( std::is_integral< T >::value, "TestAtomicViews Error: Non-integral type passed into IntegralType tests" ); + + switch ( test ) { + case 1: return PlusEqualAtomicViewTest< T, DeviceType >( length ); + case 2: return MinusEqualAtomicViewTest< T, DeviceType >( length ); + case 3: return RSEqualAtomicViewTest< T, DeviceType >( length ); + case 4: return LSEqualAtomicViewTest< T, DeviceType >( length ); + case 5: return ModEqualAtomicViewTest< T, DeviceType >( length ); + case 6: return AndEqualAtomicViewTest< T, DeviceType >( length ); + case 7: return OrEqualAtomicViewTest< T, DeviceType >( length ); + case 8: return XOrEqualAtomicViewTest< T, DeviceType >( length ); } + return 0; } - -template<class T,class DeviceType> -bool AtomicViewsTestNonIntegralType( const int length , int test ) +template< class T, class DeviceType > +bool AtomicViewsTestNonIntegralType( const int length, int test ) { - switch (test) { - case 1: return PlusEqualAtomicViewTest<T,DeviceType>( length ); - case 2: return MinusEqualAtomicViewTest<T,DeviceType>( length ); - case 3: return TimesEqualAtomicViewTest<T,DeviceType>( length ); - case 4: return DivEqualAtomicViewTest<T,DeviceType>( length ); + switch ( test ) { + case 1: return PlusEqualAtomicViewTest< T, DeviceType >( length ); + case 2: return MinusEqualAtomicViewTest< T, DeviceType >( length ); + case 3: return TimesEqualAtomicViewTest< T, DeviceType >( length ); + case 4: return DivEqualAtomicViewTest< T, DeviceType >( length ); } + return 0; } -} // namespace - +} // namespace TestAtomicViews diff --git a/lib/kokkos/core/unit_test/TestCXX11.hpp b/lib/kokkos/core/unit_test/TestCXX11.hpp index d6dde5e963e1f0706fecd333b56dd9e8ed181d0e..e2ad623d9c89cef44c4e55a9096d3dba6796adf6 100644 --- a/lib/kokkos/core/unit_test/TestCXX11.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,283 +36,294 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + #include <Kokkos_Core.hpp> namespace TestCXX11 { -template<class DeviceType> -struct FunctorAddTest{ - typedef Kokkos::View<double**,DeviceType> view_type; - view_type a_, b_; +template< class DeviceType > +struct FunctorAddTest { + typedef Kokkos::View< double**, DeviceType > view_type; typedef DeviceType execution_space; - FunctorAddTest(view_type & a, view_type &b):a_(a),b_(b) {} + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member; + + view_type a_, b_; + + FunctorAddTest( view_type & a, view_type & b ) : a_( a ), b_( b ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i) const { - b_(i,0) = a_(i,1) + a_(i,2); - b_(i,1) = a_(i,0) - a_(i,3); - b_(i,2) = a_(i,4) + a_(i,0); - b_(i,3) = a_(i,2) - a_(i,1); - b_(i,4) = a_(i,3) + a_(i,4); + void operator() ( const int& i ) const { + b_( i, 0 ) = a_( i, 1 ) + a_( i, 2 ); + b_( i, 1 ) = a_( i, 0 ) - a_( i, 3 ); + b_( i, 2 ) = a_( i, 4 ) + a_( i, 0 ); + b_( i, 3 ) = a_( i, 2 ) - a_( i, 1 ); + b_( i, 4 ) = a_( i, 3 ) + a_( i, 4 ); } - typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; KOKKOS_INLINE_FUNCTION - void operator() (const team_member & dev) const { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - b_(i,0) = a_(i,1) + a_(i,2); - b_(i,1) = a_(i,0) - a_(i,3); - b_(i,2) = a_(i,4) + a_(i,0); - b_(i,3) = a_(i,2) - a_(i,1); - b_(i,4) = a_(i,3) + a_(i,4); + void operator() ( const team_member & dev ) const { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + b_( i, 0 ) = a_( i, 1 ) + a_( i, 2 ); + b_( i, 1 ) = a_( i, 0 ) - a_( i, 3 ); + b_( i, 2 ) = a_( i, 4 ) + a_( i, 0 ); + b_( i, 3 ) = a_( i, 2 ) - a_( i, 1 ); + b_( i, 4 ) = a_( i, 3 ) + a_( i, 4 ); } } }; -template<class DeviceType, bool PWRTest> +template< class DeviceType, bool PWRTest > double AddTestFunctor() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; - typedef Kokkos::TeamPolicy<DeviceType> policy_type ; - - Kokkos::View<double**,DeviceType> a("A",100,5); - Kokkos::View<double**,DeviceType> b("B",100,5); - typename Kokkos::View<double**,DeviceType>::HostMirror h_a = Kokkos::create_mirror_view(a); - typename Kokkos::View<double**,DeviceType>::HostMirror h_b = Kokkos::create_mirror_view(b); + Kokkos::View< double**, DeviceType > a( "A", 100, 5 ); + Kokkos::View< double**, DeviceType > b( "B", 100, 5 ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_a = Kokkos::create_mirror_view( a ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_b = Kokkos::create_mirror_view( b ); - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); - if(PWRTest==false) - Kokkos::parallel_for(100,FunctorAddTest<DeviceType>(a,b)); - else - Kokkos::parallel_for(policy_type(25,Kokkos::AUTO),FunctorAddTest<DeviceType>(a,b)); - Kokkos::deep_copy(h_b,b); + if ( PWRTest == false ) { + Kokkos::parallel_for( 100, FunctorAddTest< DeviceType >( a, b ) ); + } + else { + Kokkos::parallel_for( policy_type( 25, Kokkos::AUTO ), FunctorAddTest< DeviceType >( a, b ) ); + 
} + Kokkos::deep_copy( h_b, b ); double result = 0; - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - result += h_b(i,j); + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + result += h_b( i, j ); } + } return result; } - -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -template<class DeviceType, bool PWRTest> +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +template< class DeviceType, bool PWRTest > double AddTestLambda() { - - Kokkos::View<double**,DeviceType> a("A",100,5); - Kokkos::View<double**,DeviceType> b("B",100,5); - typename Kokkos::View<double**,DeviceType>::HostMirror h_a = Kokkos::create_mirror_view(a); - typename Kokkos::View<double**,DeviceType>::HostMirror h_b = Kokkos::create_mirror_view(b); - - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + Kokkos::View< double**, DeviceType > a( "A", 100, 5 ); + Kokkos::View< double**, DeviceType > b( "B", 100, 5 ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_a = Kokkos::create_mirror_view( a ); + typename Kokkos::View< double**, DeviceType >::HostMirror h_b = Kokkos::create_mirror_view( b ); + + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); - - if(PWRTest==false) { - Kokkos::parallel_for(100,KOKKOS_LAMBDA(const int& i) { - b(i,0) = a(i,1) + a(i,2); - b(i,1) = a(i,0) - a(i,3); - b(i,2) = a(i,4) + a(i,0); - b(i,3) = a(i,2) - a(i,1); - b(i,4) = a(i,3) + a(i,4); + Kokkos::deep_copy( a, h_a ); + + if ( PWRTest == false ) { + Kokkos::parallel_for( 100, KOKKOS_LAMBDA( const int & i ) { + b( i, 0 ) = a( i, 1 ) + a( i, 2 ); + b( i, 1 ) = a( i, 0 ) - a( i, 3 ); + b( i, 2 ) = a( i, 4 ) + a( i, 0 ); + b( i, 3 ) = a( i, 2 ) - a( i, 1 ); + b( i, 4 ) = a( i, 3 ) + a( i, 4 ); }); - } else { - typedef Kokkos::TeamPolicy<DeviceType> policy_type ; - typedef typename policy_type::member_type team_member ; - - policy_type policy(25,Kokkos::AUTO); - - Kokkos::parallel_for(policy,KOKKOS_LAMBDA(const team_member & dev) { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - b(i,0) = a(i,1) + a(i,2); - b(i,1) = a(i,0) - a(i,3); - b(i,2) = a(i,4) + a(i,0); - b(i,3) = a(i,2) - a(i,1); - b(i,4) = a(i,3) + a(i,4); + } + else { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef typename policy_type::member_type team_member; + + policy_type policy( 25, Kokkos::AUTO ); + + Kokkos::parallel_for( policy, KOKKOS_LAMBDA( const team_member & dev ) { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + b( i, 0 ) = a( i, 1 ) + a( i, 2 ); + b( i, 1 ) = a( i, 0 ) - a( i, 3 ); + b( i, 2 ) = a( i, 4 ) + a( i, 0 ); + b( i, 3 ) = a( i, 2 ) - a( i, 1 ); + b( i, 4 ) = a( i, 3 ) + a( i, 4 ); } }); } - Kokkos::deep_copy(h_b,b); + Kokkos::deep_copy( h_b, b ); double result = 0; - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - result += h_b(i,j); + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + result += h_b( i, j ); } + } return result; } - #else -template<class DeviceType, bool PWRTest> +template< class DeviceType, bool PWRTest > double AddTestLambda() { - return AddTestFunctor<DeviceType,PWRTest>(); + return AddTestFunctor< DeviceType, PWRTest >(); } #endif - -template<class DeviceType> -struct FunctorReduceTest{ - typedef 
Kokkos::View<double**,DeviceType> view_type; - view_type a_; +template< class DeviceType > +struct FunctorReduceTest { + typedef Kokkos::View< double**, DeviceType > view_type; typedef DeviceType execution_space; typedef double value_type; - FunctorReduceTest(view_type & a):a_(a) {} + typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member; + + view_type a_; + + FunctorReduceTest( view_type & a ) : a_( a ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i, value_type& sum) const { - sum += a_(i,1) + a_(i,2); - sum += a_(i,0) - a_(i,3); - sum += a_(i,4) + a_(i,0); - sum += a_(i,2) - a_(i,1); - sum += a_(i,3) + a_(i,4); + void operator() ( const int & i, value_type & sum ) const { + sum += a_( i, 1 ) + a_( i, 2 ); + sum += a_( i, 0 ) - a_( i, 3 ); + sum += a_( i, 4 ) + a_( i, 0 ); + sum += a_( i, 2 ) - a_( i, 1 ); + sum += a_( i, 3 ) + a_( i, 4 ); } - typedef typename Kokkos::TeamPolicy< execution_space >::member_type team_member ; - KOKKOS_INLINE_FUNCTION - void operator() (const team_member & dev, value_type& sum) const { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - sum += a_(i,1) + a_(i,2); - sum += a_(i,0) - a_(i,3); - sum += a_(i,4) + a_(i,0); - sum += a_(i,2) - a_(i,1); - sum += a_(i,3) + a_(i,4); + void operator() ( const team_member & dev, value_type & sum ) const { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + sum += a_( i, 1 ) + a_( i, 2 ); + sum += a_( i, 0 ) - a_( i, 3 ); + sum += a_( i, 4 ) + a_( i, 0 ); + sum += a_( i, 2 ) - a_( i, 1 ); + sum += a_( i, 3 ) + a_( i, 4 ); } } + KOKKOS_INLINE_FUNCTION - void init(value_type& update) const {update = 0.0;} + void init( value_type & update ) const { update = 0.0; } + KOKKOS_INLINE_FUNCTION - void join(volatile value_type& update, volatile value_type const& input) const {update += input;} + void join( volatile value_type & update, volatile value_type const & input ) const { update += input; } }; -template<class DeviceType, bool PWRTest> +template< class DeviceType, bool PWRTest > double ReduceTestFunctor() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef Kokkos::View< double**, DeviceType > view_type; + typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; - typedef Kokkos::TeamPolicy<DeviceType> policy_type ; - typedef Kokkos::View<double**,DeviceType> view_type ; - typedef Kokkos::View<double,typename view_type::host_mirror_space,Kokkos::MemoryUnmanaged> unmanaged_result ; - - view_type a("A",100,5); - typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); + view_type a( "A", 100, 5 ); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); double result = 0.0; - if(PWRTest==false) - Kokkos::parallel_reduce(100,FunctorReduceTest<DeviceType>(a), unmanaged_result( & result )); - else - Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),FunctorReduceTest<DeviceType>(a), unmanaged_result( & result )); + if ( PWRTest == false ) { + Kokkos::parallel_reduce( 100, FunctorReduceTest< DeviceType >( a ), unmanaged_result( & 
result ) ); + } + else { + Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), FunctorReduceTest< DeviceType >( a ), unmanaged_result( & result ) ); + } return result; } -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -template<class DeviceType, bool PWRTest> +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +template< class DeviceType, bool PWRTest > double ReduceTestLambda() { + typedef Kokkos::TeamPolicy< DeviceType > policy_type; + typedef Kokkos::View< double**, DeviceType > view_type; + typedef Kokkos::View< double, typename view_type::host_mirror_space, Kokkos::MemoryUnmanaged > unmanaged_result; - typedef Kokkos::TeamPolicy<DeviceType> policy_type ; - typedef Kokkos::View<double**,DeviceType> view_type ; - typedef Kokkos::View<double,typename view_type::host_mirror_space,Kokkos::MemoryUnmanaged> unmanaged_result ; - - view_type a("A",100,5); - typename view_type::HostMirror h_a = Kokkos::create_mirror_view(a); + view_type a( "A", 100, 5 ); + typename view_type::HostMirror h_a = Kokkos::create_mirror_view( a ); - for(int i=0;i<100;i++) { - for(int j=0;j<5;j++) - h_a(i,j) = 0.1*i/(1.1*j+1.0) + 0.5*j; + for ( int i = 0; i < 100; i++ ) { + for ( int j = 0; j < 5; j++ ) { + h_a( i, j ) = 0.1 * i / ( 1.1 * j + 1.0 ) + 0.5 * j; + } } - Kokkos::deep_copy(a,h_a); + Kokkos::deep_copy( a, h_a ); double result = 0.0; - if(PWRTest==false) { - Kokkos::parallel_reduce(100,KOKKOS_LAMBDA(const int& i, double& sum) { - sum += a(i,1) + a(i,2); - sum += a(i,0) - a(i,3); - sum += a(i,4) + a(i,0); - sum += a(i,2) - a(i,1); - sum += a(i,3) + a(i,4); + if ( PWRTest == false ) { + Kokkos::parallel_reduce( 100, KOKKOS_LAMBDA( const int & i, double & sum ) { + sum += a( i, 1 ) + a( i, 2 ); + sum += a( i, 0 ) - a( i, 3 ); + sum += a( i, 4 ) + a( i, 0 ); + sum += a( i, 2 ) - a( i, 1 ); + sum += a( i, 3 ) + a( i, 4 ); }, unmanaged_result( & result ) ); - } else { - typedef typename policy_type::member_type team_member ; - Kokkos::parallel_reduce(policy_type(25,Kokkos::AUTO),KOKKOS_LAMBDA(const team_member & dev, double& sum) { - const int begin = dev.league_rank() * 4 ; - const int end = begin + 4 ; - for ( int i = begin + dev.team_rank() ; i < end ; i += dev.team_size() ) { - sum += a(i,1) + a(i,2); - sum += a(i,0) - a(i,3); - sum += a(i,4) + a(i,0); - sum += a(i,2) - a(i,1); - sum += a(i,3) + a(i,4); + } + else { + typedef typename policy_type::member_type team_member; + Kokkos::parallel_reduce( policy_type( 25, Kokkos::AUTO ), KOKKOS_LAMBDA( const team_member & dev, double & sum ) { + const int begin = dev.league_rank() * 4; + const int end = begin + 4; + for ( int i = begin + dev.team_rank(); i < end; i += dev.team_size() ) { + sum += a( i, 1 ) + a( i, 2 ); + sum += a( i, 0 ) - a( i, 3 ); + sum += a( i, 4 ) + a( i, 0 ); + sum += a( i, 2 ) - a( i, 1 ); + sum += a( i, 3 ) + a( i, 4 ); } }, unmanaged_result( & result ) ); } return result; } - #else -template<class DeviceType, bool PWRTest> +template< class DeviceType, bool PWRTest > double ReduceTestLambda() { - return ReduceTestFunctor<DeviceType,PWRTest>(); + return ReduceTestFunctor< DeviceType, PWRTest >(); } #endif -template<class DeviceType> -double TestVariantLambda(int test) { - switch (test) { - case 1: return AddTestLambda<DeviceType,false>(); - case 2: return AddTestLambda<DeviceType,true>(); - case 3: return ReduceTestLambda<DeviceType,false>(); - case 4: return ReduceTestLambda<DeviceType,true>(); +template< class DeviceType > +double TestVariantLambda( int test ) { + switch ( test ) { + case 1: return AddTestLambda< DeviceType, false 
>(); + case 2: return AddTestLambda< DeviceType, true >(); + case 3: return ReduceTestLambda< DeviceType, false >(); + case 4: return ReduceTestLambda< DeviceType, true >(); } + return 0; } - -template<class DeviceType> -double TestVariantFunctor(int test) { - switch (test) { - case 1: return AddTestFunctor<DeviceType,false>(); - case 2: return AddTestFunctor<DeviceType,true>(); - case 3: return ReduceTestFunctor<DeviceType,false>(); - case 4: return ReduceTestFunctor<DeviceType,true>(); +template< class DeviceType > +double TestVariantFunctor( int test ) { + switch ( test ) { + case 1: return AddTestFunctor< DeviceType, false >(); + case 2: return AddTestFunctor< DeviceType, true >(); + case 3: return ReduceTestFunctor< DeviceType, false >(); + case 4: return ReduceTestFunctor< DeviceType, true >(); } + return 0; } -template<class DeviceType> -bool Test(int test) { - +template< class DeviceType > +bool Test( int test ) { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - double res_functor = TestVariantFunctor<DeviceType>(test); - double res_lambda = TestVariantLambda<DeviceType>(test); + double res_functor = TestVariantFunctor< DeviceType >( test ); + double res_lambda = TestVariantLambda< DeviceType >( test ); - char testnames[5][256] = {" " - ,"AddTest","AddTest TeamPolicy" - ,"ReduceTest","ReduceTest TeamPolicy" + char testnames[5][256] = { " " + , "AddTest", "AddTest TeamPolicy" + , "ReduceTest", "ReduceTest TeamPolicy" }; bool passed = true; @@ -322,13 +333,13 @@ bool Test(int test) { std::cout << "CXX11 ( test = '" << testnames[test] << "' FAILED : " << res_functor << " != " << res_lambda - << std::endl ; + << std::endl; } - return passed ; + return passed; #else return true; #endif } -} +} // namespace TestCXX11 diff --git a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp index 359e17a44f1642d630b97987f8d049fc3217a9fb..b53b42b8e05bc906c17f2ad59bdf1ebb9fd62ef7 100644 --- a/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp +++ b/lib/kokkos/core/unit_test/TestCXX11Deduction.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,10 +36,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ + #include <Kokkos_Core.hpp> #ifndef TESTCXX11DEDUCTION_HPP @@ -52,43 +53,40 @@ struct TestReductionDeductionTagB {}; template < class ExecSpace > struct TestReductionDeductionFunctor { - // KOKKOS_INLINE_FUNCTION - // void operator()( long i , long & value ) const - // { value += i + 1 ; } + // void operator()( long i, long & value ) const + // { value += i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( TestReductionDeductionTagA , long i , long & value ) const + void operator()( TestReductionDeductionTagA, long i, long & value ) const { value += ( 2 * i + 1 ) + ( 2 * i + 2 ); } KOKKOS_INLINE_FUNCTION - void operator()( const TestReductionDeductionTagB & , const long i , long & value ) const - { value += ( 3 * i + 1 ) + ( 3 * i + 2 ) + ( 3 * i + 3 ) ; } - + void operator()( const TestReductionDeductionTagB &, const long i, long & value ) const + { value += ( 3 * i + 1 ) + ( 3 * i + 2 ) + ( 3 * i + 3 ); } }; template< class ExecSpace > void test_reduction_deduction() { - typedef TestReductionDeductionFunctor< ExecSpace > Functor ; + typedef TestReductionDeductionFunctor< ExecSpace > Functor; - const long N = 50 ; - // const long answer = N % 2 ? ( N * ((N+1)/2 )) : ( (N/2) * (N+1) ); - const long answerA = N % 2 ? ( (2*N) * (((2*N)+1)/2 )) : ( ((2*N)/2) * ((2*N)+1) ); - const long answerB = N % 2 ? ( (3*N) * (((3*N)+1)/2 )) : ( ((3*N)/2) * ((3*N)+1) ); - long result = 0 ; + const long N = 50; + // const long answer = N % 2 ? ( N * ( ( N + 1 ) / 2 ) ) : ( ( N / 2 ) * ( N + 1 ) ); + const long answerA = N % 2 ? ( ( 2 * N ) * ( ( ( 2 * N ) + 1 ) / 2 ) ) : ( ( ( 2 * N ) / 2 ) * ( ( 2 * N ) + 1 ) ); + const long answerB = N % 2 ? ( ( 3 * N ) * ( ( ( 3 * N ) + 1 ) / 2 ) ) : ( ( ( 3 * N ) / 2 ) * ( ( 3 * N ) + 1 ) ); + long result = 0; - // Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace>(0,N) , Functor() , result ); - // ASSERT_EQ( answer , result ); - - Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,TestReductionDeductionTagA>(0,N) , Functor() , result ); - ASSERT_EQ( answerA , result ); - - Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,TestReductionDeductionTagB>(0,N) , Functor() , result ); - ASSERT_EQ( answerB , result ); -} + // Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), Functor(), result ); + // ASSERT_EQ( answer, result ); + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TestReductionDeductionTagA >( 0, N ), Functor(), result ); + ASSERT_EQ( answerA, result ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TestReductionDeductionTagB >( 0, N ), Functor(), result ); + ASSERT_EQ( answerB, result ); } -#endif +} // namespace TestCXX11 +#endif diff --git a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp index 5add656a4d7aaa7b70bc247a9ed3af1599e27211..45554383446ec13794f9e22bb0819477a7bdb278 100644 --- a/lib/kokkos/core/unit_test/TestCompilerMacros.hpp +++ b/lib/kokkos/core/unit_test/TestCompilerMacros.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,17 +47,17 @@ namespace TestCompilerMacros { -template<class DEVICE_TYPE> +template< class DEVICE_TYPE > struct AddFunctor { typedef DEVICE_TYPE execution_space; - typedef typename Kokkos::View<int**,execution_space> type; - type a,b; + typedef typename Kokkos::View< int**, execution_space > type; + type a, b; int length; - AddFunctor(type a_, type b_):a(a_),b(b_),length(a.dimension_1()) {} + AddFunctor( type a_, type b_ ) : a( a_ ), b( b_ ), length( a.dimension_1() ) {} KOKKOS_INLINE_FUNCTION - void operator()(int i) const { + void operator()( int i ) const { #ifdef KOKKOS_ENABLE_PRAGMA_UNROLL #pragma unroll #endif @@ -75,21 +75,23 @@ struct AddFunctor { #pragma simd #endif #endif - for(int j=0;j<length;j++) - a(i,j) += b(i,j); + for ( int j = 0; j < length; j++ ) { + a( i, j ) += b( i, j ); + } } }; -template<class DeviceType> +template< class DeviceType > bool Test() { - typedef typename Kokkos::View<int**,DeviceType> type; - type a("A",1024,128); - type b("B",1024,128); + typedef typename Kokkos::View< int**, DeviceType > type; + type a( "A", 1024, 128 ); + type b( "B", 1024, 128 ); - AddFunctor<DeviceType> f(a,b); - Kokkos::parallel_for(1024,f); + AddFunctor< DeviceType > f( a, b ); + Kokkos::parallel_for( 1024, f ); DeviceType::fence(); + return true; } -} +} // namespace TestCompilerMacros diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp index 7e08f67e69721dc803f1ea4a23cbe3328af391dc..f85a35c096516fe77c39cfaaa1778a9d5bb895ef 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp @@ -45,13 +45,10 @@ #include <Kokkos_Core.hpp> -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include <TestAtomic.hpp> - #include <TestViewAPI.hpp> - #include <TestReduce.hpp> #include <TestScan.hpp> #include <TestTeam.hpp> @@ -78,24 +75,25 @@ protected: TEST_F( defaultdevicetype, host_space_access ) { - typedef Kokkos::HostSpace::execution_space host_exec_space ; - typedef Kokkos::Device< host_exec_space , Kokkos::HostSpace > device_space ; - typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space ; + typedef Kokkos::HostSpace::execution_space host_exec_space; + typedef Kokkos::Device< host_exec_space, Kokkos::HostSpace > device_space; + typedef Kokkos::Impl::HostMirror< Kokkos::DefaultExecutionSpace >::Space mirror_space; static_assert( - Kokkos::Impl::SpaceAccessibility< host_exec_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< host_exec_space, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< device_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< device_space, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< mirror_space , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< mirror_space, Kokkos::HostSpace 
>::accessible, "" ); } -TEST_F( defaultdevicetype, view_api) { - TestViewAPI< double , Kokkos::DefaultExecutionSpace >(); +TEST_F( defaultdevicetype, view_api ) +{ + TestViewAPI< double, Kokkos::DefaultExecutionSpace >(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 7778efde301bb9fd8856c9743bfcaaea2d7b3095..401da58a5838d7cab5adaf38a00d4231f51721d2 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,376 +44,425 @@ #include <gtest/gtest.h> #include <Kokkos_Core.hpp> + #ifdef KOKKOS_ENABLE_OPENMP #include <omp.h> #endif -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) namespace Test { namespace Impl { - char** init_kokkos_args(bool do_threads,bool do_numa,bool do_device,bool do_other, int& nargs, Kokkos::InitArguments& init_args) { - nargs = (do_threads?1:0) + - (do_numa?1:0) + - (do_device?1:0) + - (do_other?4:0); - char** args_kokkos = new char*[nargs]; - for(int i = 0; i < nargs; i++) - args_kokkos[i] = new char[20]; +char** init_kokkos_args( bool do_threads, bool do_numa, bool do_device, bool do_other, int & nargs, Kokkos::InitArguments & init_args ) { + nargs = ( do_threads ? 1 : 0 ) + + ( do_numa ? 1 : 0 ) + + ( do_device ? 1 : 0 ) + + ( do_other ? 4 : 0 ); - int threads_idx = do_other?1:0; - int numa_idx = (do_other?3:0) + (do_threads?1:0); - int device_idx = (do_other?3:0) + (do_threads?1:0) + (do_numa?1:0); + char** args_kokkos = new char*[nargs]; + for ( int i = 0; i < nargs; i++ ) { + args_kokkos[i] = new char[20]; + } + int threads_idx = do_other ? 1 : 0; + int numa_idx = ( do_other ? 3 : 0 ) + ( do_threads ? 1 : 0 ); + int device_idx = ( do_other ? 3 : 0 ) + ( do_threads ? 1 : 0 ) + ( do_numa ? 
1 : 0 ); - if(do_threads) { - int nthreads = 3; + if ( do_threads ) { + int nthreads = 3; #ifdef KOKKOS_ENABLE_OPENMP - if(omp_get_max_threads() < 3) - nthreads = omp_get_max_threads(); + if ( omp_get_max_threads() < 3 ) + nthreads = omp_get_max_threads(); #endif - if(Kokkos::hwloc::available()) { - if(Kokkos::hwloc::get_available_threads_per_core()<3) - nthreads = Kokkos::hwloc::get_available_threads_per_core() - * Kokkos::hwloc::get_available_numa_count(); - } - -#ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { - nthreads = 1; - } -#endif - init_args.num_threads = nthreads; - sprintf(args_kokkos[threads_idx],"--threads=%i",nthreads); + if ( Kokkos::hwloc::available() ) { + if ( Kokkos::hwloc::get_available_threads_per_core() < 3 ) + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); } - if(do_numa) { - int numa = 1; - if(Kokkos::hwloc::available()) - numa = Kokkos::hwloc::get_available_numa_count(); #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { - numa = 1; - } -#endif - - init_args.num_numa = numa; - sprintf(args_kokkos[numa_idx],"--numa=%i",numa); + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + nthreads = 1; } +#endif - if(do_device) { + init_args.num_threads = nthreads; + sprintf( args_kokkos[threads_idx], "--threads=%i", nthreads ); + } - init_args.device_id = 0; - sprintf(args_kokkos[device_idx],"--device=%i",0); + if ( do_numa ) { + int numa = 1; + if ( Kokkos::hwloc::available() ) { + numa = Kokkos::hwloc::get_available_numa_count(); } - if(do_other) { - sprintf(args_kokkos[0],"--dummyarg=1"); - sprintf(args_kokkos[threads_idx+(do_threads?1:0)],"--dummy2arg"); - sprintf(args_kokkos[threads_idx+(do_threads?1:0)+1],"dummy3arg"); - sprintf(args_kokkos[device_idx+(do_device?1:0)],"dummy4arg=1"); +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + numa = 1; } +#endif + init_args.num_numa = numa; + sprintf( args_kokkos[numa_idx], "--numa=%i", numa ); + } - return args_kokkos; + if ( do_device ) { + init_args.device_id = 0; + sprintf( args_kokkos[device_idx], "--device=%i", 0 ); } - Kokkos::InitArguments init_initstruct(bool do_threads, bool do_numa, bool do_device) { - Kokkos::InitArguments args; + if ( do_other ) { + sprintf( args_kokkos[0], "--dummyarg=1" ); + sprintf( args_kokkos[ threads_idx + ( do_threads ? 1 : 0 ) ], "--dummy2arg" ); + sprintf( args_kokkos[ threads_idx + ( do_threads ? 1 : 0 ) + 1 ], "dummy3arg" ); + sprintf( args_kokkos[ device_idx + ( do_device ? 
1 : 0 ) ], "dummy4arg=1" ); + } + + return args_kokkos; +} + +Kokkos::InitArguments init_initstruct( bool do_threads, bool do_numa, bool do_device ) { + Kokkos::InitArguments args; - if(do_threads) { - int nthreads = 3; + if ( do_threads ) { + int nthreads = 3; #ifdef KOKKOS_ENABLE_OPENMP - if(omp_get_max_threads() < 3) - nthreads = omp_get_max_threads(); + if ( omp_get_max_threads() < 3 ) { + nthreads = omp_get_max_threads(); + } #endif - if(Kokkos::hwloc::available()) { - if(Kokkos::hwloc::get_available_threads_per_core()<3) - nthreads = Kokkos::hwloc::get_available_threads_per_core() - * Kokkos::hwloc::get_available_numa_count(); + if ( Kokkos::hwloc::available() ) { + if ( Kokkos::hwloc::get_available_threads_per_core() < 3 ) { + nthreads = Kokkos::hwloc::get_available_threads_per_core() + * Kokkos::hwloc::get_available_numa_count(); } + } + #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { - nthreads = 1; - } + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + nthreads = 1; + } #endif - args.num_threads = nthreads; + args.num_threads = nthreads; + } + + if ( do_numa ) { + int numa = 1; + if ( Kokkos::hwloc::available() ) { + numa = Kokkos::hwloc::get_available_numa_count(); } - if(do_numa) { - int numa = 1; - if(Kokkos::hwloc::available()) - numa = Kokkos::hwloc::get_available_numa_count(); #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::Serial,Kokkos::DefaultExecutionSpace>::value || - std::is_same<Kokkos::Serial,Kokkos::DefaultHostExecutionSpace>::value ) { - numa = 1; - } -#endif - args.num_numa = numa; + if ( std::is_same< Kokkos::Serial, Kokkos::DefaultExecutionSpace >::value || + std::is_same< Kokkos::Serial, Kokkos::DefaultHostExecutionSpace >::value ) { + numa = 1; } +#endif - if(do_device) { - args.device_id = 0; - } + args.num_numa = numa; + } - return args; + if ( do_device ) { + args.device_id = 0; } - void check_correct_initialization(const Kokkos::InitArguments& argstruct) { - ASSERT_EQ( Kokkos::DefaultExecutionSpace::is_initialized(), 1); - ASSERT_EQ( Kokkos::HostSpace::execution_space::is_initialized(), 1); - - //Figure out the number of threads the HostSpace ExecutionSpace should have initialized to - int expected_nthreads = argstruct.num_threads; - if(expected_nthreads<1) { - if(Kokkos::hwloc::available()) { - expected_nthreads = Kokkos::hwloc::get_available_numa_count() - * Kokkos::hwloc::get_available_cores_per_numa() - * Kokkos::hwloc::get_available_threads_per_core(); - } else { - #ifdef KOKKOS_ENABLE_OPENMP - if(std::is_same<Kokkos::HostSpace::execution_space,Kokkos::OpenMP>::value) { - expected_nthreads = omp_get_max_threads(); - } else - #endif - expected_nthreads = 1; + return args; +} + +void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { + ASSERT_EQ( Kokkos::DefaultExecutionSpace::is_initialized(), 1 ); + ASSERT_EQ( Kokkos::HostSpace::execution_space::is_initialized(), 1 ); + + // Figure out the number of threads the HostSpace ExecutionSpace should have initialized to. 
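  // In short: when argstruct.num_threads is >= 1 it is used as-is; otherwise the
  // expected count is derived from hwloc ( numa count * cores per numa * threads
  // per core ) when hwloc is available, else from omp_get_max_threads() when the
  // host execution space is OpenMP, else 1 ( and it is forced to 1 whenever the
  // default (host) execution space is Serial ).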
+ int expected_nthreads = argstruct.num_threads; + if ( expected_nthreads < 1 ) { + if ( Kokkos::hwloc::available() ) { + expected_nthreads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core(); + } + else { +#ifdef KOKKOS_ENABLE_OPENMP + if ( std::is_same< Kokkos::HostSpace::execution_space, Kokkos::OpenMP >::value ) { + expected_nthreads = omp_get_max_threads(); } - #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || - std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) + else +#endif expected_nthreads = 1; - #endif } - int expected_numa = argstruct.num_numa; - if(expected_numa<1) { - if(Kokkos::hwloc::available()) { - expected_numa = Kokkos::hwloc::get_available_numa_count(); - } else { - expected_numa = 1; - } - #ifdef KOKKOS_ENABLE_SERIAL - if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Serial>::value || - std::is_same<Kokkos::DefaultHostExecutionSpace,Kokkos::Serial>::value ) - expected_numa = 1; - #endif +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Serial >::value ) { + expected_nthreads = 1; } - ASSERT_EQ(Kokkos::HostSpace::execution_space::thread_pool_size(),expected_nthreads); +#endif + } -#ifdef KOKKOS_ENABLE_CUDA - if(std::is_same<Kokkos::DefaultExecutionSpace,Kokkos::Cuda>::value) { - int device; - cudaGetDevice( &device ); - int expected_device = argstruct.device_id; - if(argstruct.device_id<0) { - expected_device = 0; - } - ASSERT_EQ(expected_device,device); + int expected_numa = argstruct.num_numa; + + if ( expected_numa < 1 ) { + if ( Kokkos::hwloc::available() ) { + expected_numa = Kokkos::hwloc::get_available_numa_count(); + } + else { + expected_numa = 1; } + +#ifdef KOKKOS_ENABLE_SERIAL + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value || + std::is_same< Kokkos::DefaultHostExecutionSpace, Kokkos::Serial >::value ) + expected_numa = 1; #endif } - //ToDo: Add check whether correct number of threads are actually started - void test_no_arguments() { - Kokkos::initialize(); - check_correct_initialization(Kokkos::InitArguments()); - Kokkos::finalize(); - } + ASSERT_EQ( Kokkos::HostSpace::execution_space::thread_pool_size(), expected_nthreads ); - void test_commandline_args(int nargs, char** args, const Kokkos::InitArguments& argstruct) { - Kokkos::initialize(nargs,args); - check_correct_initialization(argstruct); - Kokkos::finalize(); - } - void test_initstruct_args(const Kokkos::InitArguments& args) { - Kokkos::initialize(args); - check_correct_initialization(args); - Kokkos::finalize(); +#ifdef KOKKOS_ENABLE_CUDA + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Cuda >::value ) { + int device; + cudaGetDevice( &device ); + + int expected_device = argstruct.device_id; + if ( argstruct.device_id < 0 ) { + expected_device = 0; + } + + ASSERT_EQ( expected_device, device ); } +#endif +} + +// TODO: Add check whether correct number of threads are actually started. 
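// The three helpers just below drive the three initialization paths used by
// these tests ( illustrative summary; the helper names are the ones defined here ):
//
//   Kokkos::initialize();              // library defaults        ( test_no_arguments )
//   Kokkos::initialize( nargs, args ); // command-line flags, e.g. --threads=N
//   Kokkos::initialize( argstruct );   // an explicit Kokkos::InitArguments struct
//
// Each helper then calls check_correct_initialization() and Kokkos::finalize().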
+void test_no_arguments() { + Kokkos::initialize(); + check_correct_initialization( Kokkos::InitArguments() ); + Kokkos::finalize(); } +void test_commandline_args( int nargs, char** args, const Kokkos::InitArguments & argstruct ) { + Kokkos::initialize( nargs, args ); + check_correct_initialization( argstruct ); + Kokkos::finalize(); +} + +void test_initstruct_args( const Kokkos::InitArguments & args ) { + Kokkos::initialize( args ); + check_correct_initialization( args ); + Kokkos::finalize(); +} + +} // namespace Impl + class defaultdevicetypeinit : public ::testing::Test { protected: - static void SetUpTestCase() - { - } + static void SetUpTestCase() {} - static void TearDownTestCase() - { - } + static void TearDownTestCase() {} }; #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_01 -TEST_F( defaultdevicetypeinit, no_args) { +TEST_F( defaultdevicetypeinit, no_args ) +{ Impl::test_no_arguments(); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_02 -TEST_F( defaultdevicetypeinit, commandline_args_empty) { +TEST_F( defaultdevicetypeinit, commandline_args_empty ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_03 -TEST_F( defaultdevicetypeinit, commandline_args_other) { +TEST_F( defaultdevicetypeinit, commandline_args_other ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,false,true,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, false, true, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_04 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,false,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, false, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_05 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,false,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, false, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_06 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device) { +TEST_F( defaultdevicetypeinit, 
commandline_args_nthreads_numa_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_07 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_device) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,false,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, false, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_08 -TEST_F( defaultdevicetypeinit, commandline_args_numa_device) { +TEST_F( defaultdevicetypeinit, commandline_args_numa_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,true,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, true, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_09 -TEST_F( defaultdevicetypeinit, commandline_args_device) { +TEST_F( defaultdevicetypeinit, commandline_args_device ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(false,false,true,false,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( false, false, true, false, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_10 -TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device_other) { +TEST_F( defaultdevicetypeinit, commandline_args_nthreads_numa_device_other ) +{ Kokkos::InitArguments argstruct; int nargs = 0; - char** args = Impl::init_kokkos_args(true,true,true,true,nargs, argstruct); - Impl::test_commandline_args(nargs,args,argstruct); - for(int i = 0; i < nargs; i++) + char** args = Impl::init_kokkos_args( true, true, true, true, nargs, argstruct ); + Impl::test_commandline_args( nargs, args, argstruct ); + + for ( int i = 0; i < nargs; i++ ) { delete [] args[i]; + } delete [] args; } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_11 -TEST_F( defaultdevicetypeinit, initstruct_default) { +TEST_F( defaultdevicetypeinit, initstruct_default ) +{ Kokkos::InitArguments args; - Impl::test_initstruct_args(args); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_12 -TEST_F( defaultdevicetypeinit, initstruct_nthreads) { - Kokkos::InitArguments args = Impl::init_initstruct(true,false,false); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads ) +{ + Kokkos::InitArguments args = 
Impl::init_initstruct( true, false, false ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_13 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa) { - Kokkos::InitArguments args = Impl::init_initstruct(true,true,false); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, true, false ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_14 -TEST_F( defaultdevicetypeinit, initstruct_device) { - Kokkos::InitArguments args = Impl::init_initstruct(false,false,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( false, false, true ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_15 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_device) { - Kokkos::InitArguments args = Impl::init_initstruct(true,false,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, false, true ); + Impl::test_initstruct_args( args ); } #endif #ifdef KOKKOS_DEFAULTDEVICETYPE_INIT_TEST_16 -TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa_device) { - Kokkos::InitArguments args = Impl::init_initstruct(true,true,true); - Impl::test_initstruct_args(args); +TEST_F( defaultdevicetypeinit, initstruct_nthreads_numa_device ) +{ + Kokkos::InitArguments args = Impl::init_initstruct( true, true, true ); + Impl::test_initstruct_args( args ); } #endif - -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp index dd148a062446f253bbcbc854b775eefd85debf79..4fdfa959107becae384ffa5c5e09d444e9299670 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include <Kokkos_Core.hpp> -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include <TestReduce.hpp> - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_a) { +TEST_F( defaultdevicetype, reduce_instantiation_a ) +{ TestReduceCombinatoricalInstantiation<>::execute_a(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp index c8edfdd5c39d575400408e8dbf5fb3cdd2005d66..841f34e03dd1f9900d304a8f6e889a5d30dc2a65 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_b.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include <Kokkos_Core.hpp> -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include <TestReduce.hpp> - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_b) { +TEST_F( defaultdevicetype, reduce_instantiation_b ) +{ TestReduceCombinatoricalInstantiation<>::execute_b(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp index 405d49a9b891619f3d823a5559e7751b8f3b885b..602863be3852a603d6c8e803752ad4a67709c0d5 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_c.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,12 +45,10 @@ #include <Kokkos_Core.hpp> -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include <TestReduce.hpp> - namespace Test { class defaultdevicetype : public ::testing::Test { @@ -66,11 +64,11 @@ protected: } }; - -TEST_F( defaultdevicetype, reduce_instantiation_c) { +TEST_F( defaultdevicetype, reduce_instantiation_c ) +{ TestReduceCombinatoricalInstantiation<>::execute_c(); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp index 426cc4f06c6157d37db40ea2feeceac242710ea0..5d3665b905434d1310dc51e430940b17690baac1 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp @@ -45,13 +45,10 @@ #include <Kokkos_Core.hpp> -#if !defined(KOKKOS_ENABLE_CUDA) || defined(__CUDACC__) -//---------------------------------------------------------------------------- +#if !defined( KOKKOS_ENABLE_CUDA ) || defined( __CUDACC__ ) #include <TestAtomic.hpp> - #include <TestViewAPI.hpp> - #include <TestReduce.hpp> #include <TestScan.hpp> #include <TestTeam.hpp> @@ -76,162 +73,165 @@ protected: } }; -TEST_F( defaultdevicetype, test_utilities) { +TEST_F( defaultdevicetype, test_utilities ) +{ test_utilities(); } -TEST_F( defaultdevicetype, long_reduce) { - TestReduce< long , Kokkos::DefaultExecutionSpace >( 100000 ); +TEST_F( defaultdevicetype, long_reduce ) +{ + TestReduce< long, Kokkos::DefaultExecutionSpace >( 100000 ); } -TEST_F( defaultdevicetype, double_reduce) { - TestReduce< double , Kokkos::DefaultExecutionSpace >( 100000 ); +TEST_F( defaultdevicetype, double_reduce ) +{ + TestReduce< double, Kokkos::DefaultExecutionSpace >( 100000 ); } -TEST_F( defaultdevicetype, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::DefaultExecutionSpace >( 100000 ); +TEST_F( defaultdevicetype, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::DefaultExecutionSpace >( 100000 ); } -TEST_F( defaultdevicetype, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::DefaultExecutionSpace >( 100000 ); +TEST_F( defaultdevicetype, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::DefaultExecutionSpace >( 100000 ); } -TEST_F( defaultdevicetype, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::DefaultExecutionSpace >( 100000 ); +TEST_F( defaultdevicetype, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::DefaultExecutionSpace >( 100000 ); } - -TEST_F( defaultdevicetype , atomics ) +TEST_F( defaultdevicetype, atomics ) { - const int loop_count = 1e4 ; + const int loop_count = 1e4; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned 
int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::DefaultExecutionSpace>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::DefaultExecutionSpace >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::DefaultExecutionSpace>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::DefaultExecutionSpace >( 100, 3 ) ) ); } -/*TEST_F( defaultdevicetype , view_remap ) +/*TEST_F( defaultdevicetype, view_remap ) { - enum { N0 = 
3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::DefaultExecutionSpace > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::DefaultExecutionSpace > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -}*/ - -//---------------------------------------------------------------------------- + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::DefaultExecutionSpace > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::DefaultExecutionSpace > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::DefaultExecutionSpace > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + for ( size_t i3 = 0; i3 < N3; ++i3 ) { + for ( size_t i2 = 0; i2 < N2; ++i2 ) { + for ( size_t i1 = 0; i1 < N1; ++i1 ) { + for ( size_t i0 = 0; i0 < N0; ++i0 ) { + input( i0, i1, i2, i3 ) = ++value; + } + } + } + } + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
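  // ( Context for this disabled test: output is double / LayoutRight while input
  //   is int / LayoutLeft, so the copy below is the layout/type remap the test
  //   was meant to exercise; diff only differs from input in its second extent,
  //   which is why the commented-out copy above is expected to throw. )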
+ Kokkos::deep_copy( output, input ); + + value = 0; + for ( size_t i3 = 0; i3 < N3; ++i3 ) { + for ( size_t i2 = 0; i2 < N2; ++i2 ) { + for ( size_t i1 = 0; i1 < N1; ++i1 ) { + for ( size_t i0 = 0; i0 < N0; ++i0 ) { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } + } + } + } +}*/ -TEST_F( defaultdevicetype , view_aggregate ) +TEST_F( defaultdevicetype, view_aggregate ) { TestViewAggregate< Kokkos::DefaultExecutionSpace >(); } -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , scan ) +TEST_F( defaultdevicetype, scan ) { - TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1 , 1000 ); + TestScan< Kokkos::DefaultExecutionSpace >::test_range( 1, 1000 ); TestScan< Kokkos::DefaultExecutionSpace >( 1000000 ); TestScan< Kokkos::DefaultExecutionSpace >( 10000000 ); Kokkos::DefaultExecutionSpace::fence(); } - -//---------------------------------------------------------------------------- - -TEST_F( defaultdevicetype , compiler_macros ) +TEST_F( defaultdevicetype, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::DefaultExecutionSpace >() ) ); } - -//---------------------------------------------------------------------------- -TEST_F( defaultdevicetype , cxx11 ) +TEST_F( defaultdevicetype, cxx11 ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >(4) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); } -TEST_F( defaultdevicetype , team_vector ) +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) +TEST_F( defaultdevicetype, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >(5) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::DefaultExecutionSpace >( 5 ) ) ); } +#endif -TEST_F( defaultdevicetype , malloc ) +TEST_F( defaultdevicetype, malloc ) { - int* data = (int*) Kokkos::kokkos_malloc(100*sizeof(int)); - ASSERT_NO_THROW(data = (int*) Kokkos::kokkos_realloc(data,120*sizeof(int))); - Kokkos::kokkos_free(data); + int* data = (int*) Kokkos::kokkos_malloc( 100 * sizeof( int ) ); + ASSERT_NO_THROW( data = (int*) Kokkos::kokkos_realloc( data, 120 * sizeof( int ) ) ); + Kokkos::kokkos_free( data ); - int* data2 = (int*) Kokkos::kokkos_malloc(0); - 
ASSERT_TRUE(data2==NULL); - Kokkos::kokkos_free(data2); + int* data2 = (int*) Kokkos::kokkos_malloc( 0 ); + ASSERT_TRUE( data2 == NULL ); + Kokkos::kokkos_free( data2 ); } -} // namespace test +} // namespace Test #endif diff --git a/lib/kokkos/core/unit_test/TestHWLOC.cpp b/lib/kokkos/core/unit_test/TestHWLOC.cpp index 1637dec5de4ff762cfbd259ee47932b5e85eb4d0..d03d9b816f9c3ac3ee85b61886baa243e5160714 100644 --- a/lib/kokkos/core/unit_test/TestHWLOC.cpp +++ b/lib/kokkos/core/unit_test/TestHWLOC.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -44,26 +44,24 @@ #include <gtest/gtest.h> #include <iostream> + #include <Kokkos_hwloc.hpp> namespace Test { class hwloc : public ::testing::Test { protected: - static void SetUpTestCase() - {} + static void SetUpTestCase() {} - static void TearDownTestCase() - {} + static void TearDownTestCase() {} }; -TEST_F( hwloc, query) +TEST_F( hwloc, query ) { std::cout << " NUMA[" << Kokkos::hwloc::get_available_numa_count() << "]" << " CORE[" << Kokkos::hwloc::get_available_cores_per_numa() << "]" << " PU[" << Kokkos::hwloc::get_available_threads_per_core() << "]" - << std::endl ; -} - + << std::endl; } +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index 9894d1ce697c1f109163f7711e62f12cfceef703..1dc349cc1268e680aabc0859a771c7a786a388de 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -47,509 +47,1675 @@ #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template <typename ExecSpace > struct TestMDRange_2D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType**, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; - using DataType = int ; - using ViewType = typename Kokkos::View< DataType** , ExecSpace > ; - using HostViewType = typename ViewType::HostMirror ; + ViewType input_view; - ViewType input_view ; + TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view( "input_view", N0, N1 ) {} - TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view("input_view", N0, N1) {} + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j ) const + { + input_view( i, j ) = 1; + } KOKKOS_INLINE_FUNCTION - void operator()( const int i , const int j ) const + void operator()( const int i, const int j, double &lsum ) const { - input_view(i,j) = 1; + lsum += input_view( i, j ) * 2; } + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j ) const + { + input_view( i, j ) = 3; + } - static void test_for2( const int64_t N0, const int64_t N1 ) + static void test_reduce2( const int N0, const int N1 ) { + using namespace Kokkos::Experimental; + + { + typedef 
typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 6 } } ); + + TestMDRange_2D functor( N0, N1 ); + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + } // end test_reduce2 + + static void test_for2( const int N0, const int N1 ) + { using namespace Kokkos::Experimental; { - using range_type = MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> >; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int>, InitTag > 
range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Default Layouts + InitTag op(): Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Flat >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, InitTag > range_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Default Layouts + InitTag op() + Default Tile: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Flat >, Kokkos::IndexType<int> >; 
+ typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "No info: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Flat >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 4, 4 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "D D: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left , Iterate::Left >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {3,3} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } 
+ + if ( counter != 0 ) { + printf( "L L: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Left , Iterate::Right >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {7,7} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 7, 7 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "L R: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {16,16} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 16, 16 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "R L: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0}, {N0,N1}, {5,16} ); - TestMDRange_2D functor(N0,N1); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 5, 16 } } ); + TestMDRange_2D functor( N0, N1 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; 
j<N1; ++j ) { - if ( h_view(i,j) != 1 ) { - ++counter; - } - }} - if ( counter != 0 ) - printf(" Errors in test_for2; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "R R: Errors in test_for2; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } - - } //end test_for2 -}; //MDRange_2D + } // end test_for2 +}; // MDRange_2D template <typename ExecSpace > struct TestMDRange_3D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType***, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; - using DataType = int ; - using ViewType = typename Kokkos::View< DataType*** , ExecSpace > ; - using HostViewType = typename ViewType::HostMirror ; + ViewType input_view; - ViewType input_view ; + TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view( "input_view", N0, N1, N2 ) {} - TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view("input_view", N0, N1, N2) {} + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k ) const + { + input_view( i, j, k ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, double &lsum ) const + { + lsum += input_view( i, j, k ) * 2; + } + // tagged operators + struct InitTag {}; KOKKOS_INLINE_FUNCTION - void operator()( const int i , const int j , const int k ) const + void operator()( const InitTag &, const int i, const int j, const int k ) const { - input_view(i,j,k) = 1; + input_view( i, j, k ) = 3; } - static void test_for3( const int64_t N0, const int64_t N1, const int64_t N2 ) + static void test_reduce3( const int N0, const int N1, const int N2 ) { using namespace Kokkos::Experimental; { - using range_type = MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + 
+ ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + double sum = 0.0; + md_parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + } // end test_reduce3 + + static void test_for3( const int N0, const int N1, const int N2 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType<int>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); 
+ } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 5, 7 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType 
h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Flat, Iterate::Default>, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 8, 8, 8 } } ); + TestMDRange_3D functor( N0, N1, N2 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Flat, Iterate::Flat >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); + TestMDRange_3D functor( N0, N1, N2 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for3; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } // end test_for3 +}; + +template <typename ExecSpace > +struct TestMDRange_4D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType****, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; + + ViewType input_view; + + TestMDRange_4D( const DataType N0, const DataType N1, const DataType N2, const DataType N3 ) : input_view( "input_view", N0, N1, N2, N3 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int 
k, const int l ) const + { + input_view( i, j, k, l ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, double &lsum ) const + { + lsum += input_view( i, j, k, l ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l ) const + { + input_view( i, j, k, l ) = 3; + } + + static void test_for4( const int N0, const int N1, const int N2, const int N3 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4> > range_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } } ); + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Flat >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType<int>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } ); + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf("Defaults +m_tile > m_upper dim2 InitTag op(): Errors in test_for4; mismatches = %d\n\n",counter); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Flat >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 
0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); - range_type range( {0,0,0}, {N0,N1,N2} ); - TestMDRange_3D functor(N0,N1,N2); + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Left >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Left, Iterate::Right >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); - range_type range( {0,0,0}, {N0,N1,N2}, {3,5,7} ); - TestMDRange_3D functor(N0,N1,N2); + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < 
N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Left >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); - range_type range( {0,0,0}, {N0,N1,N2}, {8,8,8} ); - TestMDRange_3D functor(N0,N1,N2); + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } { - using range_type = MDRangePolicy< ExecSpace, Rank<3, Iterate::Right, Iterate::Right >, Kokkos::IndexType<int> >; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; - range_type range( {0,0,0}, {N0,N1,N2}, {2,4,2} ); - TestMDRange_3D functor(N0,N1,N2); + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); md_parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); - Kokkos::deep_copy( h_view , functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); int counter = 0; - for ( int i=0; i<N0; ++i ) { - for ( int j=0; j<N1; ++j ) { - for ( int k=0; k<N2; ++k ) { - if ( h_view(i,j,k) != 1 ) { - ++counter; - } - }}} - if ( counter != 0 ) - printf(" Errors in test_for3; mismatches = %d\n\n",counter); - ASSERT_EQ( counter , 0 ); + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); } - } //end test_for3 + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 4, 4, 4, 4 } } ); + + TestMDRange_4D 
functor( N0, N1, N2, N3 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for4; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } // end test_for4 }; -} /* namespace */ -} /* namespace Test */ +template <typename ExecSpace > +struct TestMDRange_5D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType*****, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; + + ViewType input_view; + + TestMDRange_5D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4 ) : input_view( "input_view", N0, N1, N2, N3, N4 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m ) const + { + input_view( i, j, k, l, m ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, double &lsum ) const + { + lsum += input_view( i, j, k, l, m ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m ) const + { + input_view( i, j, k, l, m ) = 3; + } + + static void test_for5( const int N0, const int N1, const int N2, const int N3, const int N4 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } } ); + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType<int>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 7 } } ); + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + 
typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) 
+ for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 4, 4, 4, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for5; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } +}; + +template <typename ExecSpace > +struct TestMDRange_6D { + using DataType = int; + using ViewType = typename Kokkos::View< DataType******, ExecSpace >; + using HostViewType = typename ViewType::HostMirror; + + ViewType input_view; + + TestMDRange_6D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4, const DataType N5 ) : input_view( "input_view", N0, N1, N2, N3, N4, N5 ) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, const int n ) const + { + input_view( i, j, k, l, m, n ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, const int l, const int m, const int n, double &lsum ) const + { + lsum += input_view( i, j, k, l, m, n ) * 2; + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m, const int n ) const + { + input_view( i, j, k, l, m, n ) = 3; + } + + static void test_for6( const int N0, const int N1, const int N2, const int N3, const int N4, const int N5 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< 
ExecSpace, Rank<6> > range_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } } ); + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + No Tile: Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType<int>, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Default, Iterate::Default>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + 
+ int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Left, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Left, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Right, Iterate::Left>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + + { + typedef typename 
Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Right, Iterate::Right>, Kokkos::IndexType<int> > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 4, 4, 4, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + md_parallel_for( range, functor ); + + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 1 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( " Errors in test_for6; mismatches = %d\n\n", counter ); + } + + ASSERT_EQ( counter, 0 ); + } + } +}; -/*--------------------------------------------------------------------------*/ +} // namespace +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp index 868e64e9da5e46ee0d06f59736a0f4b20d576ee0..925f0e35ed6d12d3a822daa63421827fe636c86c 100644 --- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -156,7 +156,7 @@ struct fill_memory { void operator()( size_type i ) const { if ( i % STRIDE == 0 ) { - *m_pointers[i / STRIDE].ptr = i / STRIDE ; + *m_pointers[i / STRIDE].ptr = i / STRIDE; } } }; @@ -493,12 +493,12 @@ T smallest_power2_ge( T val ) // Find the most significant nonzero bit. int first_nonzero_bit = Kokkos::Impl::bit_scan_reverse( val ); - // If val is an integral power of 2, ceil( log2(val) ) is equal to the + // If val is an integral power of 2, ceil( log2( val ) ) is equal to the // most significant nonzero bit. Otherwise, you need to add 1. int lg2_size = first_nonzero_bit + !Kokkos::Impl::is_integral_power_of_two( val ); - return T(1) << T(lg2_size); + return T( 1 ) << T( lg2_size ); } // This test makes allocation requests for multiple sizes and interleaves @@ -547,7 +547,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, phase1_size = ( ( phase1_size + num_chunk_sizes - 1 ) / num_chunk_sizes ) * num_chunk_sizes; - // Make sure the phase 2 size is multiples of (2 * num_chunk_sizes). + // Make sure the phase 2 size is multiples of ( 2 * num_chunk_sizes ). phase2_size = ( ( phase2_size + 2 * num_chunk_sizes - 1 ) / ( 2 * num_chunk_sizes ) ) * 2 * num_chunk_sizes; @@ -567,7 +567,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // each chunk size. work_view phase1_work( "Phase 1 Work", phase1_size ); typename work_view::HostMirror host_phase1_work = - create_mirror_view(phase1_work); + create_mirror_view( phase1_work ); size_t inner_size = phase1_size / num_chunk_sizes; unsigned chunk_size = base_chunk_size; @@ -589,7 +589,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // deallocations with an equal number of allocations for each chunk size. 
work_view phase2_work( "Phase 2 Work", phase2_size ); typename work_view::HostMirror host_phase2_work = - create_mirror_view(phase2_work); + create_mirror_view( phase2_work ); inner_size = half_phase2_size / num_chunk_sizes; chunk_size = base_chunk_size; @@ -614,7 +614,7 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes, // Initialize the phase 3 work view with all deallocations. work_view phase3_work( "Phase 3 Work", phase3_size ); typename work_view::HostMirror host_phase3_work = - create_mirror_view(phase3_work); + create_mirror_view( phase3_work ); inner_size = phase3_size / num_chunk_sizes; diff --git a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp index 1bb45481c9b76d6dde29ff9e9d192d5ae4531829..6f2ca6a61c34b84f96cefd1195a6a11e2a6d32d1 100644 --- a/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp +++ b/lib/kokkos/core/unit_test/TestPolicyConstruction.hpp @@ -48,7 +48,7 @@ #include <sstream> #include <iostream> -struct SomeTag{}; +struct SomeTag {}; template< class ExecutionSpace > class TestRangePolicyConstruction { @@ -56,179 +56,194 @@ public: TestRangePolicyConstruction() { test_compile_time_parameters(); } + private: void test_compile_time_parameters() { { Kokkos::Impl::expand_variadic(); - Kokkos::Impl::expand_variadic(1,2,3); + Kokkos::Impl::expand_variadic( 1, 2, 3 ); } + { typedef Kokkos::RangePolicy<> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Static> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<ExecutionSpace> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename 
execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Static> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::IndexType<long>, ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< Kokkos::IndexType<long>, ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; 
+ typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::RangePolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,ExecutionSpace,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::RangePolicy< Kokkos::Schedule<Kokkos::Dynamic>, ExecutionSpace, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::RangePolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag 
,SomeTag >::value)); + typedef Kokkos::RangePolicy< SomeTag, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space 
,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::RangePolicy< Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::RangePolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::RangePolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::RangePolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename 
policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::RangePolicy< SomeTag, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } } }; @@ -240,258 +255,274 @@ public: test_compile_time_parameters(); test_run_time_parameters(); } + private: void test_compile_time_parameters() { { typedef Kokkos::TeamPolicy<> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Static> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Static> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Static> >::value ) 
); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::IndexType<long>, ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< Kokkos::IndexType<long>, ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( 
std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,ExecutionSpace,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< Kokkos::Schedule<Kokkos::Dynamic>, ExecutionSpace, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::TeamPolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,ExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< SomeTag, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, 
ExecutionSpace > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, ExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,typename execution_space::size_type >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, typename execution_space::size_type >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - 
ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,void >::value)); + typedef Kokkos::TeamPolicy< Kokkos::IndexType<long>, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, void >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,SomeTag > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, SomeTag > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } + { - typedef Kokkos::TeamPolicy<SomeTag,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > policy_t; - typedef typename policy_t::execution_space execution_space; - typedef typename policy_t::index_type index_type; - typedef 
typename policy_t::schedule_type schedule_type; - typedef typename policy_t::work_tag work_tag; - - ASSERT_TRUE((std::is_same<execution_space ,Kokkos::DefaultExecutionSpace >::value)); - ASSERT_TRUE((std::is_same<index_type ,long >::value)); - ASSERT_TRUE((std::is_same<schedule_type ,Kokkos::Schedule<Kokkos::Dynamic> >::value)); - ASSERT_TRUE((std::is_same<work_tag ,SomeTag >::value)); + typedef Kokkos::TeamPolicy< SomeTag, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > policy_t; + typedef typename policy_t::execution_space execution_space; + typedef typename policy_t::index_type index_type; + typedef typename policy_t::schedule_type schedule_type; + typedef typename policy_t::work_tag work_tag; + + ASSERT_TRUE( ( std::is_same< execution_space, Kokkos::DefaultExecutionSpace >::value ) ); + ASSERT_TRUE( ( std::is_same< index_type, long >::value ) ); + ASSERT_TRUE( ( std::is_same< schedule_type, Kokkos::Schedule<Kokkos::Dynamic> >::value ) ); + ASSERT_TRUE( ( std::is_same< work_tag, SomeTag >::value ) ); } } - template<class policy_t> + template< class policy_t > void test_run_time_parameters_type() { int league_size = 131; - int team_size = 4<policy_t::execution_space::concurrency()?4:policy_t::execution_space::concurrency(); + int team_size = 4 < policy_t::execution_space::concurrency() ? 4 : policy_t::execution_space::concurrency(); int chunk_size = 4; int per_team_scratch = 1024; int per_thread_scratch = 16; - int scratch_size = per_team_scratch + per_thread_scratch*team_size; - policy_t p1(league_size,team_size); - ASSERT_EQ (p1.league_size() , league_size); - ASSERT_EQ (p1.team_size() , team_size); - ASSERT_TRUE(p1.chunk_size() > 0); - ASSERT_EQ (p1.scratch_size(0), 0); - - policy_t p2 = p1.set_chunk_size(chunk_size); - ASSERT_EQ (p1.league_size() , league_size); - ASSERT_EQ (p1.team_size() , team_size); - ASSERT_TRUE(p1.chunk_size() > 0); - ASSERT_EQ (p1.scratch_size(0), 0); - - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - - policy_t p3 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p3.league_size() , league_size); - ASSERT_EQ (p3.team_size() , team_size); - ASSERT_EQ (p3.chunk_size() , chunk_size); - ASSERT_EQ (p3.scratch_size(0), per_team_scratch); - - policy_t p4 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p4.league_size() , league_size); - ASSERT_EQ (p4.team_size() , team_size); - ASSERT_EQ (p4.chunk_size() , chunk_size); - ASSERT_EQ (p4.scratch_size(0), per_thread_scratch*team_size); - - policy_t p5 = p2.set_scratch_size(0,Kokkos::PerThread(per_thread_scratch),Kokkos::PerTeam(per_team_scratch)); - ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p5.league_size() , league_size); - ASSERT_EQ (p5.team_size() , team_size); - ASSERT_EQ (p5.chunk_size() , chunk_size); - ASSERT_EQ (p5.scratch_size(0), scratch_size); - - policy_t p6 = p2.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); - 
ASSERT_EQ (p2.league_size() , league_size); - ASSERT_EQ (p2.team_size() , team_size); - ASSERT_EQ (p2.chunk_size() , chunk_size); - ASSERT_EQ (p2.scratch_size(0), 0); - ASSERT_EQ (p6.league_size() , league_size); - ASSERT_EQ (p6.team_size() , team_size); - ASSERT_EQ (p6.chunk_size() , chunk_size); - ASSERT_EQ (p6.scratch_size(0), scratch_size); - - policy_t p7 = p3.set_scratch_size(0,Kokkos::PerTeam(per_team_scratch),Kokkos::PerThread(per_thread_scratch)); - ASSERT_EQ (p3.league_size() , league_size); - ASSERT_EQ (p3.team_size() , team_size); - ASSERT_EQ (p3.chunk_size() , chunk_size); - ASSERT_EQ (p3.scratch_size(0), per_team_scratch); - ASSERT_EQ (p7.league_size() , league_size); - ASSERT_EQ (p7.team_size() , team_size); - ASSERT_EQ (p7.chunk_size() , chunk_size); - ASSERT_EQ (p7.scratch_size(0), scratch_size); -} + int scratch_size = per_team_scratch + per_thread_scratch * team_size; + + policy_t p1( league_size, team_size ); + ASSERT_EQ ( p1.league_size(), league_size ); + ASSERT_EQ ( p1.team_size(), team_size ); + ASSERT_TRUE( p1.chunk_size() > 0 ); + ASSERT_EQ ( p1.scratch_size( 0 ), 0 ); + + policy_t p2 = p1.set_chunk_size( chunk_size ); + ASSERT_EQ ( p1.league_size(), league_size ); + ASSERT_EQ ( p1.team_size(), team_size ); + ASSERT_TRUE( p1.chunk_size() > 0 ); + ASSERT_EQ ( p1.scratch_size( 0 ), 0 ); + + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + + policy_t p3 = p2.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p3.league_size(), league_size ); + ASSERT_EQ ( p3.team_size(), team_size ); + ASSERT_EQ ( p3.chunk_size(), chunk_size ); + ASSERT_EQ ( p3.scratch_size( 0 ), per_team_scratch ); + + policy_t p4 = p2.set_scratch_size( 0, Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p4.league_size(), league_size ); + ASSERT_EQ ( p4.team_size(), team_size ); + ASSERT_EQ ( p4.chunk_size(), chunk_size ); + ASSERT_EQ ( p4.scratch_size( 0 ), per_thread_scratch * team_size ); + + policy_t p5 = p2.set_scratch_size( 0, Kokkos::PerThread( per_thread_scratch ), Kokkos::PerTeam( per_team_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p5.league_size(), league_size ); + ASSERT_EQ ( p5.team_size(), team_size ); + ASSERT_EQ ( p5.chunk_size(), chunk_size ); + ASSERT_EQ ( p5.scratch_size( 0 ), scratch_size ); + + policy_t p6 = p2.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ), Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p2.league_size(), league_size ); + ASSERT_EQ ( p2.team_size(), team_size ); + ASSERT_EQ ( p2.chunk_size(), chunk_size ); + ASSERT_EQ ( p2.scratch_size( 0 ), 0 ); + ASSERT_EQ ( p6.league_size(), league_size ); + ASSERT_EQ ( p6.team_size(), team_size ); + ASSERT_EQ ( p6.chunk_size(), chunk_size ); + ASSERT_EQ ( p6.scratch_size( 0 ), scratch_size ); + + policy_t p7 = p3.set_scratch_size( 0, Kokkos::PerTeam( per_team_scratch ), Kokkos::PerThread( per_thread_scratch ) ); + ASSERT_EQ ( p3.league_size(), 
league_size ); + ASSERT_EQ ( p3.team_size(), team_size ); + ASSERT_EQ ( p3.chunk_size(), chunk_size ); + ASSERT_EQ ( p3.scratch_size( 0 ), per_team_scratch ); + ASSERT_EQ ( p7.league_size(), league_size ); + ASSERT_EQ ( p7.team_size(), team_size ); + ASSERT_EQ ( p7.chunk_size(), chunk_size ); + ASSERT_EQ ( p7.scratch_size( 0 ), scratch_size ); + } + void test_run_time_parameters() { - test_run_time_parameters_type<Kokkos::TeamPolicy<ExecutionSpace> >(); - test_run_time_parameters_type<Kokkos::TeamPolicy<ExecutionSpace,Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long> > >(); - test_run_time_parameters_type<Kokkos::TeamPolicy<Kokkos::IndexType<long>, ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > >(); - test_run_time_parameters_type<Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,Kokkos::IndexType<long>,ExecutionSpace,SomeTag > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy<ExecutionSpace> >(); + test_run_time_parameters_type< Kokkos::TeamPolicy<ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long> > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy<Kokkos::IndexType<long>, ExecutionSpace, Kokkos::Schedule<Kokkos::Dynamic> > >(); + test_run_time_parameters_type< Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>, Kokkos::IndexType<long>, ExecutionSpace, SomeTag > >(); } }; diff --git a/lib/kokkos/core/unit_test/TestQthread.cpp b/lib/kokkos/core/unit_test/TestQthread.cpp deleted file mode 100644 index a465f39ca8ab428b72b68c103ec3989c92fb670f..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestQthread.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include <gtest/gtest.h> - -#include <Kokkos_Core.hpp> -#include <Kokkos_Qthread.hpp> - -//---------------------------------------------------------------------------- - -#include <TestAtomic.hpp> - -#include <TestViewAPI.hpp> -#include <TestViewOfClass.hpp> - -#include <TestTeam.hpp> -#include <TestRange.hpp> -#include <TestReduce.hpp> -#include <TestScan.hpp> -#include <TestAggregate.hpp> -#include <TestCompilerMacros.hpp> -#include <TestTaskScheduler.hpp> -// #include <TestTeamVector.hpp> - -namespace Test { - -class qthread : public ::testing::Test { -protected: - static void SetUpTestCase() - { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - int threads_count = std::max( 1u , numa_count ) - * std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); - Kokkos::Qthread::initialize( threads_count ); - Kokkos::Qthread::print_configuration( std::cout , true ); - } - - static void TearDownTestCase() - { - Kokkos::Qthread::finalize(); - } -}; - -TEST_F( qthread , compiler_macros ) -{ - ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthread >() ) ); -} - -TEST_F( qthread, view_impl) { - test_view_impl< Kokkos::Qthread >(); -} - -TEST_F( qthread, view_api) { - TestViewAPI< double , Kokkos::Qthread >(); -} - -TEST_F( qthread , view_nested_view ) -{ - ::Test::view_nested_view< Kokkos::Qthread >(); -} - -TEST_F( qthread , range_tag ) -{ - TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); -} - -TEST_F( qthread , team_tag ) -{ - TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); - TestTeamPolicy< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); -} - -TEST_F( qthread, long_reduce) { - TestReduce< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, double_reduce) { - TestReduce< double , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Qthread >( 1000000 ); -} - -TEST_F( qthread, team_long_reduce) { - TestReduceTeam< long , Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 1000000 ); -} - -TEST_F( qthread, team_double_reduce) { - TestReduceTeam< double , Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 1000000 ); -} - - -TEST_F( qthread , atomics ) -{ - const int loop_count = 1e4 ; - - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long 
int,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Qthread>(loop_count,3) ) ); - - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Qthread>(100,3) ) ); - -#if defined( KOKKOS_ENABLE_ASM ) - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Qthread>(100,3) ) ); -#endif - -} - -TEST_F( qthread , view_remap ) -{ - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Qthread > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Qthread > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Qthread > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} -} - -//---------------------------------------------------------------------------- - -TEST_F( qthread , view_aggregate ) -{ - TestViewAggregate< Kokkos::Qthread >(); -} - -//---------------------------------------------------------------------------- - -TEST_F( qthread , scan ) -{ - TestScan< Kokkos::Qthread >::test_range( 1 , 1000 ); - TestScan< Kokkos::Qthread >( 1000000 ); - TestScan< Kokkos::Qthread >( 10000000 ); - Kokkos::Qthread::fence(); -} - -TEST_F( qthread, team_shared ) { - TestSharedTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >(); -} - -TEST_F( qthread, shmem_size) { - TestShmemSize< Kokkos::Qthread >(); -} - -TEST_F( qthread , team_scan ) -{ - TestScanTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Qthread , Kokkos::Schedule<Kokkos::Static> >( 10000 ); -} - -#if 0 /* disable */ -TEST_F( qthread , team_vector ) -{ - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(0) ) ); - 
ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthread >(4) ) ); -} -#endif - -//---------------------------------------------------------------------------- - -TEST_F( qthread , task_policy ) -{ - TestTaskScheduler::test_task_dep< Kokkos::Qthread >( 10 ); - for ( long i = 0 ; i < 25 ; ++i ) TestTaskScheduler::test_fib< Kokkos::Qthread >(i); - for ( long i = 0 ; i < 35 ; ++i ) TestTaskScheduler::test_fib2< Kokkos::Qthread >(i); -} - -TEST_F( qthread , task_team ) -{ - TestTaskScheduler::test_task_team< Kokkos::Qthread >(1000); -} - -//---------------------------------------------------------------------------- - -} // namespace test - diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp index e342e844c7665650732a38e49063abee626a4a8c..90411a57a0c9c871f946dd3a8b04b4af0554b380 100644 --- a/lib/kokkos/core/unit_test/TestRange.hpp +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,198 +45,204 @@ #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template< class ExecSpace, class ScheduleType > struct TestRange { + typedef int value_type; ///< typedef required for the parallel_reduce - typedef int value_type ; ///< typedef required for the parallel_reduce - - typedef Kokkos::View<int*,ExecSpace> view_type ; + typedef Kokkos::View< int*, ExecSpace > view_type; - view_type m_flags ; + view_type m_flags; struct VerifyInitTag {}; struct ResetTag {}; struct VerifyResetTag {}; TestRange( const size_t N ) - : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags"), N ) + : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), N ) {} static void test_for( const size_t N ) - { - TestRange functor(N); + { + TestRange functor( N ); - typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); + typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( functor.m_flags ); - Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); - Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType,VerifyInitTag>(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag >( 0, N ), functor ); - Kokkos::deep_copy( host_flags , functor.m_flags ); + Kokkos::deep_copy( host_flags, functor.m_flags ); - size_t error_count = 0 ; - for ( size_t i = 0 ; i < N ; ++i ) { - if ( int(i) != host_flags(i) ) ++error_count ; - } - ASSERT_EQ( error_count , size_t(0) ); + size_t error_count = 
0; + for ( size_t i = 0; i < N; ++i ) { + if ( int( i ) != host_flags( i ) ) ++error_count; + } + ASSERT_EQ( error_count, size_t( 0 ) ); - Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType,ResetTag>(0,N) , functor ); - Kokkos::parallel_for( std::string("TestKernelFor") , Kokkos::RangePolicy<ExecSpace,ScheduleType,VerifyResetTag>(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, ResetTag >( 0, N ), functor ); + Kokkos::parallel_for( std::string( "TestKernelFor" ), Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyResetTag >( 0, N ), functor ); - Kokkos::deep_copy( host_flags , functor.m_flags ); + Kokkos::deep_copy( host_flags, functor.m_flags ); - error_count = 0 ; - for ( size_t i = 0 ; i < N ; ++i ) { - if ( int(2*i) != host_flags(i) ) ++error_count ; - } - ASSERT_EQ( error_count , size_t(0) ); + error_count = 0; + for ( size_t i = 0; i < N; ++i ) { + if ( int( 2 * i ) != host_flags( i ) ) ++error_count; } + ASSERT_EQ( error_count, size_t( 0 ) ); + } KOKKOS_INLINE_FUNCTION void operator()( const int i ) const - { m_flags(i) = i ; } + { m_flags( i ) = i; } KOKKOS_INLINE_FUNCTION - void operator()( const VerifyInitTag & , const int i ) const - { if ( i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + void operator()( const VerifyInitTag &, const int i ) const + { + if ( i != m_flags( i ) ) { + printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); + } + } KOKKOS_INLINE_FUNCTION - void operator()( const ResetTag & , const int i ) const - { m_flags(i) = 2 * m_flags(i); } + void operator()( const ResetTag &, const int i ) const + { m_flags( i ) = 2 * m_flags( i ); } KOKKOS_INLINE_FUNCTION - void operator()( const VerifyResetTag & , const int i ) const - { if ( 2 * i != m_flags(i) ) { printf("TestRange::test_for error at %d != %d\n",i,m_flags(i)); } } + void operator()( const VerifyResetTag &, const int i ) const + { + if ( 2 * i != m_flags( i ) ) + { + printf( "TestRange::test_for error at %d != %d\n", i, m_flags( i ) ); + } + } //---------------------------------------- struct OffsetTag {}; static void test_reduce( const size_t N ) - { - TestRange functor(N); - int total = 0 ; + { + TestRange functor( N ); + int total = 0; - Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); - Kokkos::parallel_reduce( "TestKernelReduce" , Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor , total ); - // sum( 0 .. N-1 ) - ASSERT_EQ( size_t((N-1)*(N)/2) , size_t(total) ); + Kokkos::parallel_reduce( "TestKernelReduce", Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor, total ); + // sum( 0 .. N-1 ) + ASSERT_EQ( size_t( ( N - 1 ) * ( N ) / 2 ), size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::RangePolicy<ExecSpace,ScheduleType,OffsetTag>(0,N) , functor , total ); - // sum( 1 .. N ) - ASSERT_EQ( size_t((N)*(N+1)/2) , size_t(total) ); - } + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor, total ); + // sum( 1 .. 
N ) + ASSERT_EQ( size_t( ( N ) * ( N + 1 ) / 2 ), size_t( total ) ); + } KOKKOS_INLINE_FUNCTION - void operator()( const int i , value_type & update ) const - { update += m_flags(i); } + void operator()( const int i, value_type & update ) const + { update += m_flags( i ); } KOKKOS_INLINE_FUNCTION - void operator()( const OffsetTag & , const int i , value_type & update ) const - { update += 1 + m_flags(i); } + void operator()( const OffsetTag &, const int i, value_type & update ) const + { update += 1 + m_flags( i ); } //---------------------------------------- static void test_scan( const size_t N ) - { - TestRange functor(N); + { + TestRange functor( N ); - Kokkos::parallel_for( Kokkos::RangePolicy<ExecSpace,ScheduleType>(0,N) , functor ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), functor ); - Kokkos::parallel_scan( "TestKernelScan" , Kokkos::RangePolicy<ExecSpace,ScheduleType,OffsetTag>(0,N) , functor ); - } + Kokkos::parallel_scan( "TestKernelScan", Kokkos::RangePolicy< ExecSpace, ScheduleType, OffsetTag>( 0, N ), functor ); + } KOKKOS_INLINE_FUNCTION - void operator()( const OffsetTag & , const int i , value_type & update , bool final ) const - { - update += m_flags(i); + void operator()( const OffsetTag &, const int i, value_type & update, bool final ) const + { + update += m_flags( i ); - if ( final ) { - if ( update != (i*(i+1))/2 ) { - printf("TestRange::test_scan error %d : %d != %d\n",i,(i*(i+1))/2,m_flags(i)); - } + if ( final ) { + if ( update != ( i * ( i + 1 ) ) / 2 ) { + printf( "TestRange::test_scan error %d : %d != %d\n", i, ( i * ( i + 1 ) ) / 2, m_flags( i ) ); } } + } - static void test_dynamic_policy( const size_t N ) { - - - typedef Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> > policy_t; + static void test_dynamic_policy( const size_t N ) + { + typedef Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> > policy_t; { - Kokkos::View<size_t*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > count("Count",ExecSpace::concurrency()); - Kokkos::View<int*,ExecSpace> a("A",N); - - Kokkos::parallel_for( policy_t(0,N), - KOKKOS_LAMBDA (const typename policy_t::member_type& i) { - for(int k=0; k<(i<N/2?1:10000); k++ ) - a(i)++; - count(ExecSpace::hardware_thread_id())++; + Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > count( "Count", ExecSpace::concurrency() ); + Kokkos::View< int*, ExecSpace > a( "A", N ); + + Kokkos::parallel_for( policy_t( 0, N ), KOKKOS_LAMBDA ( const typename policy_t::member_type& i ) { + for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { + a( i )++; + } + count( ExecSpace::hardware_thread_id() )++; }); int error = 0; - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { - lsum += ( a(i)!= (i<N/2?1:10000) ); - },error); - ASSERT_EQ(error,0); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + lsum += ( a( i ) != ( i < N / 2 ? 
1 : 10000 ) ); + }, error ); + ASSERT_EQ( error, 0 ); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<size_t>( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; - for(int t=0; t<ExecSpace::concurrency(); t++) { - if(count(t)<min) min = count(t); - if(count(t)>max) max = count(t); + for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { + if ( count( t ) < min ) min = count( t ); + if ( count( t ) > max ) max = count( t ); } - ASSERT_TRUE(min<max); - //if(ExecSpace::concurrency()>2) - // ASSERT_TRUE(2*min<max); + ASSERT_TRUE( min < max ); + + //if ( ExecSpace::concurrency() > 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} } - } { - Kokkos::View<size_t*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > count("Count",ExecSpace::concurrency()); - Kokkos::View<int*,ExecSpace> a("A",N); + Kokkos::View< size_t*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > count( "Count", ExecSpace::concurrency() ); + Kokkos::View< int*, ExecSpace> a( "A", N ); int sum = 0; - Kokkos::parallel_reduce( policy_t(0,N), - KOKKOS_LAMBDA (const typename policy_t::member_type& i, int& lsum) { - for(int k=0; k<(i<N/2?1:10000); k++ ) - a(i)++; - count(ExecSpace::hardware_thread_id())++; + Kokkos::parallel_reduce( policy_t( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + for ( int k = 0; k < ( i < N / 2 ? 1 : 10000 ); k++ ) { + a( i )++; + } + count( ExecSpace::hardware_thread_id() )++; lsum++; - },sum); - ASSERT_EQ(sum,N); + }, sum ); + ASSERT_EQ( sum, N ); int error = 0; - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N), KOKKOS_LAMBDA(const typename policy_t::member_type& i, int& lsum) { - lsum += ( a(i)!= (i<N/2?1:10000) ); - },error); - ASSERT_EQ(error,0); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), KOKKOS_LAMBDA( const typename policy_t::member_type & i, int & lsum ) { + lsum += ( a( i ) != ( i < N / 2 ? 
1 : 10000 ) ); + }, error ); + ASSERT_EQ( error, 0 ); - if( ( ExecSpace::concurrency()>(int)1) && (N>static_cast<size_t>(4*ExecSpace::concurrency())) ) { + if ( ( ExecSpace::concurrency() > (int) 1 ) && ( N > static_cast<size_t>( 4 * ExecSpace::concurrency() ) ) ) { size_t min = N; size_t max = 0; - for(int t=0; t<ExecSpace::concurrency(); t++) { - if(count(t)<min) min = count(t); - if(count(t)>max) max = count(t); + for ( int t = 0; t < ExecSpace::concurrency(); t++ ) { + if ( count( t ) < min ) min = count( t ); + if ( count( t ) > max ) max = count( t ); } - ASSERT_TRUE(min<max); - //if(ExecSpace::concurrency()>2) - // ASSERT_TRUE(2*min<max); + ASSERT_TRUE( min < max ); + + //if ( ExecSpace::concurrency() > 2 ) { + // ASSERT_TRUE( 2 * min < max ); + //} } } - } }; -} /* namespace */ -} /* namespace Test */ - -/*--------------------------------------------------------------------------*/ +} // namespace +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestReduce.hpp b/lib/kokkos/core/unit_test/TestReduce.hpp index 645fc9e31b3b1cf86d06779304343cc93cc2242a..7e77dadf6249fe3eaa763c0c9848b93965379e7e 100644 --- a/lib/kokkos/core/unit_test/TestReduce.hpp +++ b/lib/kokkos/core/unit_test/TestReduce.hpp @@ -48,24 +48,23 @@ #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class ReduceFunctor { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; struct value_type { - ScalarType value[3] ; + ScalarType value[3]; }; - const size_type nwork ; + const size_type nwork; - ReduceFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} + ReduceFunctor( const size_type & arg_nwork ) + : nwork( arg_nwork ) {} ReduceFunctor( const ReduceFunctor & rhs ) : nwork( rhs.nwork ) {} @@ -74,66 +73,63 @@ public: KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { - dst.value[0] = 0 ; - dst.value[1] = 0 ; - dst.value[2] = 0 ; + dst.value[0] = 0; + dst.value[1] = 0; + dst.value[2] = 0; } */ KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst , + void join( volatile value_type & dst, const volatile value_type & src ) const { - dst.value[0] += src.value[0] ; - dst.value[1] += src.value[1] ; - dst.value[2] += src.value[2] ; + dst.value[0] += src.value[0]; + dst.value[1] += src.value[1]; + dst.value[2] += src.value[2]; } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , value_type & dst ) const + void operator()( size_type iwork, value_type & dst ) const { - dst.value[0] += 1 ; - dst.value[1] += iwork + 1 ; - dst.value[2] += nwork - iwork ; + dst.value[0] += 1; + dst.value[1] += iwork + 1; + dst.value[2] += nwork - iwork; } }; template< class DeviceType > -class ReduceFunctorFinal : public ReduceFunctor< long , DeviceType > { +class ReduceFunctorFinal : public ReduceFunctor< long, DeviceType > { public: - - typedef typename ReduceFunctor< long , DeviceType >::value_type value_type ; + typedef typename ReduceFunctor< long, DeviceType >::value_type value_type; ReduceFunctorFinal( const size_t n ) - : ReduceFunctor<long,DeviceType>(n) - {} + : ReduceFunctor< long, DeviceType >( n ) {} KOKKOS_INLINE_FUNCTION void final( value_type & dst ) const { - dst.value[0] = - dst.value[0] ; - dst.value[1] = - dst.value[1] ; - dst.value[2] = - dst.value[2] ; + dst.value[0] 
= -dst.value[0]; + dst.value[1] = -dst.value[1]; + dst.value[2] = -dst.value[2]; } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class RuntimeReduceFunctor { public: // Required for functor: - typedef DeviceType execution_space ; - typedef ScalarType value_type[] ; - const unsigned value_count ; - + typedef DeviceType execution_space; + typedef ScalarType value_type[]; + const unsigned value_count; // Unit test details: - typedef typename execution_space::size_type size_type ; + typedef typename execution_space::size_type size_type; - const size_type nwork ; + const size_type nwork; - RuntimeReduceFunctor( const size_type arg_nwork , + RuntimeReduceFunctor( const size_type arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) {} @@ -141,247 +137,251 @@ public: KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile ScalarType dst[] , + void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , ScalarType dst[] ) const + void operator()( size_type iwork, ScalarType dst[] ) const { - const size_type tmp[3] = { 1 , iwork + 1 , nwork - iwork }; + const size_type tmp[3] = { 1, iwork + 1, nwork - iwork }; - for ( size_type i = 0 ; i < value_count ; ++i ) { + for ( size_type i = 0; i < value_count; ++i ) { dst[i] += tmp[ i % 3 ]; } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class RuntimeReduceMinMax { public: // Required for functor: - typedef DeviceType execution_space ; - typedef ScalarType value_type[] ; - const unsigned value_count ; + typedef DeviceType execution_space; + typedef ScalarType value_type[]; + const unsigned value_count; // Unit test details: - typedef typename execution_space::size_type size_type ; + typedef typename execution_space::size_type size_type; - const size_type nwork ; - const ScalarType amin ; - const ScalarType amax ; + const size_type nwork; + const ScalarType amin; + const ScalarType amax; - RuntimeReduceMinMax( const size_type arg_nwork , + RuntimeReduceMinMax( const size_type arg_nwork, const size_type arg_count ) : value_count( arg_count ) , nwork( arg_nwork ) - , amin( std::numeric_limits<ScalarType>::min() ) - , amax( std::numeric_limits<ScalarType>::max() ) + , amin( std::numeric_limits< ScalarType >::min() ) + , amax( std::numeric_limits< ScalarType >::max() ) {} KOKKOS_INLINE_FUNCTION void init( ScalarType dst[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) { - dst[i] = i % 2 ? amax : amin ; + for ( unsigned i = 0; i < value_count; ++i ) { + dst[i] = i % 2 ? amax : amin; } } KOKKOS_INLINE_FUNCTION - void join( volatile ScalarType dst[] , + void join( volatile ScalarType dst[], const volatile ScalarType src[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) { + for ( unsigned i = 0; i < value_count; ++i ) { dst[i] = i % 2 ? ( dst[i] < src[i] ? dst[i] : src[i] ) // min : ( dst[i] > src[i] ? 
dst[i] : src[i] ); // max } } KOKKOS_INLINE_FUNCTION - void operator()( size_type iwork , ScalarType dst[] ) const + void operator()( size_type iwork, ScalarType dst[] ) const { - const ScalarType tmp[2] = { ScalarType(iwork + 1) - , ScalarType(nwork - iwork) }; + const ScalarType tmp[2] = { ScalarType( iwork + 1 ) + , ScalarType( nwork - iwork ) }; - for ( size_type i = 0 ; i < value_count ; ++i ) { - dst[i] = i % 2 ? ( dst[i] < tmp[i%2] ? dst[i] : tmp[i%2] ) - : ( dst[i] > tmp[i%2] ? dst[i] : tmp[i%2] ); + for ( size_type i = 0; i < value_count; ++i ) { + dst[i] = i % 2 ? ( dst[i] < tmp[i % 2] ? dst[i] : tmp[i % 2] ) + : ( dst[i] > tmp[i % 2] ? dst[i] : tmp[i % 2] ); } } }; template< class DeviceType > -class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long , DeviceType > { +class RuntimeReduceFunctorFinal : public RuntimeReduceFunctor< long, DeviceType > { public: + typedef RuntimeReduceFunctor< long, DeviceType > base_type; + typedef typename base_type::value_type value_type; + typedef long scalar_type; - typedef RuntimeReduceFunctor< long , DeviceType > base_type ; - typedef typename base_type::value_type value_type ; - typedef long scalar_type ; - - RuntimeReduceFunctorFinal( const size_t theNwork , const size_t count ) : base_type(theNwork,count) {} + RuntimeReduceFunctorFinal( const size_t theNwork, const size_t count ) + : base_type( theNwork, count ) {} KOKKOS_INLINE_FUNCTION void final( value_type dst ) const { - for ( unsigned i = 0 ; i < base_type::value_count ; ++i ) { - dst[i] = - dst[i] ; + for ( unsigned i = 0; i < base_type::value_count; ++i ) { + dst[i] = -dst[i]; } } }; + } // namespace Test namespace { -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduce( const size_type & nwork ) { - run_test(nwork); - run_test_final(nwork); + run_test( nwork ); + run_test_final( nwork ); } void run_test( const size_type & nwork ) { - typedef Test::ReduceFunctor< ScalarType , execution_space > functor_type ; - typedef typename functor_type::value_type value_type ; + typedef Test::ReduceFunctor< ScalarType, execution_space > functor_type; + typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i].value[j] ); } } } void run_test_final( const size_type & nwork ) { - typedef Test::ReduceFunctorFinal< execution_space > functor_type ; - typedef typename functor_type::value_type value_type ; + typedef Test::ReduceFunctorFinal< execution_space > functor_type; + typedef typename functor_type::value_type value_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , - result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, -result[i].value[j] ); } } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduceDynamic { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduceDynamic( const size_type nwork ) { - run_test_dynamic(nwork); - run_test_dynamic_minmax(nwork); - run_test_dynamic_final(nwork); + run_test_dynamic( nwork ); + run_test_dynamic_minmax( nwork ); + run_test_dynamic_final( nwork ); } void run_test_dynamic( const size_type nwork ) { - typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; - ScalarType result[ Repeat ][ Count ] ; + ScalarType result[ Repeat ][ Count ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? 
nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } void run_test_dynamic_minmax( const size_type nwork ) { - typedef Test::RuntimeReduceMinMax< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceMinMax< ScalarType, execution_space > functor_type; enum { Count = 2 }; enum { Repeat = 100 }; - ScalarType result[ Repeat ][ Count ] ; + ScalarType result[ Repeat ][ Count ]; - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "Reduce", nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "Reduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { if ( nwork == 0 ) { - ScalarType amin( std::numeric_limits<ScalarType>::min() ); - ScalarType amax( std::numeric_limits<ScalarType>::max() ); - const ScalarType correct = (j%2) ? amax : amin; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); - } else { - const unsigned long correct = j % 2 ? 1 : nwork ; - ASSERT_EQ( (ScalarType) correct , result[i][j] ); + ScalarType amin( std::numeric_limits< ScalarType >::min() ); + ScalarType amax( std::numeric_limits< ScalarType >::max() ); + const ScalarType correct = ( j % 2 ) ? amax : amin; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); + } + else { + const unsigned long correct = j % 2 ? 1 : nwork; + ASSERT_EQ( (ScalarType) correct, result[i][j] ); } } } @@ -389,169 +389,172 @@ public: void run_test_dynamic_final( const size_type nwork ) { - typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type ; + typedef Test::RuntimeReduceFunctorFinal< execution_space > functor_type; enum { Count = 3 }; enum { Repeat = 100 }; - typename functor_type::scalar_type result[ Repeat ][ Count ] ; + typename functor_type::scalar_type result[ Repeat ][ Count ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - if(i%2==0) - Kokkos::parallel_reduce( nwork , functor_type(nwork,Count) , result[i] ); - else - Kokkos::parallel_reduce( "TestKernelReduce" , nwork , functor_type(nwork,Count) , result[i] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + if ( i % 2 == 0 ) { + Kokkos::parallel_reduce( nwork, functor_type( nwork, Count ), result[i] ); + } + else { + Kokkos::parallel_reduce( "TestKernelReduce", nwork, functor_type( nwork, Count ), result[i] ); + } } - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , - result[i][j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum; + ASSERT_EQ( (ScalarType) correct, -result[i][j] ); } } } }; -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestReduceDynamicView { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; TestReduceDynamicView( const size_type nwork ) { - run_test_dynamic_view(nwork); + run_test_dynamic_view( nwork ); } void run_test_dynamic_view( const size_type nwork ) { - typedef Test::RuntimeReduceFunctor< ScalarType , execution_space > functor_type ; + typedef Test::RuntimeReduceFunctor< ScalarType, execution_space > functor_type; - typedef Kokkos::View< ScalarType* , DeviceType > result_type ; - typedef typename result_type::HostMirror result_host_type ; + typedef Kokkos::View< ScalarType*, DeviceType > result_type; + typedef typename result_type::HostMirror result_host_type; - const unsigned CountLimit = 23 ; + const unsigned CountLimit = 23; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - for ( unsigned count = 0 ; count < CountLimit ; ++count ) { + for ( unsigned count = 0; count < CountLimit; ++count ) { - result_type result("result",count); + result_type result( "result", count ); result_host_type host_result = Kokkos::create_mirror( result ); // Test result to host pointer: - std::string str("TestKernelReduce"); - if(count%2==0) - Kokkos::parallel_reduce( nw , functor_type(nw,count) , host_result.ptr_on_device() ); - else - Kokkos::parallel_reduce( str , nw , functor_type(nw,count) , host_result.ptr_on_device() ); + std::string str( "TestKernelReduce" ); + if ( count % 2 == 0 ) { + Kokkos::parallel_reduce( nw, functor_type( nw, count ), host_result.ptr_on_device() ); + } + else { + Kokkos::parallel_reduce( str, nw, functor_type( nw, count ), host_result.ptr_on_device() ); + } - for ( unsigned j = 0 ; j < count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( host_result(j), (ScalarType) correct ); - host_result(j) = 0 ; + for ( unsigned j = 0; j < count; ++j ) { + const unsigned long correct = 0 == j % 3 ? nw : nsum; + ASSERT_EQ( host_result( j ), (ScalarType) correct ); + host_result( j ) = 0; } } } }; -} + +} // namespace // Computes y^T*A*x -// (modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) +// ( modified from kokkos-tutorials/GTC2016/Exercises/ThreeLevelPar ) #if ( ! 
defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; - //------------------------------------ - - TestTripleNestedReduce( const size_type & nrows , const size_type & ncols - , const size_type & team_size , const size_type & vector_length ) + TestTripleNestedReduce( const size_type & nrows, const size_type & ncols + , const size_type & team_size, const size_type & vector_length ) { - run_test( nrows , ncols , team_size, vector_length ); + run_test( nrows, ncols, team_size, vector_length ); } - void run_test( const size_type & nrows , const size_type & ncols + void run_test( const size_type & nrows, const size_type & ncols , const size_type & team_size, const size_type & vector_length ) { //typedef Kokkos::LayoutLeft Layout; typedef Kokkos::LayoutRight Layout; - typedef Kokkos::View<ScalarType* , DeviceType> ViewVector; - typedef Kokkos::View<ScalarType** , Layout , DeviceType> ViewMatrix; - ViewVector y( "y" , nrows ); - ViewVector x( "x" , ncols ); - ViewMatrix A( "A" , nrows , ncols ); + typedef Kokkos::View< ScalarType*, DeviceType > ViewVector; + typedef Kokkos::View< ScalarType**, Layout, DeviceType > ViewMatrix; + + ViewVector y( "y", nrows ); + ViewVector x( "x", ncols ); + ViewMatrix A( "A", nrows, ncols ); typedef Kokkos::RangePolicy<DeviceType> range_policy; - // Initialize y vector - Kokkos::parallel_for( range_policy( 0 , nrows ) , KOKKOS_LAMBDA( const int i ) { y( i ) = 1; } ); + // Initialize y vector. + Kokkos::parallel_for( range_policy( 0, nrows ), KOKKOS_LAMBDA ( const int i ) { y( i ) = 1; } ); - // Initialize x vector - Kokkos::parallel_for( range_policy( 0 , ncols ) , KOKKOS_LAMBDA( const int i ) { x( i ) = 1; } ); + // Initialize x vector. + Kokkos::parallel_for( range_policy( 0, ncols ), KOKKOS_LAMBDA ( const int i ) { x( i ) = 1; } ); - typedef Kokkos::TeamPolicy<DeviceType> team_policy; - typedef typename Kokkos::TeamPolicy<DeviceType>::member_type member_type; + typedef Kokkos::TeamPolicy< DeviceType > team_policy; + typedef typename Kokkos::TeamPolicy< DeviceType >::member_type member_type; - // Initialize A matrix, note 2D indexing computation - Kokkos::parallel_for( team_policy( nrows , Kokkos::AUTO ) , KOKKOS_LAMBDA( const member_type& teamMember ) { + // Initialize A matrix, note 2D indexing computation. + Kokkos::parallel_for( team_policy( nrows, Kokkos::AUTO ), KOKKOS_LAMBDA ( const member_type & teamMember ) { const int j = teamMember.league_rank(); - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , ncols ) , [&] ( const int i ) { - A( j , i ) = 1; + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, ncols ), [&] ( const int i ) { + A( j, i ) = 1; } ); } ); - // Three level parallelism kernel to force caching of vector x + // Three level parallelism kernel to force caching of vector x. 
ScalarType result = 0.0; int chunk_size = 128; - Kokkos::parallel_reduce( team_policy( nrows/chunk_size , team_size , vector_length ) , KOKKOS_LAMBDA ( const member_type& teamMember , double &update ) { + Kokkos::parallel_reduce( team_policy( nrows / chunk_size, team_size, vector_length ), + KOKKOS_LAMBDA ( const member_type & teamMember, double & update ) { const int row_start = teamMember.league_rank() * chunk_size; const int row_end = row_start + chunk_size; - Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember , row_start , row_end ) , [&] ( const int i ) { + Kokkos::parallel_for( Kokkos::TeamThreadRange( teamMember, row_start, row_end ), [&] ( const int i ) { ScalarType sum_i = 0.0; - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember , ncols ) , [&] ( const int j , ScalarType &innerUpdate ) { - innerUpdate += A( i , j ) * x( j ); - } , sum_i ); - Kokkos::single( Kokkos::PerThread( teamMember ) , [&] () { + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( teamMember, ncols ), [&] ( const int j, ScalarType &innerUpdate ) { + innerUpdate += A( i, j ) * x( j ); + }, sum_i ); + Kokkos::single( Kokkos::PerThread( teamMember ), [&] () { update += y( i ) * sum_i; } ); } ); - } , result ); + }, result ); - const ScalarType solution= ( ScalarType ) nrows * ( ScalarType ) ncols; - ASSERT_EQ( solution , result ); + const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; + ASSERT_EQ( solution, result ); } }; -#else /* #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) */ +#else // #if ( ! defined( KOKKOS_ENABLE_CUDA ) ) || defined( KOKKOS_ENABLE_CUDA_LAMBDA ) -template< typename ScalarType , class DeviceType > +template< typename ScalarType, class DeviceType > class TestTripleNestedReduce { public: - typedef DeviceType execution_space ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; - TestTripleNestedReduce( const size_type & , const size_type - , const size_type & , const size_type ) - { } + TestTripleNestedReduce( const size_type &, const size_type + , const size_type &, const size_type ) + {} }; #endif @@ -559,38 +562,38 @@ public: //-------------------------------------------------------------------------- namespace Test { + namespace ReduceCombinatorical { -template<class Scalar,class Space = Kokkos::HostSpace> +template< class Scalar, class Space = Kokkos::HostSpace > struct AddPlus { public: - //Required + // Required. typedef AddPlus reducer_type; typedef Scalar value_type; - typedef Kokkos::View<value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; + typedef Kokkos::View< value_type, Space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type; private: result_view_type result; public: + AddPlus( value_type & result_ ) : result( &result_ ) {} - AddPlus(value_type& result_):result(&result_) {} - - //Required + // Required. KOKKOS_INLINE_FUNCTION - void join(value_type& dest, const value_type& src) const { + void join( value_type & dest, const value_type & src ) const { dest += src + 1; } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& dest, const volatile value_type& src) const { + void join( volatile value_type & dest, const volatile value_type & src ) const { dest += src + 1; } - //Optional + // Optional. 
KOKKOS_INLINE_FUNCTION - void init( value_type& val) const { + void init( value_type & val ) const { val = value_type(); } @@ -599,624 +602,651 @@ public: } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalar; template<> -struct FunctorScalar<0>{ - FunctorScalar(Kokkos::View<double> r):result(r) {} - Kokkos::View<double> result; +struct FunctorScalar< 0 > { + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i,double& update) const { - update+=i; + void operator()( const int & i, double & update ) const { + update += i; } }; template<> -struct FunctorScalar<1>{ - FunctorScalar(Kokkos::View<double> r):result(r) {} - Kokkos::View<double> result; - +struct FunctorScalar< 1 > { typedef Kokkos::TeamPolicy<>::member_type team_type; + + Kokkos::View< double > result; + + FunctorScalar( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarInit; template<> -struct FunctorScalarInit<0> { - FunctorScalarInit(Kokkos::View<double> r):result(r) {} +struct FunctorScalarInit< 0 > { + Kokkos::View< double > result; - Kokkos::View<double> result; + FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarInit<1> { - FunctorScalarInit(Kokkos::View<double> r):result(r) {} +struct FunctorScalarInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; + + FunctorScalarInit( Kokkos::View< double > r ) : result( r ) {} - typedef Kokkos::TeamPolicy<>::member_type team_type; KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarFinal; - template<> -struct FunctorScalarFinal<0> { - FunctorScalarFinal(Kokkos::View<double> r):result(r) {} - +struct FunctorScalarFinal< 0 > { Kokkos::View<double> result; + + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; template<> -struct FunctorScalarFinal<1> { - FunctorScalarFinal(Kokkos::View<double> r):result(r) {} +struct FunctorScalarFinal< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; - typedef Kokkos::TeamPolicy<>::member_type team_type; + FunctorScalarFinal( Kokkos::View< double > r ) : result( r ) {} 
KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team, double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } + KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarJoin; template<> -struct FunctorScalarJoin<0> { - FunctorScalarJoin(Kokkos::View<double> r):result(r) {} - +struct FunctorScalarJoin< 0 > { Kokkos::View<double> result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; template<> -struct FunctorScalarJoin<1> { - FunctorScalarJoin(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoin< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; + + FunctorScalarJoin( Kokkos::View< double > r ) : result( r ) {} - typedef Kokkos::TeamPolicy<>::member_type team_type; KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarJoinFinal; template<> -struct FunctorScalarJoinFinal<0> { - FunctorScalarJoinFinal(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoinFinal< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} - Kokkos::View<double> result; KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; template<> -struct FunctorScalarJoinFinal<1> { - FunctorScalarJoinFinal(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoinFinal< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; + + FunctorScalarJoinFinal( Kokkos::View< double > r ) : result( r ) {} - typedef Kokkos::TeamPolicy<>::member_type team_type; KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += 
update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarJoinInit; template<> -struct FunctorScalarJoinInit<0> { - FunctorScalarJoinInit(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoinInit< 0 > { + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} - Kokkos::View<double> result; KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarJoinInit<1> { - FunctorScalarJoinInit(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoinInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; + + FunctorScalarJoinInit( Kokkos::View< double > r ) : result( r ) {} - typedef Kokkos::TeamPolicy<>::member_type team_type; KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; -template<int ISTEAM> +template< int ISTEAM > struct FunctorScalarJoinFinalInit; template<> -struct FunctorScalarJoinFinalInit<0> { - FunctorScalarJoinFinalInit(Kokkos::View<double> r):result(r) {} - +struct FunctorScalarJoinFinalInit< 0 > { Kokkos::View<double> result; + FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, double& update) const { + void operator()( const int & i, double & update ) const { update += i; } KOKKOS_INLINE_FUNCTION - void join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; template<> -struct FunctorScalarJoinFinalInit<1> { - FunctorScalarJoinFinalInit(Kokkos::View<double> r):result(r) {} +struct FunctorScalarJoinFinalInit< 1 > { + typedef Kokkos::TeamPolicy<>::member_type team_type; - Kokkos::View<double> result; + Kokkos::View< double > result; + + FunctorScalarJoinFinalInit( Kokkos::View< double > r ) : result( r ) {} - typedef Kokkos::TeamPolicy<>::member_type team_type; KOKKOS_INLINE_FUNCTION - void operator() (const team_type& team,double& update) const { - update+=1.0/team.team_size()*team.league_rank(); + void operator()( const team_type & team, double & update ) const { + update += 1.0 / team.team_size() * team.league_rank(); } KOKKOS_INLINE_FUNCTION - void 
join(volatile double& dst, const volatile double& update) const { + void join( volatile double & dst, const volatile double & update ) const { dst += update; } KOKKOS_INLINE_FUNCTION - void final(double& update) const { + void final( double & update ) const { result() = update; } KOKKOS_INLINE_FUNCTION - void init(double& update) const { + void init( double & update ) const { update = 0.0; } }; + struct Functor1 { KOKKOS_INLINE_FUNCTION - void operator() (const int& i,double& update) const { - update+=i; + void operator()( const int & i, double & update ) const { + update += i; } }; struct Functor2 { typedef double value_type[]; + const unsigned value_count; - Functor2(unsigned n):value_count(n){} + Functor2( unsigned n ) : value_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator() (const unsigned& i,double update[]) const { - for(unsigned j=0;j<value_count;j++) - update[j]+=i; + void operator()( const unsigned & i, double update[] ) const { + for ( unsigned j = 0; j < value_count; j++ ) { + update[j] += i; + } } KOKKOS_INLINE_FUNCTION void init( double dst[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] = 0 ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile double dst[] , + void join( volatile double dst[], const volatile double src[] ) const { - for ( unsigned i = 0 ; i < value_count ; ++i ) dst[i] += src[i] ; + for ( unsigned i = 0; i < value_count; ++i ) dst[i] += src[i]; } }; -} -} +} // namespace ReduceCombinatorical + +} // namespace Test namespace Test { -template<class ExecSpace = Kokkos::DefaultExecutionSpace> +template< class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReduceCombinatoricalInstantiation { - template<class ... Args> - static void CallParallelReduce(Args... args) { - Kokkos::parallel_reduce(args...); + template< class ... Args > + static void CallParallelReduce( Args... args ) { + Kokkos::parallel_reduce( args... ); } - template<class ... Args> - static void AddReturnArgument(Args... args) { - Kokkos::View<double,Kokkos::HostSpace> result_view("ResultView"); - double expected_result = 1000.0*999.0/2.0; + template< class ... Args > + static void AddReturnArgument( Args... 
args ) { + Kokkos::View< double, Kokkos::HostSpace > result_view( "ResultView" ); + double expected_result = 1000.0 * 999.0 / 2.0; double value = 0; - Kokkos::parallel_reduce(args...,value); - ASSERT_EQ(expected_result,value); + Kokkos::parallel_reduce( args..., value ); + ASSERT_EQ( expected_result, value ); result_view() = 0; - CallParallelReduce(args...,result_view); - ASSERT_EQ(expected_result,result_view()); + CallParallelReduce( args..., result_view ); + ASSERT_EQ( expected_result, result_view() ); value = 0; - CallParallelReduce(args...,Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>(&value)); - ASSERT_EQ(expected_result,value); + CallParallelReduce( args..., Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >( &value ) ); + ASSERT_EQ( expected_result, value ); result_view() = 0; - const Kokkos::View<double,Kokkos::HostSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> result_view_const_um = result_view; - CallParallelReduce(args...,result_view_const_um); - ASSERT_EQ(expected_result,result_view_const_um()); + const Kokkos::View< double, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_const_um = result_view; + CallParallelReduce( args..., result_view_const_um ); + ASSERT_EQ( expected_result, result_view_const_um() ); value = 0; - CallParallelReduce(args...,Test::ReduceCombinatorical::AddPlus<double>(value)); - if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<value); - else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<=value); - else - ASSERT_EQ(expected_result,value); + CallParallelReduce( args..., Test::ReduceCombinatorical::AddPlus< double >( value ) ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } value = 0; - Test::ReduceCombinatorical::AddPlus<double> add(value); - CallParallelReduce(args...,add); - if((Kokkos::DefaultExecutionSpace::concurrency() > 1) && (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<value); - else if((Kokkos::DefaultExecutionSpace::concurrency() > 1) || (ExecSpace::concurrency()>1)) - ASSERT_TRUE(expected_result<=value); - else - ASSERT_EQ(expected_result,value); + Test::ReduceCombinatorical::AddPlus< double > add( value ); + CallParallelReduce( args..., add ); + if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) && ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result < value ); + } + else if ( ( Kokkos::DefaultExecutionSpace::concurrency() > 1 ) || ( ExecSpace::concurrency() > 1 ) ) { + ASSERT_TRUE( expected_result <= value ); + } + else { + ASSERT_EQ( expected_result, value ); + } } - - template<class ... Args> - static void AddLambdaRange(void*,Args... args) { - AddReturnArgument(args..., KOKKOS_LAMBDA (const int&i , double& lsum) { + template< class ... Args > + static void AddLambdaRange( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const int & i, double & lsum ) { lsum += i; }); } - template<class ... Args> - static void AddLambdaTeam(void*,Args... 
args) { - AddReturnArgument(args..., KOKKOS_LAMBDA (const Kokkos::TeamPolicy<>::member_type& team, double& update) { - update+=1.0/team.team_size()*team.league_rank(); + template< class ... Args > + static void AddLambdaTeam( void*, Args... args ) { + AddReturnArgument( args..., KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type & team, double & update ) { + update += 1.0 / team.team_size() * team.league_rank(); }); } - template<class ... Args> - static void AddLambdaRange(Kokkos::InvalidType,Args... args) { - } + template< class ... Args > + static void AddLambdaRange( Kokkos::InvalidType, Args... args ) {} - template<class ... Args> - static void AddLambdaTeam(Kokkos::InvalidType,Args... args) { - } + template< class ... Args > + static void AddLambdaTeam( Kokkos::InvalidType, Args... args ) {} - template<int ISTEAM, class ... Args> - static void AddFunctor(Args... args) { - Kokkos::View<double> result_view("FunctorView"); - auto h_r = Kokkos::create_mirror_view(result_view); - Test::ReduceCombinatorical::FunctorScalar<ISTEAM> functor(result_view); - double expected_result = 1000.0*999.0/2.0; + template< int ISTEAM, class ... Args > + static void AddFunctor( Args... args ) { + Kokkos::View< double > result_view( "FunctorView" ); + auto h_r = Kokkos::create_mirror_view( result_view ); + Test::ReduceCombinatorical::FunctorScalar< ISTEAM > functor( result_view ); + double expected_result = 1000.0 * 999.0 / 2.0; - AddReturnArgument(args..., functor); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalar<ISTEAM>(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarInit<ISTEAM>(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoin<ISTEAM>(result_view)); - AddReturnArgument(args..., Test::ReduceCombinatorical::FunctorScalarJoinInit<ISTEAM>(result_view)); + AddReturnArgument( args..., functor ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalar< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarInit< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoin< ISTEAM >( result_view ) ); + AddReturnArgument( args..., Test::ReduceCombinatorical::FunctorScalarJoinInit< ISTEAM >( result_view ) ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarFinal<ISTEAM>(result_view)); - Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal<ISTEAM>(result_view)); - Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., Test::ReduceCombinatorical::FunctorScalarJoinFinal< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); h_r() = 0; - Kokkos::deep_copy(result_view,h_r); - CallParallelReduce(args..., Test::ReduceCombinatorical::FunctorScalarJoinFinalInit<ISTEAM>(result_view)); - Kokkos::deep_copy(h_r,result_view); - ASSERT_EQ(expected_result,h_r()); + Kokkos::deep_copy( result_view, h_r ); + CallParallelReduce( args..., 
Test::ReduceCombinatorical::FunctorScalarJoinFinalInit< ISTEAM >( result_view ) ); + Kokkos::deep_copy( h_r, result_view ); + ASSERT_EQ( expected_result, h_r() ); } - template<class ... Args> - static void AddFunctorLambdaRange(Args... args) { - AddFunctor<0,Args...>(args...); - #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaRange(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...); - #endif + template< class ... Args > + static void AddFunctorLambdaRange( Args... args ) { + AddFunctor< 0, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaRange( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif } - template<class ... Args> - static void AddFunctorLambdaTeam(Args... args) { - AddFunctor<1,Args...>(args...); - #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - AddLambdaTeam(typename std::conditional<std::is_same<ExecSpace,Kokkos::DefaultExecutionSpace>::value,void*,Kokkos::InvalidType>::type(), args...); - #endif + template< class ... Args > + static void AddFunctorLambdaTeam( Args... args ) { + AddFunctor< 1, Args... >( args... ); +#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA + AddLambdaTeam( typename std::conditional< std::is_same<ExecSpace, Kokkos::DefaultExecutionSpace>::value, void*, Kokkos::InvalidType >::type(), args... ); +#endif } - template<class ... Args> - static void AddPolicy(Args... args) { + template< class ... Args > + static void AddPolicy( Args... args ) { int N = 1000; - Kokkos::RangePolicy<ExecSpace> policy(0,N); + Kokkos::RangePolicy< ExecSpace > policy( 0, N ); - AddFunctorLambdaRange(args...,1000); - AddFunctorLambdaRange(args...,N); - AddFunctorLambdaRange(args...,policy); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace>(0,N)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(0,N).set_chunk_size(10)); - AddFunctorLambdaRange(args...,Kokkos::RangePolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(0,N).set_chunk_size(10)); + AddFunctorLambdaRange( args..., 1000 ); + AddFunctorLambdaRange( args..., N ); + AddFunctorLambdaRange( args..., policy ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Static> >( 0, N ).set_chunk_size( 10 ) ); + AddFunctorLambdaRange( args..., Kokkos::RangePolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( 0, N ).set_chunk_size( 10 ) ); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace>(N,Kokkos::AUTO)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Static> >(N,Kokkos::AUTO).set_chunk_size(10)); - AddFunctorLambdaTeam(args...,Kokkos::TeamPolicy<ExecSpace,Kokkos::Schedule<Kokkos::Dynamic> >(N,Kokkos::AUTO).set_chunk_size(10)); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< 
ExecSpace, Kokkos::Schedule<Kokkos::Static> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); + AddFunctorLambdaTeam( args..., Kokkos::TeamPolicy< ExecSpace, Kokkos::Schedule<Kokkos::Dynamic> >( N, Kokkos::AUTO ).set_chunk_size( 10 ) ); } - static void execute_a() { AddPolicy(); } static void execute_b() { - std::string s("Std::String"); - AddPolicy(s.c_str()); - AddPolicy("Char Constant"); + std::string s( "Std::String" ); + AddPolicy( s.c_str() ); + AddPolicy( "Char Constant" ); } static void execute_c() { - std::string s("Std::String"); - AddPolicy(s); + std::string s( "Std::String" ); + AddPolicy( s ); } }; -template<class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace> +template< class Scalar, class ExecSpace = Kokkos::DefaultExecutionSpace > struct TestReducers { - struct SumFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value += values(i); + void operator()( const int & i, Scalar & value ) const { + value += values( i ); } }; struct ProdFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value *= values(i); + void operator()( const int & i, Scalar & value ) const { + value *= values( i ); } }; struct MinFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - if(values(i) < value) - value = values(i); + void operator()( const int & i, Scalar & value ) const { + if ( values( i ) < value ) value = values( i ); } }; struct MaxFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - if(values(i) > value) - value = values(i); + void operator()( const int & i, Scalar & value ) const { + if ( values( i ) > value ) value = values( i ); } }; struct MinLocFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type& value) const { - if(values(i) < value.val) { - value.val = values(i); + void operator()( const int & i, typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) < value.val ) { + value.val = values( i ); value.loc = i; } } }; struct MaxLocFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type& value) const { - if(values(i) > value.val) { - value.val = values(i); + void operator()( const int & i, typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.val ) { + value.val = values( i ); value.loc = i; } } }; struct MinMaxLocFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, - typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type& value) const { - if(values(i) > value.max_val) { - value.max_val = values(i); + void operator()( const int & i, typename 
Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type & value ) const { + if ( values( i ) > value.max_val ) { + value.max_val = values( i ); value.max_loc = i; } - if(values(i) < value.min_val) { - value.min_val = values(i); + + if ( values( i ) < value.min_val ) { + value.min_val = values( i ); value.min_loc = i; } } }; struct BAndFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value & values(i); + void operator()( const int & i, Scalar & value ) const { + value = value & values( i ); } }; struct BOrFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value | values(i); + void operator()( const int & i, Scalar & value ) const { + value = value | values( i ); } }; struct BXorFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value ^ values(i); + void operator()( const int & i, Scalar & value ) const { + value = value ^ values( i ); } }; struct LAndFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value && values(i); + void operator()( const int & i, Scalar & value ) const { + value = value && values( i ); } }; struct LOrFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value || values(i); + void operator()( const int & i, Scalar & value ) const { + value = value || values( i ); } }; struct LXorFunctor { - Kokkos::View<const Scalar*,ExecSpace> values; + Kokkos::View< const Scalar*, ExecSpace > values; + KOKKOS_INLINE_FUNCTION - void operator() (const int& i, Scalar& value) const { - value = value ? (!values(i)) : values(i); + void operator()( const int & i, Scalar & value ) const { + value = value ? 
( !values( i ) ) : values( i ); } }; - static void test_sum(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_sum( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_sum = 0; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100); - reference_sum += h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100 ); + reference_sum += h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); SumFunctor f; f.values = values; @@ -1224,556 +1254,669 @@ struct TestReducers { { Scalar sum_scalar = init; - Kokkos::Experimental::Sum<Scalar> reducer_scalar(sum_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(sum_scalar,reference_sum); + Kokkos::Experimental::Sum< Scalar > reducer_scalar( sum_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( sum_scalar, reference_sum ); + Scalar sum_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(sum_scalar_view,reference_sum); + ASSERT_EQ( sum_scalar_view, reference_sum ); } + { Scalar sum_scalar_init = init; - Kokkos::Experimental::Sum<Scalar> reducer_scalar_init(sum_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(sum_scalar_init,reference_sum); + Kokkos::Experimental::Sum< Scalar > reducer_scalar_init( sum_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( sum_scalar_init, reference_sum ); + Scalar sum_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(sum_scalar_init_view,reference_sum); + ASSERT_EQ( sum_scalar_init_view, reference_sum ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> sum_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace> sum_view( "View" ); sum_view() = init; - Kokkos::Experimental::Sum<Scalar> reducer_view(sum_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::Sum< Scalar > reducer_view( sum_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar sum_view_scalar = sum_view(); - ASSERT_EQ(sum_view_scalar,reference_sum); + ASSERT_EQ( sum_view_scalar, reference_sum ); + Scalar sum_view_view = reducer_view.result_view()(); - ASSERT_EQ(sum_view_view,reference_sum); + ASSERT_EQ( sum_view_view, reference_sum ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> sum_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > sum_view_init( "View" ); sum_view_init() = init; - Kokkos::Experimental::Sum<Scalar> reducer_view_init(sum_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::Experimental::Sum< Scalar > reducer_view_init( sum_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar sum_view_init_scalar = sum_view_init(); - ASSERT_EQ(sum_view_init_scalar,reference_sum); + ASSERT_EQ( sum_view_init_scalar, reference_sum ); + Scalar sum_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(sum_view_init_view,reference_sum); + ASSERT_EQ( sum_view_init_view, reference_sum ); } } - static void test_prod(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto 
h_values = Kokkos::create_mirror_view(values); + static void test_prod( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_prod = 1; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%4+1); - reference_prod *= h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 4 + 1 ); + reference_prod *= h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); ProdFunctor f; f.values = values; Scalar init = 1; - if(std::is_arithmetic<Scalar>::value) + if ( std::is_arithmetic< Scalar >::value ) { Scalar prod_scalar = init; - Kokkos::Experimental::Prod<Scalar> reducer_scalar(prod_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(prod_scalar,reference_prod); + Kokkos::Experimental::Prod< Scalar > reducer_scalar( prod_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( prod_scalar, reference_prod ); + Scalar prod_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(prod_scalar_view,reference_prod); + ASSERT_EQ( prod_scalar_view, reference_prod ); } + { Scalar prod_scalar_init = init; - Kokkos::Experimental::Prod<Scalar> reducer_scalar_init(prod_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(prod_scalar_init,reference_prod); + Kokkos::Experimental::Prod< Scalar > reducer_scalar_init( prod_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( prod_scalar_init, reference_prod ); + Scalar prod_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(prod_scalar_init_view,reference_prod); + ASSERT_EQ( prod_scalar_init_view, reference_prod ); } - if(std::is_arithmetic<Scalar>::value) + if ( std::is_arithmetic< Scalar >::value ) { - Kokkos::View<Scalar,Kokkos::HostSpace> prod_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > prod_view( "View" ); prod_view() = init; - Kokkos::Experimental::Prod<Scalar> reducer_view(prod_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::Prod< Scalar > reducer_view( prod_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar prod_view_scalar = prod_view(); - ASSERT_EQ(prod_view_scalar,reference_prod); + ASSERT_EQ( prod_view_scalar, reference_prod ); + Scalar prod_view_view = reducer_view.result_view()(); - ASSERT_EQ(prod_view_view,reference_prod); + ASSERT_EQ( prod_view_view, reference_prod ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> prod_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > prod_view_init( "View" ); prod_view_init() = init; - Kokkos::Experimental::Prod<Scalar> reducer_view_init(prod_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::Experimental::Prod< Scalar > reducer_view_init( prod_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar prod_view_init_scalar = prod_view_init(); - ASSERT_EQ(prod_view_init_scalar,reference_prod); + ASSERT_EQ( prod_view_init_scalar, reference_prod ); + Scalar prod_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(prod_view_init_view,reference_prod); + ASSERT_EQ( prod_view_init_view, reference_prod ); } } - static void 
test_min(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_min = std::numeric_limits<Scalar>::max(); - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000); - if(h_values(i)<reference_min) - reference_min = h_values(i); + static void test_min( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_min = std::numeric_limits< Scalar >::max(); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); + + if ( h_values( i ) < reference_min ) reference_min = h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MinFunctor f; f.values = values; - Scalar init = std::numeric_limits<Scalar>::max(); + Scalar init = std::numeric_limits< Scalar >::max(); { Scalar min_scalar = init; - Kokkos::Experimental::Min<Scalar> reducer_scalar(min_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(min_scalar,reference_min); + Kokkos::Experimental::Min< Scalar > reducer_scalar( min_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( min_scalar, reference_min ); + Scalar min_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(min_scalar_view,reference_min); + ASSERT_EQ( min_scalar_view, reference_min ); } + { Scalar min_scalar_init = init; - Kokkos::Experimental::Min<Scalar> reducer_scalar_init(min_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(min_scalar_init,reference_min); + Kokkos::Experimental::Min< Scalar > reducer_scalar_init( min_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( min_scalar_init, reference_min ); + Scalar min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(min_scalar_init_view,reference_min); + ASSERT_EQ( min_scalar_init_view, reference_min ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> min_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > min_view( "View" ); min_view() = init; - Kokkos::Experimental::Min<Scalar> reducer_view(min_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::Min< Scalar > reducer_view( min_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar min_view_scalar = min_view(); - ASSERT_EQ(min_view_scalar,reference_min); + ASSERT_EQ( min_view_scalar, reference_min ); + Scalar min_view_view = reducer_view.result_view()(); - ASSERT_EQ(min_view_view,reference_min); + ASSERT_EQ( min_view_view, reference_min ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> min_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > min_view_init( "View" ); min_view_init() = init; - Kokkos::Experimental::Min<Scalar> reducer_view_init(min_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::Experimental::Min< Scalar > reducer_view_init( min_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar min_view_init_scalar = min_view_init(); - ASSERT_EQ(min_view_init_scalar,reference_min); + ASSERT_EQ( min_view_init_scalar, reference_min ); + Scalar min_view_init_view = reducer_view_init.result_view()(); - 
ASSERT_EQ(min_view_init_view,reference_min); + ASSERT_EQ( min_view_init_view, reference_min ); } } - static void test_max(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_max = std::numeric_limits<Scalar>::min(); - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000+1); - if(h_values(i)>reference_max) - reference_max = h_values(i); + static void test_max( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); + + if ( h_values( i ) > reference_max ) reference_max = h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MaxFunctor f; f.values = values; - Scalar init = std::numeric_limits<Scalar>::min(); + Scalar init = std::numeric_limits< Scalar >::min(); { Scalar max_scalar = init; - Kokkos::Experimental::Max<Scalar> reducer_scalar(max_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(max_scalar,reference_max); + Kokkos::Experimental::Max< Scalar > reducer_scalar( max_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( max_scalar, reference_max ); + Scalar max_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(max_scalar_view,reference_max); + ASSERT_EQ( max_scalar_view, reference_max ); } + { Scalar max_scalar_init = init; - Kokkos::Experimental::Max<Scalar> reducer_scalar_init(max_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(max_scalar_init,reference_max); + Kokkos::Experimental::Max< Scalar > reducer_scalar_init( max_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( max_scalar_init, reference_max ); + Scalar max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(max_scalar_init_view,reference_max); + ASSERT_EQ( max_scalar_init_view, reference_max ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> max_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > max_view( "View" ); max_view() = init; - Kokkos::Experimental::Max<Scalar> reducer_view(max_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::Max< Scalar > reducer_view( max_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar max_view_scalar = max_view(); - ASSERT_EQ(max_view_scalar,reference_max); + ASSERT_EQ( max_view_scalar, reference_max ); + Scalar max_view_view = reducer_view.result_view()(); - ASSERT_EQ(max_view_view,reference_max); + ASSERT_EQ( max_view_view, reference_max ); } + { - Kokkos::View<Scalar,Kokkos::HostSpace> max_view_init("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > max_view_init( "View" ); max_view_init() = init; - Kokkos::Experimental::Max<Scalar> reducer_view_init(max_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::Experimental::Max< Scalar > reducer_view_init( max_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + Scalar max_view_init_scalar = max_view_init(); - ASSERT_EQ(max_view_init_scalar,reference_max); + ASSERT_EQ( 
max_view_init_scalar, reference_max ); + Scalar max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(max_view_init_view,reference_max); + ASSERT_EQ( max_view_init_view, reference_max ); } } - static void test_minloc(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_min = std::numeric_limits<Scalar>::max(); + static void test_minloc( int N ) { + typedef typename Kokkos::Experimental::MinLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_loc = -1; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000); - if(h_values(i)<reference_min) { - reference_min = h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); + + if ( h_values( i ) < reference_min ) { + reference_min = h_values( i ); reference_loc = i; - } else if (h_values(i) == reference_min) { - // make min unique - h_values(i) += std::numeric_limits<Scalar>::epsilon(); + } + else if ( h_values( i ) == reference_min ) { + // Make min unique. + h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MinLocFunctor f; - typedef typename Kokkos::Experimental::MinLoc<Scalar,int>::value_type value_type; f.values = values; - Scalar init = std::numeric_limits<Scalar>::max(); - + Scalar init = std::numeric_limits< Scalar >::max(); { value_type min_scalar; - Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar(min_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(min_scalar.val,reference_min); - ASSERT_EQ(min_scalar.loc,reference_loc); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar( min_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( min_scalar.val, reference_min ); + ASSERT_EQ( min_scalar.loc, reference_loc ); + value_type min_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(min_scalar_view.val,reference_min); - ASSERT_EQ(min_scalar_view.loc,reference_loc); + ASSERT_EQ( min_scalar_view.val, reference_min ); + ASSERT_EQ( min_scalar_view.loc, reference_loc ); } + { value_type min_scalar_init; - Kokkos::Experimental::MinLoc<Scalar,int> reducer_scalar_init(min_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(min_scalar_init.val,reference_min); - ASSERT_EQ(min_scalar_init.loc,reference_loc); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_scalar_init( min_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( min_scalar_init.val, reference_min ); + ASSERT_EQ( min_scalar_init.loc, reference_loc ); + value_type min_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(min_scalar_init_view.val,reference_min); - ASSERT_EQ(min_scalar_init_view.loc,reference_loc); + ASSERT_EQ( min_scalar_init_view.val, reference_min ); + ASSERT_EQ( min_scalar_init_view.loc, reference_loc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> min_view("View"); - Kokkos::Experimental::MinLoc<Scalar,int> reducer_view(min_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > 
min_view( "View" ); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_view( min_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type min_view_scalar = min_view(); - ASSERT_EQ(min_view_scalar.val,reference_min); - ASSERT_EQ(min_view_scalar.loc,reference_loc); + ASSERT_EQ( min_view_scalar.val, reference_min ); + ASSERT_EQ( min_view_scalar.loc, reference_loc ); + value_type min_view_view = reducer_view.result_view()(); - ASSERT_EQ(min_view_view.val,reference_min); - ASSERT_EQ(min_view_view.loc,reference_loc); + ASSERT_EQ( min_view_view.val, reference_min ); + ASSERT_EQ( min_view_view.loc, reference_loc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> min_view_init("View"); - Kokkos::Experimental::MinLoc<Scalar,int> reducer_view_init(min_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > min_view_init( "View" ); + Kokkos::Experimental::MinLoc< Scalar, int > reducer_view_init( min_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type min_view_init_scalar = min_view_init(); - ASSERT_EQ(min_view_init_scalar.val,reference_min); - ASSERT_EQ(min_view_init_scalar.loc,reference_loc); + ASSERT_EQ( min_view_init_scalar.val, reference_min ); + ASSERT_EQ( min_view_init_scalar.loc, reference_loc ); + value_type min_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(min_view_init_view.val,reference_min); - ASSERT_EQ(min_view_init_view.loc,reference_loc); + ASSERT_EQ( min_view_init_view.val, reference_min ); + ASSERT_EQ( min_view_init_view.loc, reference_loc ); } } - static void test_maxloc(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_max = std::numeric_limits<Scalar>::min(); + static void test_maxloc( int N ) { + typedef typename Kokkos::Experimental::MaxLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); int reference_loc = -1; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000); - if(h_values(i)>reference_max) { - reference_max = h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); + + if ( h_values( i ) > reference_max ) { + reference_max = h_values( i ); reference_loc = i; - } else if (h_values(i) == reference_max) { - // make max unique - h_values(i) -= std::numeric_limits<Scalar>::epsilon(); + } + else if ( h_values( i ) == reference_max ) { + // Make max unique. 
+ h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); MaxLocFunctor f; - typedef typename Kokkos::Experimental::MaxLoc<Scalar,int>::value_type value_type; f.values = values; - Scalar init = std::numeric_limits<Scalar>::min(); - + Scalar init = std::numeric_limits< Scalar >::min(); { value_type max_scalar; - Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar(max_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(max_scalar.val,reference_max); - ASSERT_EQ(max_scalar.loc,reference_loc); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar( max_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( max_scalar.val, reference_max ); + ASSERT_EQ( max_scalar.loc, reference_loc ); + value_type max_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(max_scalar_view.val,reference_max); - ASSERT_EQ(max_scalar_view.loc,reference_loc); + ASSERT_EQ( max_scalar_view.val, reference_max ); + ASSERT_EQ( max_scalar_view.loc, reference_loc ); } + { value_type max_scalar_init; - Kokkos::Experimental::MaxLoc<Scalar,int> reducer_scalar_init(max_scalar_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(max_scalar_init.val,reference_max); - ASSERT_EQ(max_scalar_init.loc,reference_loc); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_scalar_init( max_scalar_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( max_scalar_init.val, reference_max ); + ASSERT_EQ( max_scalar_init.loc, reference_loc ); + value_type max_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(max_scalar_init_view.val,reference_max); - ASSERT_EQ(max_scalar_init_view.loc,reference_loc); + ASSERT_EQ( max_scalar_init_view.val, reference_max ); + ASSERT_EQ( max_scalar_init_view.loc, reference_loc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> max_view("View"); - Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view(max_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > max_view( "View" ); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view( max_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type max_view_scalar = max_view(); - ASSERT_EQ(max_view_scalar.val,reference_max); - ASSERT_EQ(max_view_scalar.loc,reference_loc); + ASSERT_EQ( max_view_scalar.val, reference_max ); + ASSERT_EQ( max_view_scalar.loc, reference_loc ); + value_type max_view_view = reducer_view.result_view()(); - ASSERT_EQ(max_view_view.val,reference_max); - ASSERT_EQ(max_view_view.loc,reference_loc); + ASSERT_EQ( max_view_view.val, reference_max ); + ASSERT_EQ( max_view_view.loc, reference_loc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> max_view_init("View"); - Kokkos::Experimental::MaxLoc<Scalar,int> reducer_view_init(max_view_init,init); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > max_view_init( "View" ); + Kokkos::Experimental::MaxLoc< Scalar, int > reducer_view_init( max_view_init, init ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type max_view_init_scalar = max_view_init(); - 
ASSERT_EQ(max_view_init_scalar.val,reference_max); - ASSERT_EQ(max_view_init_scalar.loc,reference_loc); + ASSERT_EQ( max_view_init_scalar.val, reference_max ); + ASSERT_EQ( max_view_init_scalar.loc, reference_loc ); + value_type max_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(max_view_init_view.val,reference_max); - ASSERT_EQ(max_view_init_view.loc,reference_loc); + ASSERT_EQ( max_view_init_view.val, reference_max ); + ASSERT_EQ( max_view_init_view.loc, reference_loc ); } } - static void test_minmaxloc(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_max = std::numeric_limits<Scalar>::min(); - Scalar reference_min = std::numeric_limits<Scalar>::max(); + static void test_minmaxloc( int N ) { + typedef typename Kokkos::Experimental::MinMaxLoc< Scalar, int >::value_type value_type; + + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_max = std::numeric_limits< Scalar >::min(); + Scalar reference_min = std::numeric_limits< Scalar >::max(); int reference_minloc = -1; int reference_maxloc = -1; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 ); } - for(int i=0; i<N; i++) { - if(h_values(i)>reference_max) { - reference_max = h_values(i); + + for ( int i = 0; i < N; i++ ) { + if ( h_values( i ) > reference_max ) { + reference_max = h_values( i ); reference_maxloc = i; - } else if (h_values(i) == reference_max) { - // make max unique - h_values(i) -= std::numeric_limits<Scalar>::epsilon(); + } + else if ( h_values( i ) == reference_max ) { + // Make max unique. + h_values( i ) -= std::numeric_limits< Scalar >::epsilon(); } } - for(int i=0; i<N; i++) { - if(h_values(i)<reference_min) { - reference_min = h_values(i); + + for ( int i = 0; i < N; i++ ) { + if ( h_values( i ) < reference_min ) { + reference_min = h_values( i ); reference_minloc = i; - } else if (h_values(i) == reference_min) { - // make min unique - h_values(i) += std::numeric_limits<Scalar>::epsilon(); + } + else if ( h_values( i ) == reference_min ) { + // Make min unique. 
+ h_values( i ) += std::numeric_limits< Scalar >::epsilon(); } } - Kokkos::deep_copy(values,h_values); + + Kokkos::deep_copy( values, h_values ); MinMaxLocFunctor f; - typedef typename Kokkos::Experimental::MinMaxLoc<Scalar,int>::value_type value_type; f.values = values; - Scalar init_min = std::numeric_limits<Scalar>::max(); - Scalar init_max = std::numeric_limits<Scalar>::min(); - + Scalar init_min = std::numeric_limits< Scalar >::max(); + Scalar init_max = std::numeric_limits< Scalar >::min(); { value_type minmax_scalar; - Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar(minmax_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(minmax_scalar.min_val,reference_min); - for(int i=0; i<N; i++) { - if((i == minmax_scalar.min_loc) && (h_values(i)==reference_min)) + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar( minmax_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( minmax_scalar.min_val, reference_min ); + + for ( int i = 0; i < N; i++ ) { + if ( ( i == minmax_scalar.min_loc ) && ( h_values( i ) == reference_min ) ) { reference_minloc = i; + } } - ASSERT_EQ(minmax_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar.max_val,reference_max); - for(int i=0; i<N; i++) { - if((i == minmax_scalar.max_loc) && (h_values(i)==reference_max)) + + ASSERT_EQ( minmax_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar.max_val, reference_max ); + + for ( int i = 0; i < N; i++ ) { + if ( ( i == minmax_scalar.max_loc ) && ( h_values( i ) == reference_max ) ) { reference_maxloc = i; + } } - ASSERT_EQ(minmax_scalar.max_loc,reference_maxloc); + + ASSERT_EQ( minmax_scalar.max_loc, reference_maxloc ); + value_type minmax_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(minmax_scalar_view.min_val,reference_min); - ASSERT_EQ(minmax_scalar_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar_view.max_val,reference_max); - ASSERT_EQ(minmax_scalar_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_scalar_view.min_val, reference_min ); + ASSERT_EQ( minmax_scalar_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar_view.max_val, reference_max ); + ASSERT_EQ( minmax_scalar_view.max_loc, reference_maxloc ); } + { value_type minmax_scalar_init; - Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_scalar_init(minmax_scalar_init,init_min,init_max); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar_init); - ASSERT_EQ(minmax_scalar_init.min_val,reference_min); - ASSERT_EQ(minmax_scalar_init.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar_init.max_val,reference_max); - ASSERT_EQ(minmax_scalar_init.max_loc,reference_maxloc); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_scalar_init( minmax_scalar_init, init_min, init_max ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar_init ); + + ASSERT_EQ( minmax_scalar_init.min_val, reference_min ); + ASSERT_EQ( minmax_scalar_init.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar_init.max_val, reference_max ); + ASSERT_EQ( minmax_scalar_init.max_loc, reference_maxloc ); + value_type minmax_scalar_init_view = reducer_scalar_init.result_view()(); - ASSERT_EQ(minmax_scalar_init_view.min_val,reference_min); - ASSERT_EQ(minmax_scalar_init_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_scalar_init_view.max_val,reference_max); - ASSERT_EQ(minmax_scalar_init_view.max_loc,reference_maxloc); + ASSERT_EQ( 
minmax_scalar_init_view.min_val, reference_min ); + ASSERT_EQ( minmax_scalar_init_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_scalar_init_view.max_val, reference_max ); + ASSERT_EQ( minmax_scalar_init_view.max_loc, reference_maxloc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> minmax_view("View"); - Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view(minmax_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::View< value_type, Kokkos::HostSpace > minmax_view( "View" ); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view( minmax_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + value_type minmax_view_scalar = minmax_view(); - ASSERT_EQ(minmax_view_scalar.min_val,reference_min); - ASSERT_EQ(minmax_view_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_scalar.max_val,reference_max); - ASSERT_EQ(minmax_view_scalar.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_scalar.min_val, reference_min ); + ASSERT_EQ( minmax_view_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_scalar.max_val, reference_max ); + ASSERT_EQ( minmax_view_scalar.max_loc, reference_maxloc ); + value_type minmax_view_view = reducer_view.result_view()(); - ASSERT_EQ(minmax_view_view.min_val,reference_min); - ASSERT_EQ(minmax_view_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_view.max_val,reference_max); - ASSERT_EQ(minmax_view_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_view.min_val, reference_min ); + ASSERT_EQ( minmax_view_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_view.max_val, reference_max ); + ASSERT_EQ( minmax_view_view.max_loc, reference_maxloc ); } + { - Kokkos::View<value_type,Kokkos::HostSpace> minmax_view_init("View"); - Kokkos::Experimental::MinMaxLoc<Scalar,int> reducer_view_init(minmax_view_init,init_min,init_max); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view_init); + Kokkos::View< value_type, Kokkos::HostSpace > minmax_view_init( "View" ); + Kokkos::Experimental::MinMaxLoc< Scalar, int > reducer_view_init( minmax_view_init, init_min, init_max ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view_init ); + value_type minmax_view_init_scalar = minmax_view_init(); - ASSERT_EQ(minmax_view_init_scalar.min_val,reference_min); - ASSERT_EQ(minmax_view_init_scalar.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_init_scalar.max_val,reference_max); - ASSERT_EQ(minmax_view_init_scalar.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_init_scalar.min_val, reference_min ); + ASSERT_EQ( minmax_view_init_scalar.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_init_scalar.max_val, reference_max ); + ASSERT_EQ( minmax_view_init_scalar.max_loc, reference_maxloc ); + value_type minmax_view_init_view = reducer_view_init.result_view()(); - ASSERT_EQ(minmax_view_init_view.min_val,reference_min); - ASSERT_EQ(minmax_view_init_view.min_loc,reference_minloc); - ASSERT_EQ(minmax_view_init_view.max_val,reference_max); - ASSERT_EQ(minmax_view_init_view.max_loc,reference_maxloc); + ASSERT_EQ( minmax_view_init_view.min_val, reference_min ); + ASSERT_EQ( minmax_view_init_view.min_loc, reference_minloc ); + ASSERT_EQ( minmax_view_init_view.max_val, reference_max ); + ASSERT_EQ( minmax_view_init_view.max_loc, reference_maxloc ); } } - static void test_BAnd(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar 
reference_band = Scalar() | (~Scalar()); - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%100000+1); - reference_band = reference_band & h_values(i); + static void test_BAnd( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_band = Scalar() | ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 100000 + 1 ); + reference_band = reference_band & h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); BAndFunctor f; f.values = values; - Scalar init = Scalar() | (~Scalar()); + Scalar init = Scalar() | ( ~Scalar() ); { Scalar band_scalar = init; - Kokkos::Experimental::BAnd<Scalar> reducer_scalar(band_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(band_scalar,reference_band); + Kokkos::Experimental::BAnd< Scalar > reducer_scalar( band_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( band_scalar, reference_band ); Scalar band_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(band_scalar_view,reference_band); + + ASSERT_EQ( band_scalar_view, reference_band ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> band_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > band_view( "View" ); band_view() = init; - Kokkos::Experimental::BAnd<Scalar> reducer_view(band_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::BAnd< Scalar > reducer_view( band_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar band_view_scalar = band_view(); - ASSERT_EQ(band_view_scalar,reference_band); + ASSERT_EQ( band_view_scalar, reference_band ); + Scalar band_view_view = reducer_view.result_view()(); - ASSERT_EQ(band_view_view,reference_band); + ASSERT_EQ( band_view_view, reference_band ); } } - static void test_BOr(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_bor = Scalar() & (~Scalar()); - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)((rand()%100000+1)*2); - reference_bor = reference_bor | h_values(i); + static void test_BOr( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_bor = Scalar() & ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); + reference_bor = reference_bor | h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); BOrFunctor f; f.values = values; - Scalar init = Scalar() & (~Scalar()); + Scalar init = Scalar() & ( ~Scalar() ); { Scalar bor_scalar = init; - Kokkos::Experimental::BOr<Scalar> reducer_scalar(bor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(bor_scalar,reference_bor); + Kokkos::Experimental::BOr< Scalar > reducer_scalar( bor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( bor_scalar, reference_bor ); + Scalar bor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(bor_scalar_view,reference_bor); + ASSERT_EQ( bor_scalar_view, reference_bor ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> bor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > bor_view( "View" ); 
bor_view() = init; - Kokkos::Experimental::BOr<Scalar> reducer_view(bor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::BOr< Scalar > reducer_view( bor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar bor_view_scalar = bor_view(); - ASSERT_EQ(bor_view_scalar,reference_bor); + ASSERT_EQ( bor_view_scalar, reference_bor ); + Scalar bor_view_view = reducer_view.result_view()(); - ASSERT_EQ(bor_view_view,reference_bor); + ASSERT_EQ( bor_view_view, reference_bor ); } } - static void test_BXor(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); - Scalar reference_bxor = Scalar() & (~Scalar()); - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)((rand()%100000+1)*2); - reference_bxor = reference_bxor ^ h_values(i); + static void test_BXor( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); + Scalar reference_bxor = Scalar() & ( ~Scalar() ); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( ( rand() % 100000 + 1 ) * 2 ); + reference_bxor = reference_bxor ^ h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); BXorFunctor f; f.values = values; - Scalar init = Scalar() & (~Scalar()); + Scalar init = Scalar() & ( ~Scalar() ); { Scalar bxor_scalar = init; - Kokkos::Experimental::BXor<Scalar> reducer_scalar(bxor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(bxor_scalar,reference_bxor); + Kokkos::Experimental::BXor< Scalar > reducer_scalar( bxor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( bxor_scalar, reference_bxor ); + Scalar bxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(bxor_scalar_view,reference_bxor); + ASSERT_EQ( bxor_scalar_view, reference_bxor ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> bxor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > bxor_view( "View" ); bxor_view() = init; - Kokkos::Experimental::BXor<Scalar> reducer_view(bxor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::BXor< Scalar > reducer_view( bxor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar bxor_view_scalar = bxor_view(); - ASSERT_EQ(bxor_view_scalar,reference_bxor); + ASSERT_EQ( bxor_view_scalar, reference_bxor ); + Scalar bxor_view_view = reducer_view.result_view()(); - ASSERT_EQ(bxor_view_view,reference_bxor); + ASSERT_EQ( bxor_view_view, reference_bxor ); } } - static void test_LAnd(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LAnd( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_land = 1; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%2); - reference_land = reference_land && h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 2 ); + reference_land = reference_land && h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); LAndFunctor f; f.values = values; @@ -1781,34 +1924,39 @@ struct TestReducers { { Scalar land_scalar = init; - Kokkos::Experimental::LAnd<Scalar> 
reducer_scalar(land_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(land_scalar,reference_land); + Kokkos::Experimental::LAnd< Scalar > reducer_scalar( land_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( land_scalar, reference_land ); + Scalar land_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(land_scalar_view,reference_land); + ASSERT_EQ( land_scalar_view, reference_land ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> land_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > land_view( "View" ); land_view() = init; - Kokkos::Experimental::LAnd<Scalar> reducer_view(land_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::LAnd< Scalar > reducer_view( land_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar land_view_scalar = land_view(); - ASSERT_EQ(land_view_scalar,reference_land); + ASSERT_EQ( land_view_scalar, reference_land ); + Scalar land_view_view = reducer_view.result_view()(); - ASSERT_EQ(land_view_view,reference_land); + ASSERT_EQ( land_view_view, reference_land ); } } - static void test_LOr(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LOr( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lor = 0; - for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%2); - reference_lor = reference_lor || h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 2 ); + reference_lor = reference_lor || h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); LOrFunctor f; f.values = values; @@ -1816,34 +1964,39 @@ struct TestReducers { { Scalar lor_scalar = init; - Kokkos::Experimental::LOr<Scalar> reducer_scalar(lor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(lor_scalar,reference_lor); + Kokkos::Experimental::LOr< Scalar > reducer_scalar( lor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( lor_scalar, reference_lor ); + Scalar lor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(lor_scalar_view,reference_lor); + ASSERT_EQ( lor_scalar_view, reference_lor ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> lor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > lor_view( "View" ); lor_view() = init; - Kokkos::Experimental::LOr<Scalar> reducer_view(lor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::LOr< Scalar > reducer_view( lor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar lor_view_scalar = lor_view(); - ASSERT_EQ(lor_view_scalar,reference_lor); + ASSERT_EQ( lor_view_scalar, reference_lor ); + Scalar lor_view_view = reducer_view.result_view()(); - ASSERT_EQ(lor_view_view,reference_lor); + ASSERT_EQ( lor_view_view, reference_lor ); } } - static void test_LXor(int N) { - Kokkos::View<Scalar*,ExecSpace> values("Values",N); - auto h_values = Kokkos::create_mirror_view(values); + static void test_LXor( int N ) { + Kokkos::View< Scalar*, ExecSpace > values( "Values", N ); + auto h_values = Kokkos::create_mirror_view( values ); Scalar reference_lxor = 0; 
- for(int i=0; i<N; i++) { - h_values(i) = (Scalar)(rand()%2); - reference_lxor = reference_lxor ? (!h_values(i)) : h_values(i); + + for ( int i = 0; i < N; i++ ) { + h_values( i ) = (Scalar) ( rand() % 2 ); + reference_lxor = reference_lxor ? ( !h_values( i ) ) : h_values( i ); } - Kokkos::deep_copy(values,h_values); + Kokkos::deep_copy( values, h_values ); LXorFunctor f; f.values = values; @@ -1851,57 +2004,59 @@ struct TestReducers { { Scalar lxor_scalar = init; - Kokkos::Experimental::LXor<Scalar> reducer_scalar(lxor_scalar); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_scalar); - ASSERT_EQ(lxor_scalar,reference_lxor); + Kokkos::Experimental::LXor< Scalar > reducer_scalar( lxor_scalar ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_scalar ); + + ASSERT_EQ( lxor_scalar, reference_lxor ); + Scalar lxor_scalar_view = reducer_scalar.result_view()(); - ASSERT_EQ(lxor_scalar_view,reference_lxor); + ASSERT_EQ( lxor_scalar_view, reference_lxor ); } { - Kokkos::View<Scalar,Kokkos::HostSpace> lxor_view("View"); + Kokkos::View< Scalar, Kokkos::HostSpace > lxor_view( "View" ); lxor_view() = init; - Kokkos::Experimental::LXor<Scalar> reducer_view(lxor_view); - Kokkos::parallel_reduce(Kokkos::RangePolicy<ExecSpace>(0,N),f,reducer_view); + Kokkos::Experimental::LXor< Scalar > reducer_view( lxor_view ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, N ), f, reducer_view ); + Scalar lxor_view_scalar = lxor_view(); - ASSERT_EQ(lxor_view_scalar,reference_lxor); + ASSERT_EQ( lxor_view_scalar, reference_lxor ); + Scalar lxor_view_view = reducer_view.result_view()(); - ASSERT_EQ(lxor_view_view,reference_lxor); + ASSERT_EQ( lxor_view_view, reference_lxor ); } } static void execute_float() { - test_sum(10001); - test_prod(35); - test_min(10003); - test_minloc(10003); - test_max(10007); - test_maxloc(10007); - test_minmaxloc(10007); + test_sum( 10001 ); + test_prod( 35 ); + test_min( 10003 ); + test_minloc( 10003 ); + test_max( 10007 ); + test_maxloc( 10007 ); + test_minmaxloc( 10007 ); } static void execute_integer() { - test_sum(10001); - test_prod(35); - test_min(10003); - test_minloc(10003); - test_max(10007); - test_maxloc(10007); - test_minmaxloc(10007); - test_BAnd(35); - test_BOr(35); - test_BXor(35); - test_LAnd(35); - test_LOr(35); - test_LXor(35); + test_sum( 10001 ); + test_prod( 35 ); + test_min( 10003 ); + test_minloc( 10003 ); + test_max( 10007 ); + test_maxloc( 10007 ); + test_minmaxloc( 10007 ); + test_BAnd( 35 ); + test_BOr( 35 ); + test_BXor( 35 ); + test_LAnd( 35 ); + test_LOr( 35 ); + test_LXor( 35 ); } static void execute_basic() { - test_sum(10001); - test_prod(35); + test_sum( 10001 ); + test_prod( 35 ); } }; -} - -/*--------------------------------------------------------------------------*/ +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestScan.hpp b/lib/kokkos/core/unit_test/TestScan.hpp index 1a9811a854f85e2b7ef918ff2d1e36b268ae6c28..547e03497601a0a7da8bc3d0027ee9fef603e196 100644 --- a/lib/kokkos/core/unit_test/TestScan.hpp +++ b/lib/kokkos/core/unit_test/TestScan.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,82 +36,81 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -/*--------------------------------------------------------------------------*/ - #include <stdio.h> namespace Test { -template< class Device , class WorkSpec = size_t > +template< class Device, class WorkSpec = size_t > struct TestScan { + typedef Device execution_space; + typedef long int value_type; - typedef Device execution_space ; - typedef long int value_type ; - - Kokkos::View<int,Device,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View< int, Device, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; KOKKOS_INLINE_FUNCTION - void operator()( const int iwork , value_type & update , const bool final_pass ) const + void operator()( const int iwork, value_type & update, const bool final_pass ) const { - const value_type n = iwork + 1 ; - const value_type imbalance = ( (1000 <= n) && (0 == n % 1000) ) ? 1000 : 0 ; + const value_type n = iwork + 1; + const value_type imbalance = ( ( 1000 <= n ) && ( 0 == n % 1000 ) ) ? 1000 : 0; // Insert an artificial load imbalance - for ( value_type i = 0 ; i < imbalance ; ++i ) { ++update ; } + for ( value_type i = 0; i < imbalance; ++i ) { ++update; } - update += n - imbalance ; + update += n - imbalance; if ( final_pass ) { const value_type answer = n & 1 ? ( n * ( ( n + 1 ) / 2 ) ) : ( ( n / 2 ) * ( n + 1 ) ); if ( answer != update ) { errors()++; - if(errors()<20) - printf("TestScan(%d,%ld) != %ld\n",iwork,update,answer); + + if ( errors() < 20 ) { + printf( "TestScan(%d,%ld) != %ld\n", iwork, update, answer ); + } } } } KOKKOS_INLINE_FUNCTION - void init( value_type & update ) const { update = 0 ; } + void init( value_type & update ) const { update = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & update , + void join( volatile value_type & update, volatile const value_type & input ) const - { update += input ; } + { update += input; } TestScan( const WorkSpec & N ) - { - Kokkos::View<int,Device > errors_a("Errors"); - Kokkos::deep_copy(errors_a,0); - errors = errors_a; - parallel_scan( N , *this ); - } + { + Kokkos::View< int, Device > errors_a( "Errors" ); + Kokkos::deep_copy( errors_a, 0 ); + errors = errors_a; + + parallel_scan( N , *this ); + } TestScan( const WorkSpec & Start , const WorkSpec & N ) - { - typedef Kokkos::RangePolicy<execution_space> exec_policy ; + { + typedef Kokkos::RangePolicy< execution_space > exec_policy ; - Kokkos::View<int,Device > errors_a("Errors"); - Kokkos::deep_copy(errors_a,0); - errors = errors_a; + Kokkos::View< int, Device > errors_a( "Errors" ); + Kokkos::deep_copy( errors_a, 0 ); + errors = errors_a; - parallel_scan( exec_policy( Start , N ) , *this ); - } + parallel_scan( exec_policy( Start , N ) , *this ); + } - static void test_range( const WorkSpec & begin , const WorkSpec & end ) - { - for ( WorkSpec i = begin ; i < end ; ++i ) { - (void) TestScan( i ); - } + static void test_range( const WorkSpec & begin, const WorkSpec & end ) + { + for ( WorkSpec i = begin; i < end; ++i ) { + (void) TestScan( i ); } + } }; -} - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp index 
291f9f60e4b8050e11b653f3f3ae975f1d1e8c91..6eca6bb38db08d562672d39b32eb22663da9f5b2 100644 --- a/lib/kokkos/core/unit_test/TestSharedAlloc.hpp +++ b/lib/kokkos/core/unit_test/TestSharedAlloc.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -54,162 +54,157 @@ namespace Test { struct SharedAllocDestroy { + volatile int * count; - volatile int * count ; - - SharedAllocDestroy() = default ; + SharedAllocDestroy() = default; SharedAllocDestroy( int * arg ) : count( arg ) {} void destroy_shared_allocation() - { - Kokkos::atomic_increment( count ); - } - + { + Kokkos::atomic_increment( count ); + } }; -template< class MemorySpace , class ExecutionSpace > +template< class MemorySpace, class ExecutionSpace > void test_shared_alloc() { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + typedef const Kokkos::Impl::SharedAllocationHeader Header; + typedef Kokkos::Impl::SharedAllocationTracker Tracker; + typedef Kokkos::Impl::SharedAllocationRecord< void, void > RecordBase; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace, void > RecordMemS; + typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace, SharedAllocDestroy > RecordFull; - typedef const Kokkos::Impl::SharedAllocationHeader Header ; - typedef Kokkos::Impl::SharedAllocationTracker Tracker ; - typedef Kokkos::Impl::SharedAllocationRecord< void , void > RecordBase ; - typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , void > RecordMemS ; - typedef Kokkos::Impl::SharedAllocationRecord< MemorySpace , SharedAllocDestroy > RecordFull ; - - static_assert( sizeof(Tracker) == sizeof(int*), "SharedAllocationTracker has wrong size!" ); + static_assert( sizeof( Tracker ) == sizeof( int* ), "SharedAllocationTracker has wrong size!" 
); - MemorySpace s ; + MemorySpace s; - const size_t N = 1200 ; - const size_t size = 8 ; + const size_t N = 1200; + const size_t size = 8; RecordMemS * rarray[ N ]; Header * harray[ N ]; - RecordMemS ** const r = rarray ; - Header ** const h = harray ; + RecordMemS ** const r = rarray; + Header ** const h = harray; + + Kokkos::RangePolicy< ExecutionSpace > range( 0, N ); - Kokkos::RangePolicy< ExecutionSpace > range(0,N); - - //---------------------------------------- { - // Since always executed on host space, leave [=] - Kokkos::parallel_for( range , [=]( size_t i ){ - char name[64] ; - sprintf(name,"test_%.2d",int(i)); + // Since always executed on host space, leave [=] + Kokkos::parallel_for( range, [=] ( size_t i ) { + char name[64]; + sprintf( name, "test_%.2d", int( i ) ); - r[i] = RecordMemS::allocate( s , name , size * ( i + 1 ) ); + r[i] = RecordMemS::allocate( s, name, size * ( i + 1 ) ); h[i] = Header::get_header( r[i]->data() ); - ASSERT_EQ( r[i]->use_count() , 0 ); + ASSERT_EQ( r[i]->use_count(), 0 ); - for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + for ( size_t j = 0; j < ( i / 10 ) + 1; ++j ) RecordBase::increment( r[i] ); - ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); - ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + ASSERT_EQ( r[i]->use_count(), ( i / 10 ) + 1 ); + ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); // Sanity check for the whole set of allocation records to which this record belongs. RecordBase::is_sane( r[0] ); - // RecordMemS::print_records( std::cout , s , true ); + // RecordMemS::print_records( std::cout, s, true ); - Kokkos::parallel_for( range , [=]( size_t i ){ - while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) { + Kokkos::parallel_for( range, [=] ( size_t i ) { + while ( 0 != ( r[i] = static_cast< RecordMemS * >( RecordBase::decrement( r[i] ) ) ) ) { if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] ); } }); } - //---------------------------------------- + { - int destroy_count = 0 ; - SharedAllocDestroy counter( & destroy_count ); + int destroy_count = 0; + SharedAllocDestroy counter( &destroy_count ); - Kokkos::parallel_for( range , [=]( size_t i ){ - char name[64] ; - sprintf(name,"test_%.2d",int(i)); + Kokkos::parallel_for( range, [=] ( size_t i ) { + char name[64]; + sprintf( name, "test_%.2d", int( i ) ); - RecordFull * rec = RecordFull::allocate( s , name , size * ( i + 1 ) ); + RecordFull * rec = RecordFull::allocate( s, name, size * ( i + 1 ) ); - rec->m_destroy = counter ; + rec->m_destroy = counter; - r[i] = rec ; + r[i] = rec; h[i] = Header::get_header( r[i]->data() ); - ASSERT_EQ( r[i]->use_count() , 0 ); + ASSERT_EQ( r[i]->use_count(), 0 ); - for ( size_t j = 0 ; j < ( i / 10 ) + 1 ; ++j ) RecordBase::increment( r[i] ); + for ( size_t j = 0; j < ( i / 10 ) + 1; ++j ) RecordBase::increment( r[i] ); - ASSERT_EQ( r[i]->use_count() , ( i / 10 ) + 1 ); - ASSERT_EQ( r[i] , RecordMemS::get_record( r[i]->data() ) ); + ASSERT_EQ( r[i]->use_count(), ( i / 10 ) + 1 ); + ASSERT_EQ( r[i], RecordMemS::get_record( r[i]->data() ) ); }); RecordBase::is_sane( r[0] ); - Kokkos::parallel_for( range , [=]( size_t i ){ - while ( 0 != ( r[i] = static_cast< RecordMemS *>( RecordBase::decrement( r[i] ) ) ) ) { + Kokkos::parallel_for( range, [=] ( size_t i ) { + while ( 0 != ( r[i] = static_cast< RecordMemS * >( RecordBase::decrement( r[i] ) ) ) ) { if ( r[i]->use_count() == 1 ) RecordBase::is_sane( r[i] ); } }); - ASSERT_EQ( destroy_count , int(N) ); + 
ASSERT_EQ( destroy_count, int( N ) ); } - //---------------------------------------- { - int destroy_count = 0 ; + int destroy_count = 0; { - RecordFull * rec = RecordFull::allocate( s , "test" , size ); + RecordFull * rec = RecordFull::allocate( s, "test", size ); - // ... Construction of the allocated { rec->data() , rec->size() } + // ... Construction of the allocated { rec->data(), rec->size() } - // Copy destruction function object into the allocation record + // Copy destruction function object into the allocation record. rec->m_destroy = SharedAllocDestroy( & destroy_count ); - ASSERT_EQ( rec->use_count() , 0 ); + ASSERT_EQ( rec->use_count(), 0 ); - // Start tracking, increments the use count from 0 to 1 - Tracker track ; + // Start tracking, increments the use count from 0 to 1. + Tracker track; track.assign_allocated_record_to_uninitialized( rec ); - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); + + // Verify construction / destruction increment. + for ( size_t i = 0; i < N; ++i ) { + ASSERT_EQ( rec->use_count(), 1 ); - // Verify construction / destruction increment - for ( size_t i = 0 ; i < N ; ++i ) { - ASSERT_EQ( rec->use_count() , 1 ); { - Tracker local_tracker ; + Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); - ASSERT_EQ( rec->use_count() , 2 ); - ASSERT_EQ( local_tracker.use_count() , 2 ); + ASSERT_EQ( rec->use_count(), 2 ); + ASSERT_EQ( local_tracker.use_count(), 2 ); } - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); } - Kokkos::parallel_for( range , [=]( size_t i ){ - Tracker local_tracker ; + Kokkos::parallel_for( range, [=] ( size_t i ) { + Tracker local_tracker; local_tracker.assign_allocated_record_to_uninitialized( rec ); - ASSERT_GT( rec->use_count() , 1 ); + ASSERT_GT( rec->use_count(), 1 ); }); - ASSERT_EQ( rec->use_count() , 1 ); - ASSERT_EQ( track.use_count() , 1 ); + ASSERT_EQ( rec->use_count(), 1 ); + ASSERT_EQ( track.use_count(), 1 ); // Destruction of 'track' object deallocates the 'rec' and invokes the destroy function object. } - ASSERT_EQ( destroy_count , 1 ); + ASSERT_EQ( destroy_count, 1 ); } #endif /* #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) */ } - -} - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestSynchronic.cpp b/lib/kokkos/core/unit_test/TestSynchronic.cpp deleted file mode 100644 index dc1abbd8b3d6a0532408956a5a7bffff1ec2f3f6..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestSynchronic.cpp +++ /dev/null @@ -1,449 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -//#undef _WIN32_WINNT -//#define _WIN32_WINNT 0x0602 - -#if defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || \ - defined(__APPLE__) || defined(__ARM_ARCH_8A) || defined(_CRAYC) - -// Skip for now - -#else - -#include <gtest/gtest.h> - -#ifdef USEOMP -#include <omp.h> -#endif - -#include <iostream> -#include <sstream> -#include <algorithm> -#include <string> -#include <vector> -#include <map> -#include <cstring> -#include <ctime> - -//#include <details/config> -//#undef __SYNCHRONIC_COMPATIBLE - -#include <impl/Kokkos_Synchronic.hpp> -#include <impl/Kokkos_Synchronic_n3998.hpp> - -#include "TestSynchronic.hpp" - -// Uncomment to allow test to dump output -//#define VERBOSE_TEST - -namespace Test { - -unsigned next_table[] = - { - 0, 1, 2, 3, //0-3 - 4, 4, 6, 6, //4-7 - 8, 8, 8, 8, //8-11 - 12, 12, 12, 12, //12-15 - 16, 16, 16, 16, //16-19 - 16, 16, 16, 16, //20-23 - 24, 24, 24, 24, //24-27 - 24, 24, 24, 24, //28-31 - 32, 32, 32, 32, //32-35 - 32, 32, 32, 32, //36-39 - 40, 40, 40, 40, //40-43 - 40, 40, 40, 40, //44-47 - 48, 48, 48, 48, //48-51 - 48, 48, 48, 48, //52-55 - 56, 56, 56, 56, //56-59 - 56, 56, 56, 56, //60-63 - }; - -//change this if you want to allow oversubscription of the system, by default only the range {1-(system size)} is tested -#define FOR_GAUNTLET(x) for(unsigned x = (std::min)(std::thread::hardware_concurrency()*8,unsigned(sizeof(next_table)/sizeof(unsigned))); x; x = next_table[x-1]) - -//set this to override the benchmark of barriers to use OMP barriers instead of n3998 std::barrier -//#define USEOMP - -#if defined(__SYNCHRONIC_COMPATIBLE) - #define PREFIX "futex-" -#else - #define PREFIX "backoff-" -#endif - -//this test uses a custom Mersenne twister to eliminate implementation variation -MersenneTwister mt; - -int dummya = 1, dummyb =1; - -int dummy1 = 1; -std::atomic<int> dummy2(1); -std::atomic<int> dummy3(1); - -double time_item(int const count = (int)1E8) { - - clock_t const start = clock(); - - for(int i = 0;i < count; ++i) - mt.integer(); - - clock_t const end = clock(); - double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); - - return elapsed_seconds / count; -} -double time_nil(int const count = (int)1E08) { - - clock_t const start = clock(); - - dummy3 = count; - for(int i = 0;i < (int)1E6; ++i) { - if(dummy1) { - // Do some work while holding the lock - int workunits = dummy3;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); - for (int j = 1; j < workunits; j++) - dummy1 &= j; // Do one work unit - dummy2.fetch_add(dummy1,std::memory_order_relaxed); - } - } - - clock_t const end = clock(); - double elapsed_seconds = (end - start) / double(CLOCKS_PER_SEC); - - return elapsed_seconds / count; -} - - -template <class mutex_type> -void testmutex_inner(mutex_type& m, std::atomic<int>& t,std::atomic<int>& wc,std::atomic<int>& wnc, int const num_iterations, - int const num_items_critical, int const num_items_noncritical, MersenneTwister& mtc, MersenneTwister& mtnc, bool skip) { - - for(int k = 0; k < 
num_iterations; ++k) { - - if(num_items_noncritical) { - // Do some work without holding the lock - int workunits = num_items_noncritical;//(int) (mtnc.poissonInterval((float)num_items_noncritical) + 0.5f); - for (int i = 1; i < workunits; i++) - mtnc.integer(); // Do one work unit - wnc.fetch_add(workunits,std::memory_order_relaxed); - } - - t.fetch_add(1,std::memory_order_relaxed); - - if(!skip) { - std::unique_lock<mutex_type> l(m); - if(num_items_critical) { - // Do some work while holding the lock - int workunits = num_items_critical;//(int) (mtc.poissonInterval((float)num_items_critical) + 0.5f); - for (int i = 1; i < workunits; i++) - mtc.integer(); // Do one work unit - wc.fetch_add(workunits,std::memory_order_relaxed); - } - } - } -} -template <class mutex_type> -void testmutex_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double critical_fraction, double critical_duration) { - - std::ostringstream truename; - truename << name << " (f=" << critical_fraction << ",d=" << critical_duration << ")"; - - std::vector<double>& data = results[truename.str()]; - - double const workItemTime = time_item() , - nilTime = time_nil(); - - int const num_items_critical = (critical_duration <= 0 ? 0 : (std::max)( int(critical_duration / workItemTime + 0.5), int(100 * nilTime / workItemTime + 0.5))), - num_items_noncritical = (num_items_critical <= 0 ? 0 : int( ( 1 - critical_fraction ) * num_items_critical / critical_fraction + 0.5 )); - - FOR_GAUNTLET(num_threads) { - - //Kokkos::Impl::portable_sleep(std::chrono::microseconds(2000000)); - - int const num_iterations = (num_items_critical + num_items_noncritical != 0) ? -#ifdef __SYNCHRONIC_JUST_YIELD - int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : -#else - int( 1 / ( 8 * workItemTime ) / (num_items_critical + num_items_noncritical) / num_threads + 0.5 ) : -#endif -#ifdef WIN32 - int( 1 / workItemTime / (20 * num_threads * num_threads) ); -#else - int( 1 / workItemTime / (200 * num_threads * num_threads) ); -#endif - -#ifdef VERBOSE_TEST - std::cerr << "running " << truename.str() << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\n" << std::flush; -#endif - - - std::atomic<int> t[2], wc[2], wnc[2]; - - clock_t start[2], end[2]; - for(int pass = 0; pass < 2; ++pass) { - - t[pass] = 0; - wc[pass] = 0; - wnc[pass] = 0; - - srand(num_threads); - std::vector<MersenneTwister> randomsnc(num_threads), - randomsc(num_threads); - - mutex_type m; - - start[pass] = clock(); -#ifdef USEOMP - omp_set_num_threads(num_threads); - std::atomic<int> _j(0); - #pragma omp parallel - { - int const j = _j.fetch_add(1,std::memory_order_relaxed); - testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); - num_threads = omp_get_num_threads(); - } -#else - std::vector<std::thread*> threads(num_threads); - for(unsigned j = 0; j < num_threads; ++j) - threads[j] = new std::thread([&,j](){ - testmutex_inner(m, t[pass], wc[pass], wnc[pass], num_iterations, num_items_critical, num_items_noncritical, randomsc[j], randomsnc[j], pass==0); - } - ); - for(unsigned j = 0; j < num_threads; ++j) { - threads[j]->join(); - delete threads[j]; - } -#endif - end[pass] = clock(); - } - if(t[0] != t[1]) throw std::string("mismatched iteration counts"); - if(wnc[0] != wnc[1]) throw std::string("mismatched work item counts"); - - double elapsed_seconds_0 = (end[0] - start[0]) / 
double(CLOCKS_PER_SEC), - elapsed_seconds_1 = (end[1] - start[1]) / double(CLOCKS_PER_SEC); - double time = (elapsed_seconds_1 - elapsed_seconds_0 - wc[1]*workItemTime) / num_iterations; - - data.push_back(time); -#ifdef VERBOSE_TEST - std::cerr << truename.str() << " : " << num_threads << "," << elapsed_seconds_1 / num_iterations << " - " << elapsed_seconds_0 / num_iterations << " - " << wc[1]*workItemTime/num_iterations << " = " << time << " \n"; -#endif - } -} - -template <class barrier_type> -void testbarrier_inner(barrier_type& b, int const num_threads, int const j, std::atomic<int>& t,std::atomic<int>& w, - int const num_iterations_odd, int const num_iterations_even, - int const num_items_noncritical, MersenneTwister& arg_mt, bool skip) { - - for(int k = 0; k < (std::max)(num_iterations_even,num_iterations_odd); ++k) { - - if(k >= (~j & 0x1 ? num_iterations_odd : num_iterations_even )) { - if(!skip) - b.arrive_and_drop(); - break; - } - - if(num_items_noncritical) { - // Do some work without holding the lock - int workunits = (int) (arg_mt.poissonInterval((float)num_items_noncritical) + 0.5f); - for (int i = 1; i < workunits; i++) - arg_mt.integer(); // Do one work unit - w.fetch_add(workunits,std::memory_order_relaxed); - } - - t.fetch_add(1,std::memory_order_relaxed); - - if(!skip) { - int const thiscount = (std::min)(k+1,num_iterations_odd)*((num_threads>>1)+(num_threads&1)) + (std::min)(k+1,num_iterations_even)*(num_threads>>1); - if(t.load(std::memory_order_relaxed) > thiscount) { - std::cerr << "FAILURE: some threads have run ahead of the barrier (" << t.load(std::memory_order_relaxed) << ">" << thiscount << ").\n"; - EXPECT_TRUE(false); - } -#ifdef USEOMP - #pragma omp barrier -#else - b.arrive_and_wait(); -#endif - if(t.load(std::memory_order_relaxed) < thiscount) { - std::cerr << "FAILURE: some threads have fallen behind the barrier (" << t.load(std::memory_order_relaxed) << "<" << thiscount << ").\n"; - EXPECT_TRUE(false); - } - } - } -} -template <class barrier_type> -void testbarrier_outer(std::map<std::string,std::vector<double>>& results, std::string const& name, double barrier_frequency, double phase_duration, bool randomIterations = false) { - - std::vector<double>& data = results[name]; - - double const workItemTime = time_item(); - int const num_items_noncritical = int( phase_duration / workItemTime + 0.5 ); - - FOR_GAUNTLET(num_threads) { - - int const num_iterations = int( barrier_frequency ); -#ifdef VERBOSE_TEST - std::cerr << "running " << name << " #" << num_threads << ", " << num_iterations << " * " << num_items_noncritical << "\r" << std::flush; -#endif - - srand(num_threads); - - MersenneTwister local_mt; - int const num_iterations_odd = randomIterations ? int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations, - num_iterations_even = randomIterations ? 
int(local_mt.poissonInterval((float)num_iterations)+0.5f) : num_iterations; - - std::atomic<int> t[2], w[2]; - std::chrono::time_point<std::chrono::high_resolution_clock> start[2], end[2]; - for(int pass = 0; pass < 2; ++pass) { - - t[pass] = 0; - w[pass] = 0; - - srand(num_threads); - std::vector<MersenneTwister> randoms(num_threads); - - barrier_type b(num_threads); - - start[pass] = std::chrono::high_resolution_clock::now(); -#ifdef USEOMP - omp_set_num_threads(num_threads); - std::atomic<int> _j(0); - #pragma omp parallel - { - int const j = _j.fetch_add(1,std::memory_order_relaxed); - testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); - num_threads = omp_get_num_threads(); - } -#else - std::vector<std::thread*> threads(num_threads); - for(unsigned j = 0; j < num_threads; ++j) - threads[j] = new std::thread([&,j](){ - testbarrier_inner(b, num_threads, j, t[pass], w[pass], num_iterations_odd, num_iterations_even, num_items_noncritical, randoms[j], pass==0); - }); - for(unsigned j = 0; j < num_threads; ++j) { - threads[j]->join(); - delete threads[j]; - } -#endif - end[pass] = std::chrono::high_resolution_clock::now(); - } - - if(t[0] != t[1]) throw std::string("mismatched iteration counts"); - if(w[0] != w[1]) throw std::string("mismatched work item counts"); - - int const phases = (std::max)(num_iterations_odd, num_iterations_even); - - std::chrono::duration<double> elapsed_seconds_0 = end[0]-start[0], - elapsed_seconds_1 = end[1]-start[1]; - double const time = (elapsed_seconds_1.count() - elapsed_seconds_0.count()) / phases; - - data.push_back(time); -#ifdef VERBOSE_TEST - std::cerr << name << " : " << num_threads << "," << elapsed_seconds_1.count() / phases << " - " << elapsed_seconds_0.count() / phases << " = " << time << " \n"; -#endif - } -} - -template <class... T> -struct mutex_tester; -template <class F> -struct mutex_tester<F> { - static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) { - testmutex_outer<F>(results, *name, critical_fraction, critical_duration); - } -}; -template <class F, class... 
T> -struct mutex_tester<F,T...> { - static void run(std::map<std::string,std::vector<double>>& results, std::string const name[], double critical_fraction, double critical_duration) { - mutex_tester<F>::run(results, name, critical_fraction, critical_duration); - mutex_tester<T...>::run(results, ++name, critical_fraction, critical_duration); - } -}; - -TEST( synchronic, main ) -{ - //warm up - time_item(); - - //measure up -#ifdef VERBOSE_TEST - std::cerr << "measuring work item speed...\r"; - std::cerr << "work item speed is " << time_item() << " per item, nil is " << time_nil() << "\n"; -#endif - try { - - std::pair<double,double> testpoints[] = { {1, 0}, /*{1E-1, 10E-3}, {5E-1, 2E-6}, {3E-1, 50E-9},*/ }; - for(auto x : testpoints ) { - - std::map<std::string,std::vector<double>> results; - - //testbarrier_outer<std::barrier>(results, PREFIX"bar 1khz 100us", 1E3, x.second); - - std::string const names[] = { - PREFIX"tkt", PREFIX"mcs", PREFIX"ttas", PREFIX"std" -#ifdef WIN32 - ,PREFIX"srw" -#endif - }; - - //run --> - - mutex_tester< - ticket_mutex, mcs_mutex, ttas_mutex, std::mutex -#ifdef WIN32 - ,srw_mutex -#endif - >::run(results, names, x.first, x.second); - - //<-- run - -#ifdef VERBOSE_TEST - std::cout << "threads"; - for(auto & i : results) - std::cout << ",\"" << i.first << '\"'; - std::cout << std::endl; - int j = 0; - FOR_GAUNTLET(num_threads) { - std::cout << num_threads; - for(auto & i : results) - std::cout << ',' << i.second[j]; - std::cout << std::endl; - ++j; - } -#endif - } - } - catch(std::string & e) { - std::cerr << "EXCEPTION : " << e << std::endl; - EXPECT_TRUE( false ); - } -} - -} // namespace Test - -#endif diff --git a/lib/kokkos/core/unit_test/TestSynchronic.hpp b/lib/kokkos/core/unit_test/TestSynchronic.hpp deleted file mode 100644 index f4341b97815b8d70956dfb85cf0d41a4f07bab4d..0000000000000000000000000000000000000000 --- a/lib/kokkos/core/unit_test/TestSynchronic.hpp +++ /dev/null @@ -1,241 +0,0 @@ -/* - -Copyright (c) 2014, NVIDIA Corporation -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this -list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, -INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, -BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef TEST_SYNCHRONIC_HPP -#define TEST_SYNCHRONIC_HPP - -#include <impl/Kokkos_Synchronic.hpp> -#include <mutex> -#include <cmath> - -namespace Test { - -template <bool truly> -struct dumb_mutex { - - dumb_mutex () : locked(0) { - } - - void lock() { - while(1) { - bool state = false; - if (locked.compare_exchange_weak(state,true,std::memory_order_acquire)) { - break; - } - while (locked.load(std::memory_order_relaxed)) { - if (!truly) { - Kokkos::Impl::portable_yield(); - } - } - } - } - - void unlock() { - locked.store(false,std::memory_order_release); - } - -private : - std::atomic<bool> locked; -}; - -#ifdef WIN32 -#include <winsock2.h> -#include <windows.h> -#include <synchapi.h> -struct srw_mutex { - - srw_mutex () { - InitializeSRWLock(&_lock); - } - - void lock() { - AcquireSRWLockExclusive(&_lock); - } - void unlock() { - ReleaseSRWLockExclusive(&_lock); - } - -private : - SRWLOCK _lock; -}; -#endif - -struct ttas_mutex { - - ttas_mutex() : locked(false) { - } - - ttas_mutex(const ttas_mutex&) = delete; - ttas_mutex& operator=(const ttas_mutex&) = delete; - - void lock() { - for(int i = 0;; ++i) { - bool state = false; - if(locked.compare_exchange_weak(state,true,std::memory_order_relaxed,Kokkos::Impl::notify_none)) - break; - locked.expect_update(true); - } - std::atomic_thread_fence(std::memory_order_acquire); - } - void unlock() { - locked.store(false,std::memory_order_release); - } - -private : - Kokkos::Impl::synchronic<bool> locked; -}; - -struct ticket_mutex { - - ticket_mutex() : active(0), queue(0) { - } - - ticket_mutex(const ticket_mutex&) = delete; - ticket_mutex& operator=(const ticket_mutex&) = delete; - - void lock() { - int const me = queue.fetch_add(1, std::memory_order_relaxed); - while(me != active.load_when_equal(me, std::memory_order_acquire)) - ; - } - - void unlock() { - active.fetch_add(1,std::memory_order_release); - } -private : - Kokkos::Impl::synchronic<int> active; - std::atomic<int> queue; -}; - -struct mcs_mutex { - - mcs_mutex() : head(nullptr) { - } - - mcs_mutex(const mcs_mutex&) = delete; - mcs_mutex& operator=(const mcs_mutex&) = delete; - - struct unique_lock { - - unique_lock(mcs_mutex & arg_m) : m(arg_m), next(nullptr), ready(false) { - - unique_lock * const h = m.head.exchange(this,std::memory_order_acquire); - if(__builtin_expect(h != nullptr,0)) { - h->next.store(this,std::memory_order_seq_cst,Kokkos::Impl::notify_one); - while(!ready.load_when_not_equal(false,std::memory_order_acquire)) - ; - } - } - - unique_lock(const unique_lock&) = delete; - unique_lock& operator=(const unique_lock&) = delete; - - ~unique_lock() { - unique_lock * h = this; - if(__builtin_expect(!m.head.compare_exchange_strong(h,nullptr,std::memory_order_release, std::memory_order_relaxed),0)) { - unique_lock * n = next.load(std::memory_order_relaxed); - while(!n) - n = next.load_when_not_equal(n,std::memory_order_relaxed); - n->ready.store(true,std::memory_order_release,Kokkos::Impl::notify_one); - } - } - - private: - mcs_mutex & m; - Kokkos::Impl::synchronic<unique_lock*> next; - Kokkos::Impl::synchronic<bool> ready; - }; - -private : - std::atomic<unique_lock*> head; -}; - -} - -namespace std { -template<> -struct unique_lock<Test::mcs_mutex> : Test::mcs_mutex::unique_lock { - unique_lock(Test::mcs_mutex & arg_m) : Test::mcs_mutex::unique_lock(arg_m) { - } - unique_lock(const unique_lock&) = delete; - unique_lock& operator=(const unique_lock&) = delete; -}; - -} - -/* #include <cmath> */ -#include <stdlib.h> - -namespace Test { - 
-//------------------------------------- -// MersenneTwister -//------------------------------------- -#define MT_IA 397 -#define MT_LEN 624 - -class MersenneTwister -{ - volatile unsigned long m_buffer[MT_LEN][64/sizeof(unsigned long)]; - volatile int m_index; - -public: - MersenneTwister() { - for (int i = 0; i < MT_LEN; i++) - m_buffer[i][0] = rand(); - m_index = 0; - for (int i = 0; i < MT_LEN * 100; i++) - integer(); - } - unsigned long integer() { - // Indices - int i = m_index; - int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around - int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around - - // Twist - unsigned long s = (m_buffer[i][0] & 0x80000000) | (m_buffer[i2][0] & 0x7fffffff); - unsigned long r = m_buffer[j][0] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF); - m_buffer[m_index][0] = r; - m_index = i2; - - // Swizzle - r ^= (r >> 11); - r ^= (r << 7) & 0x9d2c5680UL; - r ^= (r << 15) & 0xefc60000UL; - r ^= (r >> 18); - return r; - } - float poissonInterval(float ooLambda) { - return -logf(1.0f - integer() * 2.3283e-10f) * ooLambda; - } -}; - -} // namespace Test - -#endif //TEST_HPP diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index 1134553980f8a63351f85a86b33537a35d52644c..57e47d4baa0d177dca9379cf43a05742af2519d1 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,11 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ - #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP #define KOKKOS_UNITTEST_TASKSCHEDULER_HPP @@ -51,9 +50,6 @@ #if defined( KOKKOS_ENABLE_TASKDAG ) -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - namespace TestTaskScheduler { namespace { @@ -61,14 +57,14 @@ namespace { inline long eval_fib( long n ) { - constexpr long mask = 0x03 ; + constexpr long mask = 0x03; - long fib[4] = { 0 , 1 , 1 , 2 }; + long fib[4] = { 0, 1, 1, 2 }; - for ( long i = 2 ; i <= n ; ++i ) { + for ( long i = 2; i <= n; ++i ) { fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; } - + return fib[ n & mask ]; } @@ -77,100 +73,93 @@ long eval_fib( long n ) template< typename Space > struct TestFib { - typedef Kokkos::TaskScheduler<Space> policy_type ; - typedef Kokkos::Future<long,Space> future_type ; - typedef long value_type ; + typedef Kokkos::TaskScheduler< Space > sched_type; + typedef Kokkos::Future< long, Space > future_type; + typedef long value_type; - policy_type policy ; - future_type fib_m1 ; - future_type fib_m2 ; - const value_type n ; + sched_type sched; + future_type fib_m1; + future_type fib_m2; + const value_type n; KOKKOS_INLINE_FUNCTION - TestFib( const policy_type & arg_policy , const value_type arg_n ) - : policy(arg_policy) - , fib_m1() , fib_m2() - , n( arg_n ) - {} + TestFib( const sched_type & arg_sched, const value_type arg_n ) + : sched( arg_sched ), fib_m1(), fib_m2(), n( arg_n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & , value_type & result ) - { + void operator()( typename sched_type::member_type &, value_type & result ) + { #if 0 - printf( "\nTestFib(%ld) %d %d\n" - , n - , int( ! fib_m1.is_null() ) - , int( ! fib_m2.is_null() ) - ); + printf( "\nTestFib(%ld) %d %d\n", n, int( !fib_m1.is_null() ), int( !fib_m2.is_null() ) ); #endif - if ( n < 2 ) { - result = n ; - } - else if ( ! fib_m2.is_null() && ! fib_m1.is_null() ) { - result = fib_m1.get() + fib_m2.get(); - } - else { - - // Spawn new children and respawn myself to sum their results: - // Spawn lower value at higher priority as it has a shorter - // path to completion. - - fib_m2 = policy.task_spawn( TestFib(policy,n-2) - , Kokkos::TaskSingle - , Kokkos::TaskHighPriority ); + if ( n < 2 ) { + result = n; + } + else if ( !fib_m2.is_null() && !fib_m1.is_null() ) { + result = fib_m1.get() + fib_m2.get(); + } + else { + // Spawn new children and respawn myself to sum their results. + // Spawn lower value at higher priority as it has a shorter + // path to completion. - fib_m1 = policy.task_spawn( TestFib(policy,n-1) - , Kokkos::TaskSingle ); + fib_m2 = Kokkos::task_spawn( Kokkos::TaskSingle( sched, Kokkos::TaskPriority::High ) + , TestFib( sched, n - 2 ) ); - Kokkos::Future<Space> dep[] = { fib_m1 , fib_m2 }; + fib_m1 = Kokkos::task_spawn( Kokkos::TaskSingle( sched ) + , TestFib( sched, n - 1 ) ); - Kokkos::Future<Space> fib_all = policy.when_all( 2 , dep ); + Kokkos::Future< Space > dep[] = { fib_m1, fib_m2 }; + Kokkos::Future< Space > fib_all = Kokkos::when_all( dep, 2 ); - if ( ! fib_m2.is_null() && ! fib_m1.is_null() && ! 
fib_all.is_null() ) { - // High priority to retire this branch - policy.respawn( this , Kokkos::TaskHighPriority , fib_all ); - } - else { + if ( !fib_m2.is_null() && !fib_m1.is_null() && !fib_all.is_null() ) { + // High priority to retire this branch. + Kokkos::respawn( this, fib_all, Kokkos::TaskPriority::High ); + } + else { #if 1 - printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , n - , policy.allocation_capacity() - , policy.allocated_task_count_max() - , policy.allocated_task_count_accum() - ); + printf( "TestFib(%ld) insufficient memory alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , n + , sched.allocation_capacity() + , sched.allocated_task_count_max() + , sched.allocated_task_count_accum() + ); #endif - Kokkos::abort("TestFib insufficient memory"); - } + Kokkos::abort( "TestFib insufficient memory" ); + } } + } - static void run( int i , size_t MemoryCapacity = 16000 ) - { - typedef typename policy_type::memory_space memory_space ; + static void run( int i, size_t MemoryCapacity = 16000 ) + { + typedef typename sched_type::memory_space memory_space; - enum { Log2_SuperBlockSize = 12 }; + enum { Log2_SuperBlockSize = 12 }; - policy_type root_policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + sched_type root_sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); - future_type f = root_policy.host_spawn( TestFib(root_policy,i) , Kokkos::TaskSingle ); - Kokkos::wait( root_policy ); - ASSERT_EQ( eval_fib(i) , f.get() ); + future_type f = Kokkos::host_spawn( Kokkos::TaskSingle( root_sched ) + , TestFib( root_sched, i ) ); + + Kokkos::wait( root_sched ); + + ASSERT_EQ( eval_fib( i ), f.get() ); #if 0 - fprintf( stdout , "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" - , i - , int(root_policy.template spawn_allocation_size<TestFib>()) - , int(root_policy.when_all_allocation_size(2)) - , root_policy.allocation_capacity() - , root_policy.allocated_task_count_max() - , root_policy.allocated_task_count_accum() - ); - fflush( stdout ); + fprintf( stdout, "\nTestFib::run(%d) spawn_size(%d) when_all_size(%d) alloc_capacity(%d) task_max(%d) task_accum(%ld)\n" + , i + , int(root_sched.template spawn_allocation_size<TestFib>()) + , int(root_sched.when_all_allocation_size(2)) + , root_sched.allocation_capacity() + , root_sched.allocated_task_count_max() + , root_sched.allocated_task_count_accum() + ); + fflush( stdout ); #endif - } - + } }; } // namespace TestTaskScheduler @@ -181,73 +170,71 @@ namespace TestTaskScheduler { template< class Space > struct TestTaskDependence { + typedef Kokkos::TaskScheduler< Space > sched_type; + typedef Kokkos::Future< Space > future_type; + typedef Kokkos::View< long, Space > accum_type; + typedef void value_type; - typedef Kokkos::TaskScheduler<Space> policy_type ; - typedef Kokkos::Future<Space> future_type ; - typedef Kokkos::View<long,Space> accum_type ; - typedef void value_type ; - - policy_type m_policy ; - accum_type m_accum ; - long m_count ; + sched_type m_sched; + accum_type m_accum; + long m_count; KOKKOS_INLINE_FUNCTION TestTaskDependence( long n - , const policy_type & arg_policy - , const accum_type & arg_accum ) - : m_policy( arg_policy ) + , const sched_type & arg_sched + , const accum_type & arg_accum ) + : m_sched( arg_sched ) , m_accum( arg_accum ) - , m_count( n ) - {} + , m_count( n ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & ) - { - enum { CHUNK = 8 }; - const int n = 
CHUNK < m_count ? CHUNK : m_count ; + void operator()( typename sched_type::member_type & ) + { + enum { CHUNK = 8 }; + const int n = CHUNK < m_count ? CHUNK : m_count; - if ( 1 < m_count ) { - future_type f[ CHUNK ] ; + if ( 1 < m_count ) { + future_type f[ CHUNK ]; - const int inc = ( m_count + n - 1 ) / n ; + const int inc = ( m_count + n - 1 ) / n; - for ( int i = 0 ; i < n ; ++i ) { - long begin = i * inc ; - long count = begin + inc < m_count ? inc : m_count - begin ; - f[i] = m_policy.task_spawn( TestTaskDependence(count,m_policy,m_accum) , Kokkos::TaskSingle ); - } + for ( int i = 0; i < n; ++i ) { + long begin = i * inc; + long count = begin + inc < m_count ? inc : m_count - begin; + f[i] = Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) + , TestTaskDependence( count, m_sched, m_accum ) ); + } - m_count = 0 ; + m_count = 0; - m_policy.respawn( this , m_policy.when_all( n , f ) ); - } - else if ( 1 == m_count ) { - Kokkos::atomic_increment( & m_accum() ); - } + Kokkos::respawn( this, Kokkos::when_all( f, n ) ); + } + else if ( 1 == m_count ) { + Kokkos::atomic_increment( & m_accum() ); } + } static void run( int n ) - { - typedef typename policy_type::memory_space memory_space ; + { + typedef typename sched_type::memory_space memory_space; - // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool - enum { MemoryCapacity = 16000 }; - enum { Log2_SuperBlockSize = 12 }; - policy_type policy( memory_space() , MemoryCapacity , Log2_SuperBlockSize ); + // enum { MemoryCapacity = 4000 }; // Triggers infinite loop in memory pool. + enum { MemoryCapacity = 16000 }; + enum { Log2_SuperBlockSize = 12 }; + sched_type sched( memory_space(), MemoryCapacity, Log2_SuperBlockSize ); - accum_type accum("accum"); + accum_type accum( "accum" ); - typename accum_type::HostMirror host_accum = - Kokkos::create_mirror_view( accum ); + typename accum_type::HostMirror host_accum = Kokkos::create_mirror_view( accum ); - policy.host_spawn( TestTaskDependence(n,policy,accum) , Kokkos::TaskSingle ); + Kokkos::host_spawn( Kokkos::TaskSingle( sched ), TestTaskDependence( n, sched, accum ) ); - Kokkos::wait( policy ); + Kokkos::wait( sched ); - Kokkos::deep_copy( host_accum , accum ); + Kokkos::deep_copy( host_accum, accum ); - ASSERT_EQ( host_accum() , n ); - } + ASSERT_EQ( host_accum(), n ); + } }; } // namespace TestTaskScheduler @@ -258,294 +245,317 @@ namespace TestTaskScheduler { template< class ExecSpace > struct TestTaskTeam { - //enum { SPAN = 8 }; enum { SPAN = 33 }; //enum { SPAN = 1 }; - typedef void value_type ; - typedef Kokkos::TaskScheduler<ExecSpace> policy_type ; - typedef Kokkos::Future<ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> view_type ; + typedef void value_type; + typedef Kokkos::TaskScheduler< ExecSpace > sched_type; + typedef Kokkos::Future< ExecSpace > future_type; + typedef Kokkos::View< long*, ExecSpace > view_type; - policy_type policy ; - future_type future ; + sched_type sched; + future_type future; - view_type parfor_result ; - view_type parreduce_check ; - view_type parscan_result ; - view_type parscan_check ; - const long nvalue ; + view_type parfor_result; + view_type parreduce_check; + view_type parscan_result; + view_type parscan_check; + const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeam( const policy_type & arg_policy - , const view_type & arg_parfor_result - , const view_type & arg_parreduce_check - , const view_type & arg_parscan_result - , const view_type & arg_parscan_check - , const long arg_nvalue ) - : 
policy(arg_policy) + TestTaskTeam( const sched_type & arg_sched + , const view_type & arg_parfor_result + , const view_type & arg_parreduce_check + , const view_type & arg_parscan_result + , const view_type & arg_parscan_check + , const long arg_nvalue ) + : sched( arg_sched ) , future() , parfor_result( arg_parfor_result ) , parreduce_check( arg_parreduce_check ) , parscan_result( arg_parscan_result ) , parscan_check( arg_parscan_check ) - , nvalue( arg_nvalue ) - {} + , nvalue( arg_nvalue ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type & member ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; - - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - future = policy.task_spawn - ( TestTaskTeam( policy , - parfor_result , - parreduce_check, - parscan_result, - parscan_check, - begin - 1 ) - , Kokkos::TaskTeam ); - - assert( ! future.is_null() ); - - policy.respawn( this , future ); - } - return ; - } + void operator()( typename sched_type::member_type & member ) + { + const long end = nvalue + 1; + const long begin = 0 < end - SPAN ? end - SPAN : 0; - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parfor_result[i] = i ; } - ); - - // test parallel_reduce without join - - long tot = 0; - long expected = (begin+end-1)*(end-begin)*0.5; - - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] = expected-tot ; } - ); - - // test parallel_reduce with join - - tot = 0; - Kokkos::parallel_reduce( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &res) { res += parfor_result[i]; } - , [&]( long& val1, const long& val2) { val1 += val2; } - , tot); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { parreduce_check[i] += expected-tot ; } - ); - - // test parallel_scan - - // Exclusive scan - Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - if ( final ) { parscan_result[i] = val; } - val += i; - } - ); + if ( 0 < begin && future.is_null() ) { if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] = (i*(i-1)-begin*(begin-1))*0.5-parscan_result[i]; - } + future = Kokkos::task_spawn( Kokkos::TaskTeam( sched ) + , TestTaskTeam( sched + , parfor_result + , parreduce_check + , parscan_result + , parscan_check + , begin - 1 ) + ); + + assert( !future.is_null() ); + + Kokkos::respawn( this, future ); } - // Inclusive scan - Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i, long &val , const bool final ) { - val += i; - if ( final ) { parscan_result[i] = val; } - } - ); - if ( member.team_rank() == 0 ) { - for ( long i = begin ; i < end ; ++i ) { - parscan_check[i] += (i*(i+1)-begin*(begin-1))*0.5-parscan_result[i]; - } + return; + } + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parfor_result[i] = i; } + ); + + // Test parallel_reduce without join. 
+ + long tot = 0; + long expected = ( begin + end - 1 ) * ( end - begin ) * 0.5; + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & res ) { res += parfor_result[i]; } + , tot + ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parreduce_check[i] = expected - tot; } + ); + + // Test parallel_reduce with join. + + tot = 0; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & res ) { res += parfor_result[i]; } +#if 0 + , Kokkos::Sum( tot ) +#else + , [] ( long & dst, const long & src ) { dst += src; } + , tot +#endif + ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { parreduce_check[i] += expected - tot; } + ); + + // Test parallel_scan. + + // Exclusive scan. + Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & val, const bool final ) + { + if ( final ) { parscan_result[i] = val; } + + val += i; + }); + + // Wait for 'parscan_result' before testing it. + member.team_barrier(); + + if ( member.team_rank() == 0 ) { + for ( long i = begin; i < end; ++i ) { + parscan_check[i] = ( i * ( i - 1 ) - begin * ( begin - 1 ) ) * 0.5 - parscan_result[i]; } - // ThreadVectorRange check - /* - long result = 0; - expected = (begin+end-1)*(end-begin)*0.5; - Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member , 0 , 1 ) - , [&] ( const int i , long & outerUpdate ) { - long sum_j = 0.0; - Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member , end - begin ) - , [&] ( const int j , long &innerUpdate ) { - innerUpdate += begin+j; - } , sum_j ); - outerUpdate += sum_j ; - } , result ); - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { - parreduce_check[i] += result-expected ; - } - ); - */ } - static void run( long n ) + // Don't overwrite 'parscan_result' until it has been tested. + member.team_barrier(); + + // Inclusive scan. 
+ Kokkos::parallel_scan<long>( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i, long & val, const bool final ) { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - // const unsigned memory_capacity = 100000 ; // fails with SPAN=1 for serial and OMP - const unsigned memory_capacity = 400000 ; - - policy_type root_policy( typename policy_type::memory_space() - , memory_capacity ); - - view_type root_parfor_result("parfor_result",n+1); - view_type root_parreduce_check("parreduce_check",n+1); - view_type root_parscan_result("parscan_result",n+1); - view_type root_parscan_check("parscan_check",n+1); - - typename view_type::HostMirror - host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); - typename view_type::HostMirror - host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); - typename view_type::HostMirror - host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); - typename view_type::HostMirror - host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); - - future_type f = root_policy.host_spawn( - TestTaskTeam( root_policy , - root_parfor_result , - root_parreduce_check , - root_parscan_result, - root_parscan_check, - n ) , - Kokkos::TaskTeam ); - - Kokkos::wait( root_policy ); - - Kokkos::deep_copy( host_parfor_result , root_parfor_result ); - Kokkos::deep_copy( host_parreduce_check , root_parreduce_check ); - Kokkos::deep_copy( host_parscan_result , root_parscan_result ); - Kokkos::deep_copy( host_parscan_check , root_parscan_check ); - - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i ; - if ( host_parfor_result(i) != answer ) { - std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " - << host_parfor_result(i) << " != " << answer << std::endl ; - } - if ( host_parreduce_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " - << host_parreduce_check(i) << " != 0" << std::endl ; - } - if ( host_parscan_check(i) != 0 ) { - std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " - << host_parscan_check(i) << " != 0" << std::endl ; - } + val += i; + + if ( final ) { parscan_result[i] = val; } + }); + + // Wait for 'parscan_result' before testing it. + member.team_barrier(); + + if ( member.team_rank() == 0 ) { + for ( long i = begin; i < end; ++i ) { + parscan_check[i] += ( i * ( i + 1 ) - begin * ( begin - 1 ) ) * 0.5 - parscan_result[i]; } } + + // ThreadVectorRange check. +/* + long result = 0; + expected = ( begin + end - 1 ) * ( end - begin ) * 0.5; + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( member, 0, 1 ) + , [&] ( const int i, long & outerUpdate ) + { + long sum_j = 0.0; + + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( member, end - begin ) + , [&] ( const int j, long & innerUpdate ) + { + innerUpdate += begin + j; + }, sum_j ); + + outerUpdate += sum_j; + }, result ); + + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) + { + parreduce_check[i] += result - expected; + }); +*/ + } + + static void run( long n ) + { + //const unsigned memory_capacity = 10000; // Causes memory pool infinite loop. + //const unsigned memory_capacity = 100000; // Fails with SPAN=1 for serial and OMP. 
+ const unsigned memory_capacity = 400000; + + sched_type root_sched( typename sched_type::memory_space(), memory_capacity ); + + view_type root_parfor_result( "parfor_result", n + 1 ); + view_type root_parreduce_check( "parreduce_check", n + 1 ); + view_type root_parscan_result( "parscan_result", n + 1 ); + view_type root_parscan_check( "parscan_check", n + 1 ); + + typename view_type::HostMirror + host_parfor_result = Kokkos::create_mirror_view( root_parfor_result ); + typename view_type::HostMirror + host_parreduce_check = Kokkos::create_mirror_view( root_parreduce_check ); + typename view_type::HostMirror + host_parscan_result = Kokkos::create_mirror_view( root_parscan_result ); + typename view_type::HostMirror + host_parscan_check = Kokkos::create_mirror_view( root_parscan_check ); + + future_type f = Kokkos::host_spawn( Kokkos::TaskTeam( root_sched ) + , TestTaskTeam( root_sched + , root_parfor_result + , root_parreduce_check + , root_parscan_result + , root_parscan_check + , n ) + ); + + Kokkos::wait( root_sched ); + + Kokkos::deep_copy( host_parfor_result, root_parfor_result ); + Kokkos::deep_copy( host_parreduce_check, root_parreduce_check ); + Kokkos::deep_copy( host_parscan_result, root_parscan_result ); + Kokkos::deep_copy( host_parscan_check, root_parscan_check ); + + for ( long i = 0; i <= n; ++i ) { + const long answer = i; + + if ( host_parfor_result( i ) != answer ) { + std::cerr << "TestTaskTeam::run ERROR parallel_for result(" << i << ") = " + << host_parfor_result( i ) << " != " << answer << std::endl; + } + + if ( host_parreduce_check( i ) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_reduce check(" << i << ") = " + << host_parreduce_check( i ) << " != 0" << std::endl; + } + + if ( host_parscan_check( i ) != 0 ) { + std::cerr << "TestTaskTeam::run ERROR parallel_scan check(" << i << ") = " + << host_parscan_check( i ) << " != 0" << std::endl; + } + } + } }; template< class ExecSpace > struct TestTaskTeamValue { - enum { SPAN = 8 }; - typedef long value_type ; - typedef Kokkos::TaskScheduler<ExecSpace> policy_type ; - typedef Kokkos::Future<value_type,ExecSpace> future_type ; - typedef Kokkos::View<long*,ExecSpace> view_type ; + typedef long value_type; + typedef Kokkos::TaskScheduler< ExecSpace > sched_type; + typedef Kokkos::Future< value_type, ExecSpace > future_type; + typedef Kokkos::View< long*, ExecSpace > view_type; - policy_type policy ; - future_type future ; + sched_type sched; + future_type future; - view_type result ; - const long nvalue ; + view_type result; + const long nvalue; KOKKOS_INLINE_FUNCTION - TestTaskTeamValue( const policy_type & arg_policy - , const view_type & arg_result - , const long arg_nvalue ) - : policy(arg_policy) + TestTaskTeamValue( const sched_type & arg_sched + , const view_type & arg_result + , const long arg_nvalue ) + : sched( arg_sched ) , future() , result( arg_result ) - , nvalue( arg_nvalue ) - {} + , nvalue( arg_nvalue ) {} KOKKOS_INLINE_FUNCTION - void operator()( typename policy_type::member_type const & member + void operator()( typename sched_type::member_type const & member , value_type & final ) - { - const long end = nvalue + 1 ; - const long begin = 0 < end - SPAN ? end - SPAN : 0 ; + { + const long end = nvalue + 1; + const long begin = 0 < end - SPAN ? 
end - SPAN : 0; - if ( 0 < begin && future.is_null() ) { - if ( member.team_rank() == 0 ) { - - future = policy.task_spawn - ( TestTaskTeamValue( policy , result , begin - 1 ) - , Kokkos::TaskTeam ); + if ( 0 < begin && future.is_null() ) { + if ( member.team_rank() == 0 ) { + future = sched.task_spawn( TestTaskTeamValue( sched, result, begin - 1 ) + , Kokkos::TaskTeam ); - assert( ! future.is_null() ); + assert( !future.is_null() ); - policy.respawn( this , future ); - } - return ; + sched.respawn( this , future ); } - Kokkos::parallel_for( Kokkos::TeamThreadRange(member,begin,end) - , [&]( int i ) { result[i] = i + 1 ; } - ); + return; + } - if ( member.team_rank() == 0 ) { - final = result[nvalue] ; - } + Kokkos::parallel_for( Kokkos::TeamThreadRange( member, begin, end ) + , [&] ( int i ) { result[i] = i + 1; } + ); - Kokkos::memory_fence(); + if ( member.team_rank() == 0 ) { + final = result[nvalue]; } + Kokkos::memory_fence(); + } + static void run( long n ) - { - // const unsigned memory_capacity = 10000 ; // causes memory pool infinite loop - const unsigned memory_capacity = 100000 ; + { + //const unsigned memory_capacity = 10000; // Causes memory pool infinite loop. + const unsigned memory_capacity = 100000; - policy_type root_policy( typename policy_type::memory_space() - , memory_capacity ); + sched_type root_sched( typename sched_type::memory_space() + , memory_capacity ); - view_type root_result("result",n+1); + view_type root_result( "result", n + 1 ); - typename view_type::HostMirror - host_result = Kokkos::create_mirror_view( root_result ); + typename view_type::HostMirror host_result = Kokkos::create_mirror_view( root_result ); - future_type fv = root_policy.host_spawn - ( TestTaskTeamValue( root_policy, root_result, n ) , Kokkos::TaskTeam ); + future_type fv = root_sched.host_spawn( TestTaskTeamValue( root_sched, root_result, n ) + , Kokkos::TaskTeam ); - Kokkos::wait( root_policy ); + Kokkos::wait( root_sched ); - Kokkos::deep_copy( host_result , root_result ); + Kokkos::deep_copy( host_result, root_result ); - if ( fv.get() != n + 1 ) { - std::cerr << "TestTaskTeamValue ERROR future = " - << fv.get() << " != " << n + 1 << std::endl ; - } - for ( long i = 0 ; i <= n ; ++i ) { - const long answer = i + 1 ; - if ( host_result(i) != answer ) { - std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " - << host_result(i) << " != " << answer << std::endl ; - } + if ( fv.get() != n + 1 ) { + std::cerr << "TestTaskTeamValue ERROR future = " + << fv.get() << " != " << n + 1 << std::endl; + } + + for ( long i = 0; i <= n; ++i ) { + const long answer = i + 1; + + if ( host_result( i ) != answer ) { + std::cerr << "TestTaskTeamValue ERROR result(" << i << ") = " + << host_result( i ) << " != " << answer << std::endl; } } + } }; -} // namespace TestTaskScheduler - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- -#endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ -#endif /* #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP */ +} // namespace TestTaskScheduler +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) +#endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP diff --git a/lib/kokkos/core/unit_test/TestTeam.hpp b/lib/kokkos/core/unit_test/TestTeam.hpp index bcf4d3a173686ad8b1d14abc45ee957bb8650389..11a523921db9995c18d38ac5e18661244acd0ecb 100644 --- a/lib/kokkos/core/unit_test/TestTeam.hpp +++ b/lib/kokkos/core/unit_test/TestTeam.hpp @@ -48,177 +48,169 @@ #include 
<Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { + namespace { template< class ExecSpace, class ScheduleType > struct TestTeamPolicy { + typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member; + typedef Kokkos::View< int**, ExecSpace > view_type; - typedef typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type team_member ; - typedef Kokkos::View<int**,ExecSpace> view_type ; - - view_type m_flags ; + view_type m_flags; TestTeamPolicy( const size_t league_size ) - : m_flags( Kokkos::ViewAllocateWithoutInitializing("flags") - , Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ) - , league_size ) - {} + : m_flags( Kokkos::ViewAllocateWithoutInitializing( "flags" ), + Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( *this ), + league_size ) {} struct VerifyInitTag {}; KOKKOS_INLINE_FUNCTION void operator()( const team_member & member ) const - { - const int tid = member.team_rank() + member.team_size() * member.league_rank(); + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); - m_flags( member.team_rank() , member.league_rank() ) = tid ; - } + m_flags( member.team_rank(), member.league_rank() ) = tid; + } KOKKOS_INLINE_FUNCTION - void operator()( const VerifyInitTag & , const team_member & member ) const - { - const int tid = member.team_rank() + member.team_size() * member.league_rank(); + void operator()( const VerifyInitTag &, const team_member & member ) const + { + const int tid = member.team_rank() + member.team_size() * member.league_rank(); - if ( tid != m_flags( member.team_rank() , member.league_rank() ) ) { - printf("TestTeamPolicy member(%d,%d) error %d != %d\n" - , member.league_rank() , member.team_rank() - , tid , m_flags( member.team_rank() , member.league_rank() ) ); - } + if ( tid != m_flags( member.team_rank(), member.league_rank() ) ) { + printf( "TestTeamPolicy member(%d,%d) error %d != %d\n", + member.league_rank(), member.team_rank(), + tid, m_flags( member.team_rank(), member.league_rank() ) ); } + } - // included for test_small_league_size - TestTeamPolicy() - : m_flags() - {} + // Included for test_small_league_size. + TestTeamPolicy() : m_flags() {} + + // Included for test_small_league_size. + struct NoOpTag {}; - // included for test_small_league_size - struct NoOpTag {} ; KOKKOS_INLINE_FUNCTION - void operator()( const NoOpTag & , const team_member & member ) const - {} + void operator()( const NoOpTag &, const team_member & member ) const {} static void test_small_league_size() { - int bs = 8; // batch size (number of elements per batch) int ns = 16; // total number of "problems" to process - // calculate total scratch memory space size + // Calculate total scratch memory space size. 
const int level = 0; int mem_size = 960; - const int num_teams = ns/bs; - const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy(num_teams, Kokkos::AUTO()); + const int num_teams = ns / bs; + const Kokkos::TeamPolicy< ExecSpace, NoOpTag > policy( num_teams, Kokkos::AUTO() ); - Kokkos::parallel_for ( policy.set_scratch_size(level, Kokkos::PerTeam(mem_size), Kokkos::PerThread(0)) - , TestTeamPolicy() - ); + Kokkos::parallel_for( policy.set_scratch_size( level, Kokkos::PerTeam( mem_size ), Kokkos::PerThread( 0 ) ), + TestTeamPolicy() ); } static void test_for( const size_t league_size ) - { - TestTeamPolicy functor( league_size ); + { + TestTeamPolicy functor( league_size ); - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor ); - Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace , VerifyInitTag >( league_size , team_size ) , functor ); + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor ); + Kokkos::parallel_for( Kokkos::TeamPolicy< ScheduleType, ExecSpace, VerifyInitTag >( league_size, team_size ), functor ); - test_small_league_size(); - } + test_small_league_size(); + } struct ReduceTag {}; - typedef long value_type ; + typedef long value_type; KOKKOS_INLINE_FUNCTION - void operator()( const team_member & member , value_type & update ) const - { - update += member.team_rank() + member.team_size() * member.league_rank(); - } + void operator()( const team_member & member, value_type & update ) const + { + update += member.team_rank() + member.team_size() * member.league_rank(); + } KOKKOS_INLINE_FUNCTION - void operator()( const ReduceTag & , const team_member & member , value_type & update ) const - { - update += 1 + member.team_rank() + member.team_size() * member.league_rank(); - } + void operator()( const ReduceTag &, const team_member & member, value_type & update ) const + { + update += 1 + member.team_rank() + member.team_size() * member.league_rank(); + } static void test_reduce( const size_t league_size ) - { - TestTeamPolicy functor( league_size ); + { + TestTeamPolicy functor( league_size ); - const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); - const long N = team_size * league_size ; + const int team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( functor ); + const long N = team_size * league_size; - long total = 0 ; + long total = 0; - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size , team_size ) , functor , total ); - ASSERT_EQ( size_t((N-1)*(N))/2 , size_t(total) ); + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace >( league_size, team_size ), functor, total ); + ASSERT_EQ( size_t( ( N - 1 ) * ( N ) ) / 2, size_t( total ) ); - Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace , ReduceTag >( league_size , team_size ) , functor , total ); - ASSERT_EQ( (size_t(N)*size_t(N+1))/2 , size_t(total) ); - } + Kokkos::parallel_reduce( Kokkos::TeamPolicy< ScheduleType, ExecSpace, ReduceTag >( league_size, team_size ), functor, total ); + ASSERT_EQ( ( size_t( N ) * size_t( N + 1 ) ) / 2, size_t( total ) ); + } }; -} -} +} // namespace + +} // namespace Test /*--------------------------------------------------------------------------*/ 
namespace Test { -template< typename ScalarType , class DeviceType, class ScheduleType > +template< typename ScalarType, class DeviceType, class ScheduleType > class ReduceTeamFunctor { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef typename execution_space::size_type size_type ; + typedef DeviceType execution_space; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef typename execution_space::size_type size_type; struct value_type { - ScalarType value[3] ; + ScalarType value[3]; }; - const size_type nwork ; + const size_type nwork; ReduceTeamFunctor( const size_type & arg_nwork ) : nwork( arg_nwork ) {} - ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) - : nwork( rhs.nwork ) {} + ReduceTeamFunctor( const ReduceTeamFunctor & rhs ) : nwork( rhs.nwork ) {} KOKKOS_INLINE_FUNCTION void init( value_type & dst ) const { - dst.value[0] = 0 ; - dst.value[1] = 0 ; - dst.value[2] = 0 ; + dst.value[0] = 0; + dst.value[1] = 0; + dst.value[2] = 0; } KOKKOS_INLINE_FUNCTION - void join( volatile value_type & dst , - const volatile value_type & src ) const + void join( volatile value_type & dst, const volatile value_type & src ) const { - dst.value[0] += src.value[0] ; - dst.value[1] += src.value[1] ; - dst.value[2] += src.value[2] ; + dst.value[0] += src.value[0]; + dst.value[1] += src.value[1]; + dst.value[2] += src.value[2]; } KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type ind , value_type & dst ) const + void operator()( const typename policy_type::member_type ind, value_type & dst ) const { const int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); const int thread_size = ind.team_size() * ind.league_size(); - const int chunk = ( nwork + thread_size - 1 ) / thread_size ; + const int chunk = ( nwork + thread_size - 1 ) / thread_size; - size_type iwork = chunk * thread_rank ; - const size_type iwork_end = iwork + chunk < nwork ? iwork + chunk : nwork ; + size_type iwork = chunk * thread_rank; + const size_type iwork_end = iwork + chunk < nwork ? 
iwork + chunk : nwork; - for ( ; iwork < iwork_end ; ++iwork ) { - dst.value[0] += 1 ; - dst.value[1] += iwork + 1 ; - dst.value[2] += nwork - iwork ; + for ( ; iwork < iwork_end; ++iwork ) { + dst.value[0] += 1; + dst.value[1] += iwork + 1; + dst.value[2] += nwork - iwork; } } }; @@ -227,58 +219,53 @@ public: namespace { -template< typename ScalarType , class DeviceType, class ScheduleType > +template< typename ScalarType, class DeviceType, class ScheduleType > class TestReduceTeam { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef typename execution_space::size_type size_type ; - - //------------------------------------ + typedef DeviceType execution_space; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef typename execution_space::size_type size_type; - TestReduceTeam( const size_type & nwork ) - { - run_test(nwork); - } + TestReduceTeam( const size_type & nwork ) { run_test( nwork ); } void run_test( const size_type & nwork ) { - typedef Test::ReduceTeamFunctor< ScalarType , execution_space , ScheduleType> functor_type ; - typedef typename functor_type::value_type value_type ; - typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type ; + typedef Test::ReduceTeamFunctor< ScalarType, execution_space, ScheduleType> functor_type; + typedef typename functor_type::value_type value_type; + typedef Kokkos::View< value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; enum { Count = 3 }; enum { Repeat = 100 }; value_type result[ Repeat ]; - const unsigned long nw = nwork ; - const unsigned long nsum = nw % 2 ? nw * (( nw + 1 )/2 ) - : (nw/2) * ( nw + 1 ); + const unsigned long nw = nwork; + const unsigned long nsum = nw % 2 ? nw * ( ( nw + 1 ) / 2 ) + : ( nw / 2 ) * ( nw + 1 ); - const unsigned team_size = policy_type::team_size_recommended( functor_type(nwork) ); - const unsigned league_size = ( nwork + team_size - 1 ) / team_size ; + const unsigned team_size = policy_type::team_size_recommended( functor_type( nwork ) ); + const unsigned league_size = ( nwork + team_size - 1 ) / team_size; - policy_type team_exec( league_size , team_size ); + policy_type team_exec( league_size, team_size ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { + for ( unsigned i = 0; i < Repeat; ++i ) { result_type tmp( & result[i] ); - Kokkos::parallel_reduce( team_exec , functor_type(nwork) , tmp ); + Kokkos::parallel_reduce( team_exec, functor_type( nwork ), tmp ); } execution_space::fence(); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - for ( unsigned j = 0 ; j < Count ; ++j ) { - const unsigned long correct = 0 == j % 3 ? nw : nsum ; - ASSERT_EQ( (ScalarType) correct , result[i].value[j] ); + for ( unsigned i = 0; i < Repeat; ++i ) { + for ( unsigned j = 0; j < Count; ++j ) { + const unsigned long correct = 0 == j % 3 ? 
nw : nsum; + ASSERT_EQ( (ScalarType) correct, result[i].value[j] ); } } } }; -} +} // namespace /*--------------------------------------------------------------------------*/ @@ -288,53 +275,51 @@ template< class DeviceType, class ScheduleType > class ScanTeamFunctor { public: - typedef DeviceType execution_space ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef DeviceType execution_space; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef long int value_type; - typedef long int value_type ; - Kokkos::View< value_type , execution_space > accum ; - Kokkos::View< value_type , execution_space > total ; + Kokkos::View< value_type, execution_space > accum; + Kokkos::View< value_type, execution_space > total; - ScanTeamFunctor() : accum("accum"), total("total") {} + ScanTeamFunctor() : accum( "accum" ), total( "total" ) {} KOKKOS_INLINE_FUNCTION - void init( value_type & error ) const { error = 0 ; } + void init( value_type & error ) const { error = 0; } KOKKOS_INLINE_FUNCTION - void join( value_type volatile & error , - value_type volatile const & input ) const - { if ( input ) error = 1 ; } + void join( value_type volatile & error, value_type volatile const & input ) const + { if ( input ) error = 1; } struct JoinMax { - typedef long int value_type ; + typedef long int value_type; + KOKKOS_INLINE_FUNCTION - void join( value_type volatile & dst - , value_type volatile const & input ) const - { if ( dst < input ) dst = input ; } + void join( value_type volatile & dst, value_type volatile const & input ) const + { if ( dst < input ) dst = input; } }; KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type ind , value_type & error ) const + void operator()( const typename policy_type::member_type ind, value_type & error ) const { if ( 0 == ind.league_rank() && 0 == ind.team_rank() ) { const long int thread_count = ind.league_size() * ind.team_size(); - total() = ( thread_count * ( thread_count + 1 ) ) / 2 ; + total() = ( thread_count * ( thread_count + 1 ) ) / 2; } // Team max: - const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ) , JoinMax() ); + const int long m = ind.team_reduce( (long int) ( ind.league_rank() + ind.team_rank() ), JoinMax() ); if ( m != ind.league_rank() + ( ind.team_size() - 1 ) ) { - printf("ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n" - , ind.league_rank(), ind.team_rank() - , ind.league_size(), ind.team_size() - , (long int)(ind.league_rank() + ( ind.team_size() - 1 )) , m ); + printf( "ScanTeamFunctor[%d.%d of %d.%d] reduce_max_answer(%ld) != reduce_max(%ld)\n", + ind.league_rank(), ind.team_rank(), + ind.league_size(), ind.team_size(), + (long int) ( ind.league_rank() + ( ind.team_size() - 1 ) ), m ); } // Scan: const long int answer = - ( ind.league_rank() + 1 ) * ind.team_rank() + - ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2 ; + ( ind.league_rank() + 1 ) * ind.team_rank() + ( ind.team_rank() * ( ind.team_rank() + 1 ) ) / 2; const long int result = ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); @@ -343,16 +328,17 @@ public: ind.team_scan( ind.league_rank() + 1 + ind.team_rank() + 1 ); if ( answer != result || answer != result2 ) { - printf("ScanTeamFunctor[%d.%d of %d.%d] answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", - ind.league_rank(), ind.team_rank(), - ind.league_size(), ind.team_size(), - answer,result,result2); - error = 1 ; + printf( "ScanTeamFunctor[%d.%d of %d.%d] 
answer(%ld) != scan_first(%ld) or scan_second(%ld)\n", + ind.league_rank(), ind.team_rank(), + ind.league_size(), ind.team_size(), + answer, result, result2 ); + + error = 1; } const long int thread_rank = ind.team_rank() + ind.team_size() * ind.league_rank(); - ind.team_scan( 1 + thread_rank , accum.ptr_on_device() ); + ind.team_scan( 1 + thread_rank, accum.ptr_on_device() ); } }; @@ -360,47 +346,45 @@ template< class DeviceType, class ScheduleType > class TestScanTeam { public: - typedef DeviceType execution_space ; - typedef long int value_type ; - - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; - typedef Test::ScanTeamFunctor<DeviceType, ScheduleType> functor_type ; + typedef DeviceType execution_space; + typedef long int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; + typedef Test::ScanTeamFunctor<DeviceType, ScheduleType> functor_type; - //------------------------------------ - - TestScanTeam( const size_t nteam ) - { - run_test(nteam); - } + TestScanTeam( const size_t nteam ) { run_test( nteam ); } void run_test( const size_t nteam ) { - typedef Kokkos::View< long int , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; - const unsigned REPEAT = 100000 ; + typedef Kokkos::View< long int, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; + + const unsigned REPEAT = 100000; unsigned Repeat; - if ( nteam == 0 ) - { + + if ( nteam == 0 ) { Repeat = 1; - } else { - Repeat = ( REPEAT + nteam - 1 ) / nteam ; //error here } + else { + Repeat = ( REPEAT + nteam - 1 ) / nteam; // Error here. + } + + functor_type functor; - functor_type functor ; + policy_type team_exec( nteam, policy_type::team_size_max( functor ) ); - policy_type team_exec( nteam , policy_type::team_size_max( functor ) ); + for ( unsigned i = 0; i < Repeat; ++i ) { + long int accum = 0; + long int total = 0; + long int error = 0; + Kokkos::deep_copy( functor.accum, total ); - for ( unsigned i = 0 ; i < Repeat ; ++i ) { - long int accum = 0 ; - long int total = 0 ; - long int error = 0 ; - Kokkos::deep_copy( functor.accum , total ); - Kokkos::parallel_reduce( team_exec , functor , result_type( & error ) ); + Kokkos::parallel_reduce( team_exec, functor, result_type( & error ) ); DeviceType::fence(); - Kokkos::deep_copy( accum , functor.accum ); - Kokkos::deep_copy( total , functor.total ); - ASSERT_EQ( error , 0 ); - ASSERT_EQ( total , accum ); + Kokkos::deep_copy( accum, functor.accum ); + Kokkos::deep_copy( total, functor.total ); + + ASSERT_EQ( error, 0 ); + ASSERT_EQ( total, accum ); } execution_space::fence(); @@ -416,18 +400,18 @@ namespace Test { template< class ExecSpace, class ScheduleType > struct SharedTeamFunctor { - typedef ExecSpace execution_space ; - typedef int value_type ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef ExecSpace execution_space; + typedef int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; enum { SHARED_COUNT = 1000 }; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. 
+ typedef Kokkos::View< int*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; - // Tell how much shared memory will be required by this functor: + // Tell how much shared memory will be required by this functor. inline unsigned team_shmem_size( int team_size ) const { @@ -436,19 +420,26 @@ struct SharedTeamFunctor { } KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type & ind , value_type & update ) const + void operator()( const typename policy_type::member_type & ind, value_type & update ) const { - const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); - const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); - - if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || - (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast<unsigned long> (SHARED_COUNT)); - ++update; // failure to allocate is an error + const shared_int_array_type shared_A( ind.team_shmem(), SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem(), SHARED_COUNT ); + + if ( ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 ) || + ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ) + { + printf ("member( %d/%d , %d/%d ) Failed to allocate shared memory of size %lu\n" + , ind.league_rank() + , ind.league_size() + , ind.team_rank() + , ind.team_size() + , static_cast<unsigned long>( SHARED_COUNT ) + ); + + ++update; // Failure to allocate is an error. } else { - for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + for ( int i = ind.team_rank(); i < SHARED_COUNT; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } @@ -456,12 +447,13 @@ struct SharedTeamFunctor { ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { - for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + for ( int i = 0; i < SHARED_COUNT; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { - ++update ; + ++update; } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { - ++update ; + ++update; } } } @@ -469,78 +461,79 @@ struct SharedTeamFunctor { } }; -} +} // namespace Test namespace { template< class ExecSpace, class ScheduleType > struct TestSharedTeam { - - TestSharedTeam() - { run(); } + TestSharedTeam() { run(); } void run() { - typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ; - typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor; + typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; - const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); + const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); - typename Functor::value_type error_count = 0 ; + typename Functor::value_type error_count = 0; - Kokkos::parallel_reduce( team_exec , Functor() , result_type( & error_count ) ); + Kokkos::parallel_reduce( team_exec, Functor(), result_type( & error_count ) ); - ASSERT_EQ( error_count , 0 ); + ASSERT_EQ( error_count, 0 ); } }; -} + +} // namespace namespace Test { -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if 
defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) template< class MemorySpace, class ExecSpace, class ScheduleType > struct TestLambdaSharedTeam { - - TestLambdaSharedTeam() - { run(); } + TestLambdaSharedTeam() { run(); } void run() { - typedef Test::SharedTeamFunctor<ExecSpace, ScheduleType> Functor ; - //typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; - typedef Kokkos::View< typename Functor::value_type , MemorySpace, Kokkos::MemoryUnmanaged > result_type ; + typedef Test::SharedTeamFunctor< ExecSpace, ScheduleType > Functor; + //typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; + typedef Kokkos::View< typename Functor::value_type, MemorySpace, Kokkos::MemoryUnmanaged > result_type; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View<int*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. + typedef Kokkos::View< int*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; const int SHARED_COUNT = 1000; int team_size = 1; + #ifdef KOKKOS_ENABLE_CUDA - if(std::is_same<ExecSpace,Kokkos::Cuda>::value) - team_size = 128; + if ( std::is_same< ExecSpace, Kokkos::Cuda >::value ) team_size = 128; #endif - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size); - team_exec = team_exec.set_scratch_size(0,Kokkos::PerTeam(SHARED_COUNT*2*sizeof(int))); - typename Functor::value_type error_count = 0 ; + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); + team_exec = team_exec.set_scratch_size( 0, Kokkos::PerTeam( SHARED_COUNT * 2 * sizeof( int ) ) ); + + typename Functor::value_type error_count = 0; - Kokkos::parallel_reduce( team_exec , KOKKOS_LAMBDA - ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind , int & update ) { + Kokkos::parallel_reduce( team_exec, KOKKOS_LAMBDA + ( const typename Kokkos::TeamPolicy< ScheduleType, ExecSpace >::member_type & ind, int & update ) + { + const shared_int_array_type shared_A( ind.team_shmem(), SHARED_COUNT ); + const shared_int_array_type shared_B( ind.team_shmem(), SHARED_COUNT ); - const shared_int_array_type shared_A( ind.team_shmem() , SHARED_COUNT ); - const shared_int_array_type shared_B( ind.team_shmem() , SHARED_COUNT ); + if ( ( shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0 ) || + ( shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0 ) ) + { + printf( "Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long>( SHARED_COUNT ) ); - if ((shared_A.ptr_on_device () == NULL && SHARED_COUNT > 0) || - (shared_B.ptr_on_device () == NULL && SHARED_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast<unsigned long> (SHARED_COUNT)); - ++update; // failure to allocate is an error - } else { - for ( int i = ind.team_rank() ; i < SHARED_COUNT ; i += ind.team_size() ) { + ++update; // Failure to allocate is an error. 
+ } + else { + for ( int i = ind.team_rank(); i < SHARED_COUNT; i += ind.team_size() ) { shared_A[i] = i + ind.league_rank(); shared_B[i] = 2 * i + ind.league_rank(); } @@ -548,196 +541,213 @@ struct TestLambdaSharedTeam { ind.team_barrier(); if ( ind.team_rank() + 1 == ind.team_size() ) { - for ( int i = 0 ; i < SHARED_COUNT ; ++i ) { + for ( int i = 0; i < SHARED_COUNT; ++i ) { if ( shared_A[i] != i + ind.league_rank() ) { - ++update ; + ++update; } + if ( shared_B[i] != 2 * i + ind.league_rank() ) { - ++update ; + ++update; } } } } }, result_type( & error_count ) ); - ASSERT_EQ( error_count , 0 ); + ASSERT_EQ( error_count, 0 ); } }; #endif -} + +} // namespace Test namespace Test { template< class ExecSpace, class ScheduleType > struct ScratchTeamFunctor { - typedef ExecSpace execution_space ; - typedef int value_type ; - typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type ; + typedef ExecSpace execution_space; + typedef int value_type; + typedef Kokkos::TeamPolicy< ScheduleType, execution_space > policy_type; enum { SHARED_TEAM_COUNT = 100 }; enum { SHARED_THREAD_COUNT = 10 }; - typedef typename ExecSpace::scratch_memory_space shmem_space ; + typedef typename ExecSpace::scratch_memory_space shmem_space; - // tbd: MemoryUnmanaged should be the default for shared memory space - typedef Kokkos::View<size_t*,shmem_space,Kokkos::MemoryUnmanaged> shared_int_array_type ; + // TBD: MemoryUnmanaged should be the default for shared memory space. + typedef Kokkos::View< size_t*, shmem_space, Kokkos::MemoryUnmanaged > shared_int_array_type; KOKKOS_INLINE_FUNCTION - void operator()( const typename policy_type::member_type & ind , value_type & update ) const + void operator()( const typename policy_type::member_type & ind, value_type & update ) const { - const shared_int_array_type scratch_ptr( ind.team_scratch(1) , 3*ind.team_size() ); - const shared_int_array_type scratch_A( ind.team_scratch(1) , SHARED_TEAM_COUNT ); - const shared_int_array_type scratch_B( ind.thread_scratch(1) , SHARED_THREAD_COUNT ); - - if ((scratch_ptr.ptr_on_device () == NULL ) || - (scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0) || - (scratch_B. ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0)) { - printf ("Failed to allocate shared memory of size %lu\n", - static_cast<unsigned long> (SHARED_TEAM_COUNT)); - ++update; // failure to allocate is an error + const shared_int_array_type scratch_ptr( ind.team_scratch( 1 ), 3 * ind.team_size() ); + const shared_int_array_type scratch_A( ind.team_scratch( 1 ), SHARED_TEAM_COUNT ); + const shared_int_array_type scratch_B( ind.thread_scratch( 1 ), SHARED_THREAD_COUNT ); + + if ( ( scratch_ptr.ptr_on_device () == NULL ) || + ( scratch_A. ptr_on_device () == NULL && SHARED_TEAM_COUNT > 0 ) || + ( scratch_B. ptr_on_device () == NULL && SHARED_THREAD_COUNT > 0 ) ) + { + printf( "Failed to allocate shared memory of size %lu\n", + static_cast<unsigned long>( SHARED_TEAM_COUNT ) ); + + ++update; // Failure to allocate is an error. 
} else { - Kokkos::parallel_for(Kokkos::TeamThreadRange(ind,0,(int)SHARED_TEAM_COUNT),[&] (const int &i) { + Kokkos::parallel_for( Kokkos::TeamThreadRange( ind, 0, (int) SHARED_TEAM_COUNT ), [&] ( const int & i ) { scratch_A[i] = i + ind.league_rank(); }); - for(int i=0; i<SHARED_THREAD_COUNT; i++) - scratch_B[i] = 10000*ind.league_rank() + 100*ind.team_rank() + i; + + for ( int i = 0; i < SHARED_THREAD_COUNT; i++ ) { + scratch_B[i] = 10000 * ind.league_rank() + 100 * ind.team_rank() + i; + } scratch_ptr[ind.team_rank()] = (size_t) scratch_A.ptr_on_device(); scratch_ptr[ind.team_rank() + ind.team_size()] = (size_t) scratch_B.ptr_on_device(); ind.team_barrier(); - for( int i = 0; i<SHARED_TEAM_COUNT; i++) { - if(scratch_A[i] != size_t(i + ind.league_rank())) - ++update; + for ( int i = 0; i < SHARED_TEAM_COUNT; i++ ) { + if ( scratch_A[i] != size_t( i + ind.league_rank() ) ) ++update; } - for( int i = 0; i < ind.team_size(); i++) { - if(scratch_ptr[0]!=scratch_ptr[i]) ++update; + + for ( int i = 0; i < ind.team_size(); i++ ) { + if ( scratch_ptr[0] != scratch_ptr[i] ) ++update; } - if(scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()] < - SHARED_THREAD_COUNT*sizeof(size_t)) + + if ( scratch_ptr[1 + ind.team_size()] - scratch_ptr[0 + ind.team_size()] < SHARED_THREAD_COUNT * sizeof( size_t ) ) { ++update; - for( int i = 1; i < ind.team_size(); i++) { - if((scratch_ptr[i+ind.team_size()] - scratch_ptr[i-1+ind.team_size()]) != - (scratch_ptr[1+ind.team_size()] - scratch_ptr[0 + ind.team_size()])) ++update; + } + for ( int i = 1; i < ind.team_size(); i++ ) { + if ( ( scratch_ptr[i + ind.team_size()] - scratch_ptr[i - 1 + ind.team_size()] ) != + ( scratch_ptr[1 + ind.team_size()] - scratch_ptr[0 + ind.team_size()] ) ) + { + ++update; + } } } } }; -} +} // namespace Test namespace { template< class ExecSpace, class ScheduleType > struct TestScratchTeam { - - TestScratchTeam() - { run(); } + TestScratchTeam() { run(); } void run() { - typedef Test::ScratchTeamFunctor<ExecSpace, ScheduleType> Functor ; - typedef Kokkos::View< typename Functor::value_type , Kokkos::HostSpace , Kokkos::MemoryUnmanaged > result_type ; + typedef Test::ScratchTeamFunctor<ExecSpace, ScheduleType> Functor; + typedef Kokkos::View< typename Functor::value_type, Kokkos::HostSpace, Kokkos::MemoryUnmanaged > result_type; const size_t team_size = Kokkos::TeamPolicy< ScheduleType, ExecSpace >::team_size_max( Functor() ); - Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size , team_size ); + Kokkos::TeamPolicy< ScheduleType, ExecSpace > team_exec( 8192 / team_size, team_size ); + + typename Functor::value_type error_count = 0; + + int team_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_TEAM_COUNT ) + + Functor::shared_int_array_type::shmem_size( 3 * team_size ); - typename Functor::value_type error_count = 0 ; + int thread_scratch_size = Functor::shared_int_array_type::shmem_size( Functor::SHARED_THREAD_COUNT ); - int team_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_TEAM_COUNT) + - Functor::shared_int_array_type::shmem_size(3*team_size); - int thread_scratch_size = Functor::shared_int_array_type::shmem_size(Functor::SHARED_THREAD_COUNT); - Kokkos::parallel_reduce( team_exec.set_scratch_size(0,Kokkos::PerTeam(team_scratch_size), - Kokkos::PerThread(thread_scratch_size)) , - Functor() , result_type( & error_count ) ); + Kokkos::parallel_reduce( team_exec.set_scratch_size( 0, Kokkos::PerTeam( team_scratch_size ), + Kokkos::PerThread( 
thread_scratch_size ) ), + Functor(), result_type( & error_count ) ); - ASSERT_EQ( error_count , 0 ); + ASSERT_EQ( error_count, 0 ); } }; -} + +} // namespace namespace Test { -template< class ExecSpace> + +template< class ExecSpace > KOKKOS_INLINE_FUNCTION -int test_team_mulit_level_scratch_loop_body(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team1(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread1(team.thread_scratch(0),16); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team2(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread2(team.thread_scratch(0),16); - - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team1(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread1(team.thread_scratch(1),16000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team2(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread2(team.thread_scratch(1),16000); - - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_team3(team.team_scratch(0),128); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> a_thread3(team.thread_scratch(0),16); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_team3(team.team_scratch(1),128000); - Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>> b_thread3(team.thread_scratch(1),16000); +int test_team_mulit_level_scratch_loop_body( const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team ) { + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_team1( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_thread1( team.thread_scratch( 0 ), 16 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_team2( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_thread2( team.thread_scratch( 0 ), 16 ); + + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_team1( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_thread1( team.thread_scratch( 1 ), 16000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_team2( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_thread2( team.thread_scratch( 1 ), 16000 ); + + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_team3( team.team_scratch( 0 ), 128 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > a_thread3( team.thread_scratch( 0 ), 16 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_team3( team.team_scratch( 1 ), 128000 ); + Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> > b_thread3( team.thread_scratch( 1 ), 16000 ); // The explicit types for 0 and 128 are here to test TeamThreadRange accepting different // types for begin and end. 
- Kokkos::parallel_for(Kokkos::TeamThreadRange(team,int(0),unsigned(128)), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, int( 0 ), unsigned( 128 ) ), [&] ( const int & i ) { - a_team1(i) = 1000000 + i; - a_team2(i) = 2000000 + i; - a_team3(i) = 3000000 + i; + a_team1( i ) = 1000000 + i + team.league_rank() * 100000; + a_team2( i ) = 2000000 + i + team.league_rank() * 100000; + a_team3( i ) = 3000000 + i + team.league_rank() * 100000; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16 ), [&] ( const int & i ) { - a_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - a_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - a_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + a_thread1( i ) = 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + a_thread2( i ) = 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + a_thread3( i ) = 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128000 ), [&] ( const int & i ) { - b_team1(i) = 1000000 + i; - b_team2(i) = 2000000 + i; - b_team3(i) = 3000000 + i; + b_team1( i ) = 1000000 + i + team.league_rank() * 100000; + b_team2( i ) = 2000000 + i + team.league_rank() * 100000; + b_team3( i ) = 3000000 + i + team.league_rank() * 100000; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16000 ), [&] ( const int & i ) { - b_thread1(i) = 1000000 + 100000*team.team_rank() + 16-i; - b_thread2(i) = 2000000 + 100000*team.team_rank() + 16-i; - b_thread3(i) = 3000000 + 100000*team.team_rank() + 16-i; + b_thread1( i ) = 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + b_thread2( i ) = 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; + b_thread3( i ) = 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000; }); team.team_barrier(); + int error = 0; - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128 ), [&] ( const int & i ) { - if(a_team1(i) != 1000000 + i) error++; - if(a_team2(i) != 2000000 + i) error++; - if(a_team3(i) != 3000000 + i) error++; + if ( a_team1( i ) != 1000000 + i + team.league_rank() * 100000 ) error++; + if ( a_team2( i ) != 2000000 + i + team.league_rank() * 100000 ) error++; + if ( a_team3( i ) != 3000000 + i + team.league_rank() * 100000 ) error++; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16 ), [&] ( const int & i ) { - if(a_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if(a_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + if ( a_thread1( i ) != 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( a_thread2( i ) != 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( a_thread3( i ) != 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; }); - 
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,128000), [&] (const int& i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 0, 128000 ), [&] ( const int & i ) { - if(b_team1(i) != 1000000 + i) error++; - if(b_team2(i) != 2000000 + i) error++; - if(b_team3(i) != 3000000 + i) error++; + if ( b_team1( i ) != 1000000 + i + team.league_rank() * 100000 ) error++; + if ( b_team2( i ) != 2000000 + i + team.league_rank() * 100000 ) error++; + if ( b_team3( i ) != 3000000 + i + team.league_rank() * 100000 ) error++; }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,16000), [&] (const int& i) + + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 16000 ), [&] ( const int & i ) { - if(b_thread1(i) != 1000000 + 100000*team.team_rank() + 16-i) error++; - if(b_thread2(i) != 2000000 + 100000*team.team_rank() + 16-i) error++; - if( b_thread3(i) != 3000000 + 100000*team.team_rank() + 16-i) error++; + if ( b_thread1( i ) != 1000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( b_thread2( i ) != 2000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; + if ( b_thread3( i ) != 3000000 + 100000 * team.team_rank() + 16 - i + team.league_rank() * 100000 ) error++; }); return error; @@ -748,93 +758,107 @@ struct TagFor {}; template< class ExecSpace, class ScheduleType > struct ClassNoShmemSizeFunction { - Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + typedef typename Kokkos::TeamPolicy< ExecSpace, ScheduleType >::member_type member_type; + + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; KOKKOS_INLINE_FUNCTION - void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const { - int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + void operator()( const TagFor &, const member_type & team ) const { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; } KOKKOS_INLINE_FUNCTION - void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const { - error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + void operator() ( const TagReduce &, const member_type & team, int & error ) const { + error += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); } void run() { - Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors"); + Kokkos::View< int, ExecSpace > d_errors = Kokkos::View< int, ExecSpace >( "Errors" ); errors = d_errors; - const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16 ); + + const int per_team1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16000 ); - const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); - const int per_thread1 = 
3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); { - Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this); - Kokkos::fence(); - typename Kokkos::View<int,ExecSpace>::HostMirror h_errors = Kokkos::create_mirror_view(d_errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + Kokkos::TeamPolicy< TagFor, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_for( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this ); + Kokkos::fence(); + + typename Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( d_errors ); + Kokkos::deep_copy( h_errors, d_errors ); + ASSERT_EQ( h_errors(), 0 ); } { - int error = 0; - Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16); - Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this,error); - Kokkos::fence(); - ASSERT_EQ(error,0); + int error = 0; + Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), *this, error ); + Kokkos::fence(); + + ASSERT_EQ( error, 0 ); } }; }; template< class ExecSpace, class ScheduleType > struct ClassWithShmemSizeFunction { - Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + typedef typename Kokkos::TeamPolicy< ExecSpace, ScheduleType >::member_type member_type; + + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; KOKKOS_INLINE_FUNCTION - void operator() (const TagFor&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team) const { - int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + void operator()( const TagFor &, const member_type & team ) const { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; } KOKKOS_INLINE_FUNCTION - void operator() (const TagReduce&, const typename Kokkos::TeamPolicy<ExecSpace,ScheduleType>::member_type& team, int& error) const { - error += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + void operator() ( const TagReduce &, const member_type & team, int & error ) const { + error += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); } void run() { - Kokkos::View<int,ExecSpace> d_errors = Kokkos::View<int,ExecSpace>("Errors"); + Kokkos::View< int, ExecSpace > d_errors = Kokkos::View< int, ExecSpace >( "Errors" ); errors = d_errors; - const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); - const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); + const int per_team1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16000 ); + 
{ - Kokkos::TeamPolicy<TagFor,ExecSpace,ScheduleType> policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this); - Kokkos::fence(); - typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(d_errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + Kokkos::TeamPolicy< TagFor, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_for( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), + Kokkos::PerThread( per_thread1 ) ), + *this ); + Kokkos::fence(); + + typename Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( d_errors ); + Kokkos::deep_copy( h_errors, d_errors ); + ASSERT_EQ( h_errors(), 0 ); } { - int error = 0; - Kokkos::TeamPolicy<TagReduce,ExecSpace,ScheduleType> policy(10,8,16); - Kokkos::parallel_reduce(policy.set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - *this,error); - Kokkos::fence(); - ASSERT_EQ(error,0); + int error = 0; + Kokkos::TeamPolicy< TagReduce, ExecSpace, ScheduleType > policy( 10, 8, 16 ); + + Kokkos::parallel_reduce( policy.set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), + Kokkos::PerThread( per_thread1 ) ), + *this, error ); + Kokkos::fence(); + + ASSERT_EQ( error, 0 ); } }; - unsigned team_shmem_size(int team_size) const { - const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + unsigned team_shmem_size( int team_size ) const { + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16 ); return per_team0 + team_size * per_thread0; } }; @@ -842,67 +866,68 @@ struct ClassWithShmemSizeFunction { template< class ExecSpace, class ScheduleType > void test_team_mulit_level_scratch_test_lambda() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - Kokkos::View<int,ExecSpace,Kokkos::MemoryTraits<Kokkos::Atomic> > errors; - Kokkos::View<int,ExecSpace> d_errors("Errors"); + Kokkos::View< int, ExecSpace, Kokkos::MemoryTraits<Kokkos::Atomic> > errors; + Kokkos::View< int, ExecSpace > d_errors( "Errors" ); errors = d_errors; - const int per_team0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128); - const int per_thread0 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16); + const int per_team0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128 ); + const int per_thread0 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16 ); + + const int per_team1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 128000 ); + const int per_thread1 = 3 * Kokkos::View< double*, ExecSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged> >::shmem_size( 16000 ); - const int per_team1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(128000); - const int per_thread1 = 3*Kokkos::View<double*,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>::shmem_size(16000); + Kokkos::TeamPolicy< ExecSpace, ScheduleType > policy( 10, 8, 16 ); - Kokkos::TeamPolicy<ExecSpace,ScheduleType> 
policy(10,8,16); - Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team) { - int error = test_team_mulit_level_scratch_loop_body<ExecSpace>(team); + Kokkos::parallel_for( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), + KOKKOS_LAMBDA ( const typename Kokkos::TeamPolicy< ExecSpace >::member_type & team ) + { + int error = test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); errors() += error; }); Kokkos::fence(); - typename Kokkos::View<int,ExecSpace>::HostMirror h_errors= Kokkos::create_mirror_view(errors); - Kokkos::deep_copy(h_errors,d_errors); - ASSERT_EQ(h_errors(),0); + + typename Kokkos::View< int, ExecSpace >::HostMirror h_errors = Kokkos::create_mirror_view( errors ); + Kokkos::deep_copy( h_errors, d_errors ); + ASSERT_EQ( h_errors(), 0 ); int error = 0; - Kokkos::parallel_reduce(policy.set_scratch_size(0,Kokkos::PerTeam(per_team0),Kokkos::PerThread(per_thread0)).set_scratch_size(1,Kokkos::PerTeam(per_team1),Kokkos::PerThread(per_thread1)), - KOKKOS_LAMBDA(const typename Kokkos::TeamPolicy<ExecSpace>::member_type& team, int& count) { - count += test_team_mulit_level_scratch_loop_body<ExecSpace>(team); - },error); - ASSERT_EQ(error,0); + Kokkos::parallel_reduce( policy.set_scratch_size( 0, Kokkos::PerTeam( per_team0 ), Kokkos::PerThread( per_thread0 ) ).set_scratch_size( 1, Kokkos::PerTeam( per_team1 ), Kokkos::PerThread( per_thread1 ) ), + KOKKOS_LAMBDA ( const typename Kokkos::TeamPolicy< ExecSpace >::member_type & team, int & count ) + { + count += test_team_mulit_level_scratch_loop_body< ExecSpace >( team ); + }, error ); + ASSERT_EQ( error, 0 ); Kokkos::fence(); #endif } - -} +} // namespace Test namespace { + template< class ExecSpace, class ScheduleType > struct TestMultiLevelScratchTeam { - - TestMultiLevelScratchTeam() - { run(); } + TestMultiLevelScratchTeam() { run(); } void run() { #ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA - Test::test_team_mulit_level_scratch_test_lambda<ExecSpace, ScheduleType>(); + Test::test_team_mulit_level_scratch_test_lambda< ExecSpace, ScheduleType >(); #endif - Test::ClassNoShmemSizeFunction<ExecSpace, ScheduleType> c1; + Test::ClassNoShmemSizeFunction< ExecSpace, ScheduleType > c1; c1.run(); - Test::ClassWithShmemSizeFunction<ExecSpace, ScheduleType> c2; + Test::ClassWithShmemSizeFunction< ExecSpace, ScheduleType > c2; c2.run(); - } }; -} + +} // namespace namespace Test { template< class ExecSpace > struct TestShmemSize { - TestShmemSize() { run(); } void run() @@ -915,9 +940,8 @@ struct TestShmemSize { size_t size = view_type::shmem_size( d1, d2, d3 ); - ASSERT_EQ( size, d1 * d2 * d3 * sizeof(long) ); + ASSERT_EQ( size, d1 * d2 * d3 * sizeof( long ) ); } }; -} -/*--------------------------------------------------------------------------*/ +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index d9b06c29e49d0362226168861b0d5e818d1d82f9..8d16ac66db8abbf1b5afc3f12aaff7afe0159307 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 
2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,36 +50,47 @@ namespace TestTeamVector { struct my_complex { - double re,im; + double re, im; int dummy; + KOKKOS_INLINE_FUNCTION my_complex() { re = 0.0; im = 0.0; dummy = 0; } + KOKKOS_INLINE_FUNCTION - my_complex(const my_complex& src) { + my_complex( const my_complex & src ) { re = src.re; im = src.im; dummy = src.dummy; } KOKKOS_INLINE_FUNCTION - my_complex(const volatile my_complex& src) { + my_complex & operator=( const my_complex & src ) { re = src.re; im = src.im; dummy = src.dummy; + return *this ; } KOKKOS_INLINE_FUNCTION - my_complex(const double& val) { + my_complex( const volatile my_complex & src ) { + re = src.re; + im = src.im; + dummy = src.dummy; + } + + KOKKOS_INLINE_FUNCTION + my_complex( const double & val ) { re = val; im = 0.0; dummy = 0; } + KOKKOS_INLINE_FUNCTION - my_complex& operator += (const my_complex& src) { + my_complex & operator+=( const my_complex & src ) { re += src.re; im += src.im; dummy += src.dummy; @@ -87,252 +98,278 @@ struct my_complex { } KOKKOS_INLINE_FUNCTION - void operator += (const volatile my_complex& src) volatile { + void operator+=( const volatile my_complex & src ) volatile { re += src.re; im += src.im; dummy += src.dummy; } + KOKKOS_INLINE_FUNCTION - my_complex& operator *= (const my_complex& src) { - double re_tmp = re*src.re - im*src.im; + my_complex & operator*=( const my_complex & src ) { + double re_tmp = re * src.re - im * src.im; double im_tmp = re * src.im + im * src.re; re = re_tmp; im = im_tmp; dummy *= src.dummy; return *this; } + KOKKOS_INLINE_FUNCTION - void operator *= (const volatile my_complex& src) volatile { - double re_tmp = re*src.re - im*src.im; + void operator*=( const volatile my_complex & src ) volatile { + double re_tmp = re * src.re - im * src.im; double im_tmp = re * src.im + im * src.re; re = re_tmp; im = im_tmp; dummy *= src.dummy; } + KOKKOS_INLINE_FUNCTION - bool operator == (const my_complex& src) { - return (re == src.re) && (im == src.im) && ( dummy == src.dummy ); + bool operator==( const my_complex & src ) { + return ( re == src.re ) && ( im == src.im ) && ( dummy == src.dummy ); } + KOKKOS_INLINE_FUNCTION - bool operator != (const my_complex& src) { - return (re != src.re) || (im != src.im) || ( dummy != src.dummy ); + bool operator!=( const my_complex & src ) { + return ( re != src.re ) || ( im != src.im ) || ( dummy != src.dummy ); } + KOKKOS_INLINE_FUNCTION - bool operator != (const double& val) { - return (re != val) || - (im != 0) || (dummy != 0); + bool operator!=( const double & val ) { + return ( re != val ) || ( im != 0 ) || ( dummy != 0 ); } + KOKKOS_INLINE_FUNCTION - my_complex& operator= (const int& val) { + my_complex & operator=( const int & val ) { re = val; im = 0.0; dummy = 0; return *this; } + KOKKOS_INLINE_FUNCTION - my_complex& operator= (const double& val) { + my_complex & operator=( const double & val ) { re = val; im = 0.0; dummy = 0; return *this; } + KOKKOS_INLINE_FUNCTION operator 
double() { return re; } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_for { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; typedef typename shared_int::size_type size_type; - const size_type shmemSize = team.team_size () * 13; - shared_int values = shared_int (team.team_shmem (), shmemSize); + const size_type shmemSize = team.team_size() * 13; + shared_int values = shared_int( team.team_shmem(), shmemSize ); - if (values.ptr_on_device () == NULL || values.dimension_0 () < shmemSize) { - printf ("FAILED to allocate shared memory of size %u\n", - static_cast<unsigned int> (shmemSize)); + if ( values.ptr_on_device() == NULL || values.dimension_0() < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int>( shmemSize ) ); } else { + // Initialize shared memory. + values( team.team_rank() ) = 0; - // Initialize shared memory - values(team.team_rank ()) = 0; - - // Accumulate value into per thread shared memory - // This is non blocking - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) + // Accumulate value into per thread shared memory. + // This is non blocking. + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i ) { - values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); + values( team.team_rank() ) += i - team.league_rank() + team.league_size() + team.team_size(); }); - // Wait for all memory to be written - team.team_barrier (); - // One thread per team executes the comparison - Kokkos::single(Kokkos::PerTeam(team),[&]() + + // Wait for all memory to be written. + team.team_barrier(); + + // One thread per team executes the comparison. 
+ Kokkos::single( Kokkos::PerTeam( team ), [&] () { - Scalar test = 0; - Scalar value = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - for (int i = 0; i < team.team_size (); ++i) { - value += values(i); - } - if (test != value) { - printf ("FAILED team_parallel_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value)); - flag() = 1; - } + Scalar test = 0; + Scalar value = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + for ( int i = 0; i < team.team_size(); ++i ) { + value += values( i ); + } + + if ( test != value ) { + printf ( "FAILED team_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ) ); + flag() = 1; + } }); } } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_reduce { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { - val += i - team.league_rank () + team.league_size () + team.team_size (); - },value); + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() - { - Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - if (test != value) { - if(team.league_rank() == 0) - printf ("FAILED team_parallel_reduce %i %i %f %f %lu\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value),sizeof(Scalar)); - flag() = 1; - } + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () + { + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_parallel_reduce %i %i %f %f %lu\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ), sizeof( Scalar ) ); + } + + flag() = 1; + } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_reduce_join { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - 
Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_reduce_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - } - , [&] (volatile Scalar& val, const volatile Scalar& src) - {val+=src;} - , value + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, + value ); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { - Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); - } - if (test != value) { - printf ("FAILED team_vector_parallel_reduce_join %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value)); - flag() = 1; - } + Scalar test = 0; + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); + } + + if ( test != value ) { + printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ) ); + + flag() = 1; + } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_vector_for { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_vector_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_vector_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; typedef typename shared_int::size_type size_type; - const size_type shmemSize = team.team_size () * 13; - shared_int values = shared_int (team.team_shmem (), 
shmemSize); + const size_type shmemSize = team.team_size() * 13; + shared_int values = shared_int( team.team_shmem(), shmemSize ); - if (values.ptr_on_device () == NULL || values.dimension_0 () < shmemSize) { - printf ("FAILED to allocate shared memory of size %u\n", - static_cast<unsigned int> (shmemSize)); + if ( values.ptr_on_device() == NULL || values.dimension_0() < shmemSize ) { + printf( "FAILED to allocate shared memory of size %u\n", + static_cast<unsigned int>( shmemSize ) ); } else { - Kokkos::single(Kokkos::PerThread(team),[&] () + team.team_barrier(); + + Kokkos::single( Kokkos::PerThread( team ), [&] () { - values(team.team_rank ()) = 0; + values( team.team_rank() ) = 0; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,131),[&] (int i) + Kokkos::parallel_for( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i ) { - Kokkos::single(Kokkos::PerThread(team),[&] () + Kokkos::single( Kokkos::PerThread( team ), [&] () { - values(team.team_rank ()) += i - team.league_rank () + team.league_size () + team.team_size (); + values( team.team_rank() ) += i - team.league_rank() + team.league_size() + team.team_size(); }); }); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; Scalar value = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - for (int i = 0; i < team.team_size (); ++i) { - value += values(i); + + for ( int i = 0; i < team.team_size(); ++i ) { + value += values( i ); } - if (test != value) { - printf ("FAILED team_vector_parallel_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value)); + + if ( test != value ) { + printf( "FAILED team_vector_parallel_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ) ); + flag() = 1; } }); @@ -340,164 +377,176 @@ struct functor_team_vector_for { } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_vector_reduce { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_vector_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_team_vector_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - + void operator()( typename policy_type::member_type team ) const { Scalar value = Scalar(); - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131),[&] (int i, Scalar& val) + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) { - val += i - team.league_rank () + team.league_size () + team.team_size (); - },value); + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, value ); - team.team_barrier (); - 
Kokkos::single(Kokkos::PerTeam(team),[&]() + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - if (test != value) { - if(team.league_rank() == 0) - printf ("FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value),sizeof(Scalar)); - flag() = 1; + + if ( test != value ) { + if ( team.league_rank() == 0 ) { + printf( "FAILED team_vector_parallel_reduce %i %i %f %f %lu\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ), sizeof( Scalar ) ); + } + + flag() = 1; } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_team_vector_reduce_join { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_team_vector_reduce_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + functor_team_vector_reduce_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } + KOKKOS_INLINE_FUNCTION + void operator()( typename policy_type::member_type team ) const { Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,131) - , [&] (int i, Scalar& val) - { - val += i - team.league_rank () + team.league_size () + team.team_size (); - } - , [&] (volatile Scalar& val, const volatile Scalar& src) - {val+=src;} - , value + + Kokkos::parallel_reduce( Kokkos::TeamThreadRange( team, 131 ), [&] ( int i, Scalar & val ) + { + val += i - team.league_rank() + team.league_size() + team.team_size(); + }, + [] ( volatile Scalar & val, const volatile Scalar & src ) { val += src; }, + value ); - team.team_barrier (); - Kokkos::single(Kokkos::PerTeam(team),[&]() + team.team_barrier(); + + Kokkos::single( Kokkos::PerTeam( team ), [&] () { Scalar test = 0; - for (int i = 0; i < 131; ++i) { - test += i - team.league_rank () + team.league_size () + team.team_size (); + + for ( int i = 0; i < 131; ++i ) { + test += i - team.league_rank() + team.league_size() + team.team_size(); } - if (test != value) { - printf ("FAILED team_vector_parallel_reduce_join %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value)); + + if ( test != value ) { + printf( "FAILED team_vector_parallel_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ) ); + flag() = 1; } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_vec_single { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - 
Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_vec_single(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_vec_single( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - - // Warning: this test case intentionally violates permissable semantics + void operator()( typename policy_type::member_type team ) const { + // Warning: this test case intentionally violates permissable semantics. // It is not valid to get references to members of the enclosing region // inside a parallel_for and write to it. Scalar value = 0; - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13),[&] (int i) + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i ) { - value = i; // This write is violating Kokkos semantics for nested parallelism + value = i; // This write is violating Kokkos semantics for nested parallelism. }); - Kokkos::single(Kokkos::PerThread(team),[&] (Scalar& val) + Kokkos::single( Kokkos::PerThread( team ), [&] ( Scalar & val ) { val = 1; - },value); + }, value ); Scalar value2 = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13), [&] (int i, Scalar& val) + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) { val += value; - },value2); + }, value2 ); + + if ( value2 != ( value * 13 ) ) { + printf( "FAILED vector_single broadcast %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) value2, (double) value ); - if(value2!=(value*13)) { - printf("FAILED vector_single broadcast %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) value2,(double) value); - flag()=1; + flag() = 1; } } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_vec_for { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_vec_for(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_for( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} - unsigned team_shmem_size(int team_size) const {return team_size*13*sizeof(Scalar)+8;} + unsigned team_shmem_size( int team_size ) const { return team_size * 13 * sizeof( Scalar ) + 8; } KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + void operator()( typename policy_type::member_type team ) const { + typedef typename ExecutionSpace::scratch_memory_space shmem_space; + typedef Kokkos::View< Scalar*, shmem_space, Kokkos::MemoryUnmanaged > shared_int; - typedef typename ExecutionSpace::scratch_memory_space shmem_space ; - typedef Kokkos::View<Scalar*,shmem_space,Kokkos::MemoryUnmanaged> shared_int; - shared_int values = shared_int(team.team_shmem(),team.team_size()*13); + shared_int values = shared_int( team.team_shmem(), team.team_size() * 13 ); - if (values.ptr_on_device () == NULL || - values.dimension_0() < (unsigned) team.team_size() * 13) { - printf ("FAILED to allocate memory of size %i\n", - static_cast<int> (team.team_size () * 13)); + if ( values.ptr_on_device() == NULL || values.dimension_0() < (unsigned) team.team_size() * 13 ) { + printf( 
"FAILED to allocate memory of size %i\n", static_cast<int>( team.team_size() * 13 ) ); flag() = 1; } else { - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,13), [&] (int i) + Kokkos::parallel_for( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i ) { - values(13*team.team_rank() + i) = i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); + values( 13 * team.team_rank() + i ) = + i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); }); - Kokkos::single(Kokkos::PerThread(team),[&] () + Kokkos::single( Kokkos::PerThread( team ), [&] () { Scalar test = 0; Scalar value = 0; - for (int i = 0; i < 13; ++i) { + + for ( int i = 0; i < 13; ++i ) { test += i - team.team_rank() - team.league_rank() + team.league_size() + team.team_size(); - value += values(13*team.team_rank() + i); + value += values( 13 * team.team_rank() + i ); } - if (test != value) { - printf ("FAILED vector_par_for %i %i %f %f\n", - team.league_rank (), team.team_rank (), - static_cast<double> (test), static_cast<double> (value)); + + if ( test != value ) { + printf( "FAILED vector_par_for %i %i %f %f\n", + team.league_rank(), team.team_rank(), + static_cast<double>( test ), static_cast<double>( value ) ); + flag() = 1; } }); @@ -505,169 +554,192 @@ struct functor_vec_for { } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_vec_red { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_vec_red(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_red( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + void operator()( typename policy_type::member_type team ) const { Scalar value = 0; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val) + // When no reducer is given the default is summation. 
+ Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) { val += i; - }, value); + }, value ); - Kokkos::single(Kokkos::PerThread(team),[&] () + Kokkos::single( Kokkos::PerThread( team ), [&] () { Scalar test = 0; - for(int i = 0; i < 13; i++) { - test+=i; - } - if(test!=value) { - printf("FAILED vector_par_reduce %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); - flag()=1; + + for ( int i = 0; i < 13; i++ ) test += i; + + if ( test != value ) { + printf( "FAILED vector_par_reduce %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) value ); + + flag() = 1; } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_vec_red_join { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_vec_red_join(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + + functor_vec_red_join( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { + void operator()( typename policy_type::member_type team ) const { + // Must initialize to the identity value for the reduce operation + // for this test: + // ( identity, operation ) = ( 1 , *= ) Scalar value = 1; - Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,13) - , [&] (int i, Scalar& val) - { val *= i; } - , [&] (Scalar& val, const Scalar& src) - {val*=src;} - , value + Kokkos::parallel_reduce( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val ) + { + val *= ( i % 5 + 1 ); + }, + [&] ( Scalar & val, const Scalar & src ) { val *= src; }, + value ); - Kokkos::single(Kokkos::PerThread(team),[&] () + Kokkos::single( Kokkos::PerThread( team ), [&] () { Scalar test = 1; - for(int i = 0; i < 13; i++) { - test*=i; - } - if(test!=value) { - printf("FAILED vector_par_reduce_join %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) value); - flag()=1; + + for ( int i = 0; i < 13; i++ ) test *= ( i % 5 + 1 ); + + if ( test != value ) { + printf( "FAILED vector_par_reduce_join %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) value ); + + flag() = 1; } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_vec_scan { - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_vec_scan(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_vec_scan( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team) const { - Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team,13),[&] (int i, Scalar& val, bool final) + void operator()( typename policy_type::member_type team ) const { + Kokkos::parallel_scan( Kokkos::ThreadVectorRange( team, 13 ), [&] ( int i, Scalar & val, bool final ) { val += i; - if(final) { + + if ( final ) { Scalar test = 0; - 
for(int k = 0; k <= i; k++) { - test+=k; - } - if(test!=val) { - printf("FAILED vector_par_scan %i %i %f %f\n",team.league_rank(),team.team_rank(),(double) test,(double) val); - flag()=1; + for ( int k = 0; k <= i; k++ ) test += k; + + if ( test != val ) { + printf( "FAILED vector_par_scan %i %i %f %f\n", + team.league_rank(), team.team_rank(), (double) test, (double) val ); + + flag() = 1; } } }); } }; -template<typename Scalar, class ExecutionSpace> +template< typename Scalar, class ExecutionSpace > struct functor_reduce { typedef double value_type; - typedef Kokkos::TeamPolicy<ExecutionSpace> policy_type; + typedef Kokkos::TeamPolicy< ExecutionSpace > policy_type; typedef ExecutionSpace execution_space; - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag; - functor_reduce(Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> flag_):flag(flag_) {} + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag; + functor_reduce( Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > flag_ ) : flag( flag_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (typename policy_type::member_type team, double& sum) const { + void operator()( typename policy_type::member_type team, double & sum ) const { sum += team.league_rank() * 100 + team.thread_rank(); } }; -template<typename Scalar,class ExecutionSpace> -bool test_scalar(int nteams, int team_size, int test) { - Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace> d_flag("flag"); - typename Kokkos::View<int,Kokkos::LayoutLeft,ExecutionSpace>::HostMirror h_flag("h_flag"); - h_flag() = 0 ; - Kokkos::deep_copy(d_flag,h_flag); - - if(test==0) - Kokkos::parallel_for( std::string("A") , Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_vec_red<Scalar, ExecutionSpace>(d_flag)); - if(test==1) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_vec_red_join<Scalar, ExecutionSpace>(d_flag)); - if(test==2) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_vec_scan<Scalar, ExecutionSpace>(d_flag)); - if(test==3) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_vec_for<Scalar, ExecutionSpace>(d_flag)); - if(test==4) - Kokkos::parallel_for( "B" , Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_vec_single<Scalar, ExecutionSpace>(d_flag)); - if(test==5) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), - functor_team_for<Scalar, ExecutionSpace>(d_flag)); - if(test==6) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), - functor_team_reduce<Scalar, ExecutionSpace>(d_flag)); - if(test==7) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size), - functor_team_reduce_join<Scalar, ExecutionSpace>(d_flag)); - if(test==8) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_team_vector_for<Scalar, ExecutionSpace>(d_flag)); - if(test==9) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_team_vector_reduce<Scalar, ExecutionSpace>(d_flag)); - if(test==10) - Kokkos::parallel_for( Kokkos::TeamPolicy<ExecutionSpace>(nteams,team_size,8), - functor_team_vector_reduce_join<Scalar, ExecutionSpace>(d_flag)); - - Kokkos::deep_copy(h_flag,d_flag); - - return (h_flag() == 0); +template< typename Scalar, class ExecutionSpace > +bool test_scalar( int nteams, int team_size, int test ) { + Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace > d_flag( "flag" ); + typename 
Kokkos::View< int, Kokkos::LayoutLeft, ExecutionSpace >::HostMirror h_flag( "h_flag" ); + h_flag() = 0; + Kokkos::deep_copy( d_flag, h_flag ); + + if ( test == 0 ) { + Kokkos::parallel_for( std::string( "A" ), Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_red< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 1 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_red_join< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 2 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_scan< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 3 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 4 ) { + Kokkos::parallel_for( "B", Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_vec_single< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 5 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 6 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 7 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size ), + functor_team_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 8 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_for< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 9 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_reduce< Scalar, ExecutionSpace >( d_flag ) ); + } + else if ( test == 10 ) { + Kokkos::parallel_for( Kokkos::TeamPolicy< ExecutionSpace >( nteams, team_size, 8 ), + functor_team_vector_reduce_join< Scalar, ExecutionSpace >( d_flag ) ); + } + + Kokkos::deep_copy( h_flag, d_flag ); + + return ( h_flag() == 0 ); } -template<class ExecutionSpace> -bool Test(int test) { +template< class ExecutionSpace > +bool Test( int test ) { bool passed = true; - passed = passed && test_scalar<int, ExecutionSpace>(317,33,test); - passed = passed && test_scalar<long long int, ExecutionSpace>(317,33,test); - passed = passed && test_scalar<float, ExecutionSpace>(317,33,test); - passed = passed && test_scalar<double, ExecutionSpace>(317,33,test); - passed = passed && test_scalar<my_complex, ExecutionSpace>(317,33,test); - return passed; -} + passed = passed && test_scalar< int, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< long long int, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< float, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< double, ExecutionSpace >( 317, 33, test ); + passed = passed && test_scalar< my_complex, ExecutionSpace >( 317, 33, test ); + return passed; } +} // namespace TestTeamVector diff --git a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp index 203c9526792f8a5bbef9dbcb0582ce2d8d3a80e2..7bcf3f8a32691ee8a27bac5ed997ed68c6c39082 100644 --- a/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp +++ b/lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp @@ -47,152 +47,162 @@ namespace { -template<class Scalar, 
class ExecutionSpace> +template< class Scalar, class ExecutionSpace > struct SumPlain { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View<Scalar*,execution_space> type; + typedef typename Kokkos::View< Scalar*, execution_space > type; + type view; - SumPlain(type view_):view(view_) {} + + SumPlain( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void operator() (int i, Scalar& val) { + void operator() ( int i, Scalar & val ) { val += Scalar(); } }; -template<class Scalar, class ExecutionSpace> +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueType { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View<Scalar*,execution_space> type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumInitJoinFinalValueType(type view_):view(view_) {} + + type view; + + SumInitJoinFinalValueType( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(value_type& val) const { + void init( value_type & val ) const { val = value_type(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, volatile value_type& src) const { + void join( volatile value_type & val, volatile value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template<class Scalar, class ExecutionSpace> +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueType2 { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View<Scalar*,execution_space> type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumInitJoinFinalValueType2(type view_):view(view_) {} + + type view; + + SumInitJoinFinalValueType2( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(volatile value_type& val) const { + void init( volatile value_type & val ) const { val = value_type(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, const volatile value_type& src) const { + void join( volatile value_type & val, const volatile value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template<class Scalar, class ExecutionSpace> +template< class Scalar, class ExecutionSpace > struct SumInitJoinFinalValueTypeArray { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View<Scalar*,execution_space> type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type[]; + + type view; int n; - SumInitJoinFinalValueTypeArray(type view_, int n_):view(view_),n(n_) {} + + SumInitJoinFinalValueTypeArray( type view_, int n_ ) : view( view_ ), n( n_ ) {} KOKKOS_INLINE_FUNCTION - void init(value_type val) const { - for(int k=0;k<n;k++) + void init( value_type val ) const { + for ( int k = 0; k < n; k++ ) { val[k] = 0; + } } KOKKOS_INLINE_FUNCTION - void join(volatile value_type val, const volatile value_type src) const { - for(int k=0;k<n;k++) + void join( volatile value_type val, const volatile value_type src ) const { + for ( int k = 0; k < n; k++ ) { val[k] += src[k]; + } } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type val) const { - for(int k=0;k<n;k++) - val[k] += k*i; + void operator()( int i, value_type val ) const { + for ( int k = 0; k < n; k++ ) { + val[k] += k * 
i; + } } - }; -template<class Scalar, class ExecutionSpace> +template< class Scalar, class ExecutionSpace > struct SumWrongInitJoinFinalValueType { typedef ExecutionSpace execution_space; - typedef typename Kokkos::View<Scalar*,execution_space> type; - type view; + typedef typename Kokkos::View< Scalar*, execution_space > type; typedef Scalar value_type; - SumWrongInitJoinFinalValueType(type view_):view(view_) {} + + type view; + + SumWrongInitJoinFinalValueType( type view_ ) : view( view_ ) {} KOKKOS_INLINE_FUNCTION - void init(double& val) const { + void init( double & val ) const { val = double(); } KOKKOS_INLINE_FUNCTION - void join(volatile value_type& val, const value_type& src) const { + void join( volatile value_type & val, const value_type & src ) const { val += src; } KOKKOS_INLINE_FUNCTION - void operator() (int i, value_type& val) const { + void operator()( int i, value_type & val ) const { val += value_type(); } - }; -template<class Scalar, class ExecutionSpace> +template< class Scalar, class ExecutionSpace > void TestTemplateMetaFunctions() { - typedef typename Kokkos::View<Scalar*,ExecutionSpace> type; - type a("A",100); + typedef typename Kokkos::View< Scalar*, ExecutionSpace > type; + type a( "A", 100 ); /* - int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit<SumPlain<Scalar,ExecutionSpace>, Scalar& >::value; - ASSERT_EQ(sum_plain_has_init_arg,0); - int sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg,1); - int sum_initjoinfinalvaluetype_has_init_arg2 = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_init_arg2,1); - int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_init_arg,0); - - //int sum_initjoinfinalvaluetypearray_has_init_arg = Kokkos::Impl::FunctorHasInit<SumInitJoinFinalValueTypeArray<Scalar,ExecutionSpace>, Scalar[] >::value; - //ASSERT_EQ(sum_initjoinfinalvaluetypearray_has_init_arg,1); - - //printf("Values Init: %i %i %i\n",sum_plain_has_init_arg,sum_initjoinfinalvaluetype_has_init_arg,sum_wronginitjoinfinalvaluetype_has_init_arg); - - int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumPlain<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_plain_has_join_arg,0); - int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg,1); - int sum_initjoinfinalvaluetype_has_join_arg2 = Kokkos::Impl::FunctorHasJoin<SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_initjoinfinalvaluetype_has_join_arg2,1); - int sum_wronginitjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin<SumWrongInitJoinFinalValueType<Scalar,ExecutionSpace>, Scalar >::value; - ASSERT_EQ(sum_wronginitjoinfinalvaluetype_has_join_arg,0); + int sum_plain_has_init_arg = Kokkos::Impl::FunctorHasInit< SumPlain<Scalar, ExecutionSpace>, Scalar & >::value; + ASSERT_EQ( sum_plain_has_init_arg, 0 ); + int sum_initjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueType<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_init_arg, 1 ); + int 
sum_initjoinfinalvaluetype_has_init_arg2 = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueType2<Scalar,ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_init_arg2, 1 ); + int sum_wronginitjoinfinalvaluetype_has_init_arg = Kokkos::Impl::FunctorHasInit< SumWrongInitJoinFinalValueType<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_wronginitjoinfinalvaluetype_has_init_arg, 0 ); + + //int sum_initjoinfinalvaluetypearray_has_init_arg = Kokkos::Impl::FunctorHasInit< SumInitJoinFinalValueTypeArray<Scalar, ExecutionSpace>, Scalar[] >::value; + //ASSERT_EQ( sum_initjoinfinalvaluetypearray_has_init_arg, 1 ); + + //printf( "Values Init: %i %i %i\n", sum_plain_has_init_arg, sum_initjoinfinalvaluetype_has_init_arg, sum_wronginitjoinfinalvaluetype_has_init_arg ); + + int sum_plain_has_join_arg = Kokkos::Impl::FunctorHasJoin< SumPlain<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_plain_has_join_arg, 0 ); + int sum_initjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin< SumInitJoinFinalValueType<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_join_arg, 1 ); + int sum_initjoinfinalvaluetype_has_join_arg2 = Kokkos::Impl::FunctorHasJoin< SumInitJoinFinalValueType2<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_initjoinfinalvaluetype_has_join_arg2, 1 ); + int sum_wronginitjoinfinalvaluetype_has_join_arg = Kokkos::Impl::FunctorHasJoin< SumWrongInitJoinFinalValueType<Scalar, ExecutionSpace>, Scalar >::value; + ASSERT_EQ( sum_wronginitjoinfinalvaluetype_has_join_arg, 0 ); + + //printf( "Values Join: %i %i %i\n", sum_plain_has_join_arg, sum_initjoinfinalvaluetype_has_join_arg, sum_wronginitjoinfinalvaluetype_has_join_arg ); */ - //printf("Values Join: %i %i %i\n",sum_plain_has_join_arg,sum_initjoinfinalvaluetype_has_join_arg,sum_wronginitjoinfinalvaluetype_has_join_arg); } -} +} // namespace diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 842131debb69b54ad08fd0eb90836510be50d7ca..7d096c24c38ee82a6930ed192858e538e345dc29 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -47,108 +47,96 @@ namespace TestTile { -template < typename Device , typename TileLayout> +template < typename Device, typename TileLayout > struct ReduceTileErrors { - typedef Device execution_space ; - - typedef Kokkos::View< ptrdiff_t**, TileLayout, Device> array_type; - typedef Kokkos::View< ptrdiff_t[ TileLayout::N0 ][ TileLayout::N1 ], Kokkos::LayoutLeft , Device > tile_type ; - - array_type m_array ; - + typedef Device execution_space; + typedef Kokkos::View< ptrdiff_t**, TileLayout, Device > array_type; + typedef Kokkos::View< ptrdiff_t[ TileLayout::N0 ][ TileLayout::N1 ], Kokkos::LayoutLeft, Device > tile_type; typedef ptrdiff_t value_type; - ReduceTileErrors( array_type a ) - : m_array(a) - {} + array_type m_array; + ReduceTileErrors( array_type a ) : m_array( a ) {} KOKKOS_INLINE_FUNCTION - static void init( value_type & errors ) - { - errors = 0; - } + static void init( value_type & errors ) { errors = 0; } KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & errors , + static void join( volatile value_type & errors, const volatile value_type & src_errors ) { errors += src_errors; } - // Initialize + // Initialize. KOKKOS_INLINE_FUNCTION void operator()( size_t iwork ) const { const size_t i = iwork % m_array.dimension_0(); const size_t j = iwork / m_array.dimension_0(); - if ( j < m_array.dimension_1() ) { - m_array(i,j) = & m_array(i,j) - & m_array(0,0); -// printf("m_array(%d,%d) = %d\n",int(i),int(j),int(m_array(i,j))); + if ( j < m_array.dimension_1() ) { + m_array( i, j ) = &m_array( i, j ) - &m_array( 0, 0 ); + //printf( "m_array(%d, %d) = %d\n", int( i ), int( j ), int( m_array( i, j ) ) ); } } // Verify: KOKKOS_INLINE_FUNCTION - void operator()( size_t iwork , value_type & errors ) const + void operator()( size_t iwork, value_type & errors ) const { - const size_t tile_dim0 = ( m_array.dimension_0() + TileLayout::N0 - 1 ) / TileLayout::N0 ; - const size_t tile_dim1 = ( m_array.dimension_1() + TileLayout::N1 - 1 ) / TileLayout::N1 ; + const size_t tile_dim0 = ( m_array.dimension_0() + TileLayout::N0 - 1 ) / TileLayout::N0; + const size_t tile_dim1 = ( m_array.dimension_1() + TileLayout::N1 - 1 ) / TileLayout::N1; - const size_t itile = iwork % tile_dim0 ; - const size_t jtile = iwork / tile_dim0 ; + const size_t itile = iwork % tile_dim0; + const size_t jtile = iwork / tile_dim0; if ( jtile < tile_dim1 ) { + tile_type tile = Kokkos::Experimental::tile_subview( m_array, itile, jtile ); - tile_type tile = Kokkos::Experimental::tile_subview( m_array , itile , jtile ); - - if ( tile(0,0) != ptrdiff_t(( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { - ++errors ; + if ( tile( 0, 0 ) != ptrdiff_t( ( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { + ++errors; } else { + for ( size_t j = 0; j < size_t( TileLayout::N1 ); ++j ) { + for ( size_t i = 0; i < size_t( TileLayout::N0 ); ++i ) { + const size_t iglobal = i + itile * TileLayout::N0; + const size_t jglobal = j + jtile * TileLayout::N1; - for ( size_t j = 0 ; j < size_t(TileLayout::N1) ; ++j ) { - for ( size_t i = 0 ; i < size_t(TileLayout::N0) ; ++i ) { - const size_t iglobal = i + itile * TileLayout::N0 ; - const size_t jglobal = j + jtile * TileLayout::N1 ; - - if ( iglobal < m_array.dimension_0() && jglobal < m_array.dimension_1() ) { - if ( tile(i,j) != ptrdiff_t( tile(0,0) + i + j * TileLayout::N0 ) ) ++errors ; - -// 
printf("tile(%d,%d)(%d,%d) = %d\n",int(itile),int(jtile),int(i),int(j),int(tile(i,j))); + if ( iglobal < m_array.dimension_0() && jglobal < m_array.dimension_1() ) { + if ( tile( i, j ) != ptrdiff_t( tile( 0, 0 ) + i + j * TileLayout::N0 ) ) ++errors; + //printf( "tile(%d, %d)(%d, %d) = %d\n", int( itile ), int( jtile ), int( i ), int( j ), int( tile( i, j ) ) ); + } } } - } } } } }; -template< class Space , unsigned N0 , unsigned N1 > -void test( const size_t dim0 , const size_t dim1 ) +template< class Space, unsigned N0, unsigned N1 > +void test( const size_t dim0, const size_t dim1 ) { - typedef Kokkos::LayoutTileLeft<N0,N1> array_layout ; - typedef ReduceTileErrors< Space , array_layout > functor_type ; + typedef Kokkos::LayoutTileLeft< N0, N1 > array_layout; + typedef ReduceTileErrors< Space, array_layout > functor_type; - const size_t tile_dim0 = ( dim0 + N0 - 1 ) / N0 ; - const size_t tile_dim1 = ( dim1 + N1 - 1 ) / N1 ; - - typename functor_type::array_type array("",dim0,dim1); + const size_t tile_dim0 = ( dim0 + N0 - 1 ) / N0; + const size_t tile_dim1 = ( dim1 + N1 - 1 ) / N1; - Kokkos::parallel_for( Kokkos::RangePolicy<Space,size_t>(0,dim0*dim1) , functor_type( array ) ); + typename functor_type::array_type array( "", dim0, dim1 ); - ptrdiff_t error = 0 ; + Kokkos::parallel_for( Kokkos::RangePolicy< Space, size_t >( 0, dim0 * dim1 ), functor_type( array ) ); - Kokkos::parallel_reduce( Kokkos::RangePolicy<Space,size_t>(0,tile_dim0*tile_dim1) , functor_type( array ) , error ); + ptrdiff_t error = 0; - EXPECT_EQ( error , ptrdiff_t(0) ); + Kokkos::parallel_reduce( Kokkos::RangePolicy< Space, size_t >( 0, tile_dim0 * tile_dim1 ), functor_type( array ), error ); + + EXPECT_EQ( error, ptrdiff_t( 0 ) ); } -} /* namespace TestTile */ +} // namespace TestTile #endif //TEST_TILE_HPP - diff --git a/lib/kokkos/core/unit_test/TestUtilities.hpp b/lib/kokkos/core/unit_test/TestUtilities.hpp index 947be03e399bee3c23f4c4f333c34c0e6a9d4d08..be4a93b8942cdfd69e97f68b9ea109a2be10de19 100644 --- a/lib/kokkos/core/unit_test/TestUtilities.hpp +++ b/lib/kokkos/core/unit_test/TestUtilities.hpp @@ -49,258 +49,253 @@ #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { inline void test_utilities() { using namespace Kokkos::Impl; + { - using i = integer_sequence<int>; - using j = make_integer_sequence<int,0>; + using i = integer_sequence< int >; + using j = make_integer_sequence< int, 0 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 0u, "Error: integer_sequence.size()" ); } - { - using i = integer_sequence<int,0>; - using j = make_integer_sequence<int,1>; + using i = integer_sequence< int, 0 >; + using j = make_integer_sequence< int, 1 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 1u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); } - { - using i = integer_sequence<int,0,1>; - using j = make_integer_sequence<int,2>; + using i = 
integer_sequence< int, 0, 1 >; + using j = make_integer_sequence< int, 2 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 2u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2>; - using j = make_integer_sequence<int,3>; + using i = integer_sequence< int, 0, 1, 2 >; + using j = make_integer_sequence< int, 3 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 3u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3>; - using j = make_integer_sequence<int,4>; + using i = integer_sequence< int, 0, 1, 2, 3 >; + using j = make_integer_sequence< int, 4 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 4u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); - static_assert( at(0, i{}) == 0, "Error: at(unsigned, 
integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4>; - using j = make_integer_sequence<int,5>; + using i = integer_sequence< int, 0, 1, 2, 3, 4 >; + using j = make_integer_sequence< int, 5 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 5u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4,5>; - using j = make_integer_sequence<int,6>; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5 >; + using j = make_integer_sequence< int, 6 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 6u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, 
i>::value == 5, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4,5,6>; - using j = make_integer_sequence<int,7>; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6 >; + using j = make_integer_sequence< int, 7 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 7u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" 
); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4,5,6,7>; - using j = make_integer_sequence<int,8>; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7 >; + using j = make_integer_sequence< int, 8 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 8u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} 
) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4,5,6,7,8>; - using j = make_integer_sequence<int,9>; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7, 8 >; + using j = make_integer_sequence< int, 9 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 9u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 8, i >::value == 8, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, 
integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 8, i{} ) == 8, "Error: at(unsigned, integer_sequence)" ); } { - using i = integer_sequence<int,0,1,2,3,4,5,6,7,8,9>; - using j = make_integer_sequence<int,10>; + using i = integer_sequence< int, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 >; + using j = make_integer_sequence< int, 10 >; - static_assert( std::is_same<i,j>::value, "Error: make_integer_sequence" ); + static_assert( std::is_same< i, j >::value, "Error: make_integer_sequence" ); static_assert( i::size() == 10u, "Error: integer_sequence.size()" ); - static_assert( integer_sequence_at<0, i>::value == 0, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<1, i>::value == 1, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<2, i>::value == 2, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<3, i>::value == 3, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<4, i>::value == 4, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<5, i>::value == 5, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<6, i>::value == 6, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<7, i>::value == 7, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<8, i>::value == 8, "Error: integer_sequence_at" ); - static_assert( integer_sequence_at<9, i>::value == 9, "Error: integer_sequence_at" ); - - static_assert( at(0, i{}) == 0, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(1, i{}) == 1, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(2, i{}) == 2, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(3, i{}) == 3, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(4, i{}) == 4, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(5, i{}) == 5, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(6, i{}) == 6, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(7, i{}) == 7, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(8, i{}) == 8, "Error: at(unsigned, integer_sequence)" ); - static_assert( at(9, i{}) == 9, "Error: at(unsigned, integer_sequence)" ); + static_assert( integer_sequence_at< 0, i >::value == 0, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 1, i >::value == 1, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 2, i >::value == 2, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 3, i >::value == 3, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 4, i >::value == 4, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 5, i >::value == 5, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 6, i >::value == 6, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 7, i >::value == 7, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 8, i >::value == 8, "Error: integer_sequence_at" ); + static_assert( integer_sequence_at< 9, i >::value == 9, "Error: integer_sequence_at" ); + + static_assert( at( 0, i{} ) == 0, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 1, i{} ) == 1, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 2, i{} ) == 2, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 3, i{} ) == 3, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 4, i{} ) == 4, "Error: at(unsigned, 
integer_sequence)" ); + static_assert( at( 5, i{} ) == 5, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 6, i{} ) == 6, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 7, i{} ) == 7, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 8, i{} ) == 8, "Error: at(unsigned, integer_sequence)" ); + static_assert( at( 9, i{} ) == 9, "Error: at(unsigned, integer_sequence)" ); } { - using i = make_integer_sequence<int, 5>; - using r = reverse_integer_sequence<i>; - using gr = integer_sequence<int, 4, 3, 2, 1, 0>; + using i = make_integer_sequence< int, 5 >; + using r = reverse_integer_sequence< i >; + using gr = integer_sequence< int, 4, 3, 2, 1, 0 >; - static_assert( std::is_same<r,gr>::value, "Error: reverse_integer_sequence" ); + static_assert( std::is_same< r, gr >::value, "Error: reverse_integer_sequence" ); } { - using s = make_integer_sequence<int,10>; - using e = exclusive_scan_integer_sequence<s>; - using i = inclusive_scan_integer_sequence<s>; + using s = make_integer_sequence< int, 10 >; + using e = exclusive_scan_integer_sequence< s >; + using i = inclusive_scan_integer_sequence< s >; - using ge = integer_sequence<int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36>; - using gi = integer_sequence<int, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45>; + using ge = integer_sequence< int, 0, 0, 1, 3, 6, 10, 15, 21, 28, 36 >; + using gi = integer_sequence< int, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45 >; - static_assert( e::value == 45, "Error: scan value"); - static_assert( i::value == 45, "Error: scan value"); + static_assert( e::value == 45, "Error: scan value" ); + static_assert( i::value == 45, "Error: scan value" ); - static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan"); - static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan"); + static_assert( std::is_same< e::type, ge >::value, "Error: exclusive_scan" ); + static_assert( std::is_same< i::type, gi >::value, "Error: inclusive_scan" ); } - - } } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewAPI.hpp b/lib/kokkos/core/unit_test/TestViewAPI.hpp index a96f31cc12f227a66097c595e1f0fb44dd17a8c4..cbf86dc58c78fb44442d08497874a667f3923efb 100644 --- a/lib/kokkos/core/unit_test/TestViewAPI.hpp +++ b/lib/kokkos/core/unit_test/TestViewAPI.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,103 +48,92 @@ #include <sstream> #include <iostream> -/*--------------------------------------------------------------------------*/ - - -/*--------------------------------------------------------------------------*/ - namespace Test { -template< class T , class ... P > -size_t allocation_count( const Kokkos::View<T,P...> & view ) +template< class T, class ... P > +size_t allocation_count( const Kokkos::View< T, P... 
> & view ) { const size_t card = view.size(); const size_t alloc = view.span(); - const int memory_span = Kokkos::View<int*>::required_allocation_size(100); + const int memory_span = Kokkos::View< int* >::required_allocation_size( 100 ); - return (card <= alloc && memory_span == 400) ? alloc : 0 ; + return ( card <= alloc && memory_span == 400 ) ? alloc : 0; } /*--------------------------------------------------------------------------*/ -template< typename T, class DeviceType> +template< typename T, class DeviceType > struct TestViewOperator { - typedef typename DeviceType::execution_space execution_space ; + typedef typename DeviceType::execution_space execution_space; - static const unsigned N = 100 ; - static const unsigned D = 3 ; + static const unsigned N = 100; + static const unsigned D = 3; - typedef Kokkos::View< T*[D] , execution_space > view_type ; + typedef Kokkos::View< T*[D], execution_space > view_type; - const view_type v1 ; - const view_type v2 ; + const view_type v1; + const view_type v2; TestViewOperator() - : v1( "v1" , N ) - , v2( "v2" , N ) + : v1( "v1", N ) + , v2( "v2", N ) {} static void testit() { - Kokkos::parallel_for( N , TestViewOperator() ); + Kokkos::parallel_for( N, TestViewOperator() ); } KOKKOS_INLINE_FUNCTION void operator()( const unsigned i ) const { - const unsigned X = 0 ; - const unsigned Y = 1 ; - const unsigned Z = 2 ; + const unsigned X = 0; + const unsigned Y = 1; + const unsigned Z = 2; - v2(i,X) = v1(i,X); - v2(i,Y) = v1(i,Y); - v2(i,Z) = v1(i,Z); + v2( i, X ) = v1( i, X ); + v2( i, Y ) = v1( i, Y ); + v2( i, Z ) = v1( i, Z ); } }; /*--------------------------------------------------------------------------*/ -template< class DataType , - class DeviceType , +template< class DataType, + class DeviceType, unsigned Rank = Kokkos::ViewTraits< DataType >::rank > -struct TestViewOperator_LeftAndRight ; +struct TestViewOperator_LeftAndRight; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 8 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 8 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + 
right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -157,93 +146,89 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 8 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; - - offset = -1 ; - for ( unsigned i7 = 0 ; i7 < unsigned(left.dimension_7()) ; ++i7 ) - for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) - for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + long offset = -1; + + for ( unsigned i7 = 0; i7 < unsigned( left.dimension_7() ); ++i7 ) + for ( unsigned i6 = 0; i6 < unsigned( left.dimension_6() ); ++i6 ) + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, i5, i6, i7 ) - & left( 0, 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; - if ( & left(i0,i1,i2,i3,i4,i5,i6,i7) != - & left_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { - update |= 4 ; + if ( & left( i0, i1, i2, i3, i4, i5, i6, i7 ) != + & left_stride( i0, i1, i2, i3, i4, i5, i6, i7 ) ) { + update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) - for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) - for ( unsigned i7 = 0 ; i7 < unsigned(right.dimension_7()) ; ++i7 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) + for ( unsigned i6 = 0; i6 < unsigned( right.dimension_6() ); ++i6 ) + for ( unsigned i7 = 0; i7 < unsigned( right.dimension_7() ); ++i7 ) { const long j = & right( i0, i1, i2, i3, i4, i5, i6, 
i7 ) - & right( 0, 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; - if ( & right(i0,i1,i2,i3,i4,i5,i6,i7) != - & right_stride(i0,i1,i2,i3,i4,i5,i6,i7) ) { - update |= 8 ; + if ( & right( i0, i1, i2, i3, i4, i5, i6, i7 ) != + & right_stride( i0, i1, i2, i3, i4, i5, i6, i7 ) ) { + update |= 8; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 7 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - - - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -254,81 +239,77 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 7 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; - - offset = -1 ; - for ( unsigned i6 = 0 ; i6 < unsigned(left.dimension_6()) ; ++i6 ) - for ( unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + long offset = -1; + + for ( unsigned i6 = 0; i6 < unsigned( left.dimension_6() ); ++i6 ) + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, 
i5, i6 ) - & left( 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) - for ( unsigned i6 = 0 ; i6 < unsigned(right.dimension_6()) ; ++i6 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) + for ( unsigned i6 = 0; i6 < unsigned( right.dimension_6() ); ++i6 ) { const long j = & right( i0, i1, i2, i3, i4, i5, i6 ) - & right( 0, 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 6 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - - - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -339,84 +320,78 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 6 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; - - offset = -1 ; - for ( 
unsigned i5 = 0 ; i5 < unsigned(left.dimension_5()) ; ++i5 ) - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + long offset = -1; + + for ( unsigned i5 = 0; i5 < unsigned( left.dimension_5() ); ++i5 ) + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4, i5 ) - & left( 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) - for ( unsigned i5 = 0 ; i5 < unsigned(right.dimension_5()) ; ++i5 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) + for ( unsigned i5 = 0; i5 < unsigned( right.dimension_5() ); ++i5 ) { const long j = & right( i0, i1, i2, i3, i4, i5 ) - & right( 0, 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 5 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view 
; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -429,83 +404,79 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 5 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; - - offset = -1 ; - for ( unsigned i4 = 0 ; i4 < unsigned(left.dimension_4()) ; ++i4 ) - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + long offset = -1; + + for ( unsigned i4 = 0; i4 < unsigned( left.dimension_4() ); ++i4 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3, i4 ) - & left( 0, 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; if ( & left( i0, i1, i2, i3, i4 ) != - & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4 ; } + & left_stride( i0, i1, i2, i3, i4 ) ) { update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) - for ( unsigned i4 = 0 ; i4 < unsigned(right.dimension_4()) ; ++i4 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) + for ( unsigned i4 = 0; i4 < unsigned( right.dimension_4() ); ++i4 ) { const long j = & right( i0, i1, i2, i3, i4 ) - & right( 0, 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; if ( & right( i0, i1, i2, i3, i4 ) != - & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8 ; } + & right_stride( i0, i1, i2, i3, i4 ) ) { update |= 8; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 4 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space 
memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -516,84 +487,78 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 4 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i3 = 0 ; i3 < unsigned(left.dimension_3()) ; ++i3 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i3 = 0; i3 < unsigned( left.dimension_3() ); ++i3 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2, i3 ) - & left( 0, 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) - for ( unsigned i3 = 0 ; i3 < unsigned(right.dimension_3()) ; ++i3 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) + for ( unsigned i3 = 0; i3 < unsigned( right.dimension_3() ); ++i3 ) { const long j = & right( i0, i1, i2, i3 ) - & right( 0, 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > +template< class DataType, class DeviceType > +struct 
TestViewOperator_LeftAndRight< DataType, DeviceType, 3 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - - - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() - : left( std::string("left") ) - , right( std::string("right") ) + : left( std::string( "left" ) ) + , right( std::string( "right" ) ) , left_stride( left ) , right_stride( right ) , left_alloc( allocation_count( left ) ) @@ -602,85 +567,81 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 3 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1, i2 ) - & left( 0, 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; - if ( & left(i0,i1,i2) != & left_stride(i0,i1,i2) ) { update |= 4 ; } + if ( & left( i0, i1, i2 ) != & left_stride( i0, i1, i2 ) ) { update |= 4; } } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(right.dimension_2()) ; ++i2 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 
< unsigned( right.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( right.dimension_2() ); ++i2 ) { const long j = & right( i0, i1, i2 ) - & right( 0, 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; - if ( & right(i0,i1,i2) != & right_stride(i0,i1,i2) ) { update |= 8 ; } + if ( & right( i0, i1, i2 ) != & right_stride( i0, i1, i2 ) ) { update |= 8; } } - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i2 = 0 ; i2 < unsigned(left.dimension_2()) ; ++i2 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( unsigned i2 = 0; i2 < unsigned( left.dimension_2() ); ++i2 ) { - if ( & left(i0,i1,i2) != & left(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0,i1,i2) != & right(i0,i1,i2,0,0,0,0,0) ) { update |= 3 ; } + if ( & left( i0, i1, i2 ) != & left( i0, i1, i2, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0, i1, i2 ) != & right( i0, i1, i2, 0, 0, 0, 0, 0 ) ) { update |= 3; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 2 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - - - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; - left_view left ; - right_view right ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -691,83 +652,77 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 2 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - long offset ; + long offset = -1; - offset = -1 ; - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) + for ( 
unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { const long j = & left( i0, i1 ) - & left( 0, 0 ); - if ( j <= offset || left_alloc <= j ) { update |= 1 ; } - offset = j ; + if ( j <= offset || left_alloc <= j ) { update |= 1; } + offset = j; } - offset = -1 ; - for ( unsigned i0 = 0 ; i0 < unsigned(right.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(right.dimension_1()) ; ++i1 ) + offset = -1; + + for ( unsigned i0 = 0; i0 < unsigned( right.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( right.dimension_1() ); ++i1 ) { const long j = & right( i0, i1 ) - & right( 0, 0 ); - if ( j <= offset || right_alloc <= j ) { update |= 2 ; } - offset = j ; + if ( j <= offset || right_alloc <= j ) { update |= 2; } + offset = j; } - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) - for ( unsigned i1 = 0 ; i1 < unsigned(left.dimension_1()) ; ++i1 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) + for ( unsigned i1 = 0; i1 < unsigned( left.dimension_1() ); ++i1 ) { - if ( & left(i0,i1) != & left(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0,i1) != & right(i0,i1,0,0,0,0,0,0) ) { update |= 3 ; } + if ( & left( i0, i1 ) != & left( i0, i1, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0, i1 ) != & right( i0, i1, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } } } }; -template< class DataType , class DeviceType > -struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > +template< class DataType, class DeviceType > +struct TestViewOperator_LeftAndRight< DataType, DeviceType, 1 > { - typedef typename DeviceType::execution_space execution_space ; - typedef typename DeviceType::memory_space memory_space ; - typedef typename execution_space::size_type size_type ; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + typedef typename execution_space::size_type size_type; - typedef int value_type ; + typedef int value_type; KOKKOS_INLINE_FUNCTION - static void join( volatile value_type & update , + static void join( volatile value_type & update, const volatile value_type & input ) - { update |= input ; } + { update |= input; } KOKKOS_INLINE_FUNCTION static void init( value_type & update ) - { update = 0 ; } - + { update = 0; } - typedef Kokkos:: - View< DataType, Kokkos::LayoutLeft, execution_space > left_view ; + typedef Kokkos::View< DataType, Kokkos::LayoutLeft, execution_space > left_view; + typedef Kokkos::View< DataType, Kokkos::LayoutRight, execution_space > right_view; + typedef Kokkos::View< DataType, Kokkos::LayoutStride, execution_space > stride_view; - typedef Kokkos:: - View< DataType, Kokkos::LayoutRight, execution_space > right_view ; - - typedef Kokkos:: - View< DataType, Kokkos::LayoutStride, execution_space > stride_view ; - - left_view left ; - right_view right ; - stride_view left_stride ; - stride_view right_stride ; - long left_alloc ; - long right_alloc ; + left_view left; + right_view right; + stride_view left_stride; + stride_view right_stride; + long left_alloc; + long right_alloc; TestViewOperator_LeftAndRight() : left( "left" ) @@ -780,78 +735,75 @@ struct TestViewOperator_LeftAndRight< DataType , DeviceType , 1 > static void testit() { - TestViewOperator_LeftAndRight driver ; + TestViewOperator_LeftAndRight driver; - int error_flag = 0 ; + int error_flag = 0; - Kokkos::parallel_reduce( 1 , driver , error_flag ); + Kokkos::parallel_reduce( 1, driver, error_flag ); - ASSERT_EQ( error_flag , 0 ); + ASSERT_EQ( 
error_flag, 0 ); } KOKKOS_INLINE_FUNCTION - void operator()( const size_type , value_type & update ) const + void operator()( const size_type, value_type & update ) const { - for ( unsigned i0 = 0 ; i0 < unsigned(left.dimension_0()) ; ++i0 ) + for ( unsigned i0 = 0; i0 < unsigned( left.dimension_0() ); ++i0 ) { - if ( & left(i0) != & left(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & right(i0) != & right(i0,0,0,0,0,0,0,0) ) { update |= 3 ; } - if ( & left(i0) != & left_stride(i0) ) { update |= 4 ; } - if ( & right(i0) != & right_stride(i0) ) { update |= 8 ; } + if ( & left( i0 ) != & left( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & right( i0 ) != & right( i0, 0, 0, 0, 0, 0, 0, 0 ) ) { update |= 3; } + if ( & left( i0 ) != & left_stride( i0 ) ) { update |= 4; } + if ( & right( i0 ) != & right_stride( i0 ) ) { update |= 8; } } } }; -template<class Layout, class DeviceType> -struct TestViewMirror { - - template<class MemoryTraits> +template< class Layout, class DeviceType > +struct TestViewMirror +{ + template< class MemoryTraits > void static test_mirror() { - Kokkos::View<double*, Layout, Kokkos::HostSpace> a_org("A",1000); - Kokkos::View<double*, Layout, Kokkos::HostSpace, MemoryTraits> a_h = a_org; - auto a_h2 = Kokkos::create_mirror(Kokkos::HostSpace(),a_h); - auto a_d = Kokkos::create_mirror(DeviceType(),a_h); - - int equal_ptr_h_h2 = (a_h.data() ==a_h2.data())?1:0; - int equal_ptr_h_d = (a_h.data() ==a_d. data())?1:0; - int equal_ptr_h2_d = (a_h2.data()==a_d. data())?1:0; - - ASSERT_EQ(equal_ptr_h_h2,0); - ASSERT_EQ(equal_ptr_h_d ,0); - ASSERT_EQ(equal_ptr_h2_d,0); - - - ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); - ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); - } + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 1000 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + auto a_h2 = Kokkos::create_mirror( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror( DeviceType(), a_h ); + int equal_ptr_h_h2 = ( a_h.data() == a_h2.data() ) ? 1 : 0; + int equal_ptr_h_d = ( a_h.data() == a_d.data() ) ? 1 : 0; + int equal_ptr_h2_d = ( a_h2.data() == a_d.data() ) ? 1 : 0; - template<class MemoryTraits> - void static test_mirror_view() { - Kokkos::View<double*, Layout, Kokkos::HostSpace> a_org("A",1000); - Kokkos::View<double*, Layout, Kokkos::HostSpace, MemoryTraits> a_h = a_org; - auto a_h2 = Kokkos::create_mirror_view(Kokkos::HostSpace(),a_h); - auto a_d = Kokkos::create_mirror_view(DeviceType(),a_h); - - int equal_ptr_h_h2 = a_h.data() ==a_h2.data()?1:0; - int equal_ptr_h_d = a_h.data() ==a_d. data()?1:0; - int equal_ptr_h2_d = a_h2.data()==a_d. 
data()?1:0; - - int is_same_memspace = std::is_same<Kokkos::HostSpace,typename DeviceType::memory_space>::value?1:0; - ASSERT_EQ(equal_ptr_h_h2,1); - ASSERT_EQ(equal_ptr_h_d ,is_same_memspace); - ASSERT_EQ(equal_ptr_h2_d ,is_same_memspace); + ASSERT_EQ( equal_ptr_h_h2, 0 ); + ASSERT_EQ( equal_ptr_h_d, 0 ); + ASSERT_EQ( equal_ptr_h2_d, 0 ); + ASSERT_EQ( a_h.dimension_0(), a_h2.dimension_0() ); + ASSERT_EQ( a_h.dimension_0(), a_d .dimension_0() ); + } - ASSERT_EQ(a_h.dimension_0(),a_h2.dimension_0()); - ASSERT_EQ(a_h.dimension_0(),a_d .dimension_0()); - } + template< class MemoryTraits > + void static test_mirror_view() { + Kokkos::View< double*, Layout, Kokkos::HostSpace > a_org( "A", 1000 ); + Kokkos::View< double*, Layout, Kokkos::HostSpace, MemoryTraits > a_h = a_org; + auto a_h2 = Kokkos::create_mirror_view( Kokkos::HostSpace(), a_h ); + auto a_d = Kokkos::create_mirror_view( DeviceType(), a_h ); + + int equal_ptr_h_h2 = a_h.data() == a_h2.data() ? 1 : 0; + int equal_ptr_h_d = a_h.data() == a_d.data() ? 1 : 0; + int equal_ptr_h2_d = a_h2.data() == a_d.data() ? 1 : 0; + + int is_same_memspace = std::is_same< Kokkos::HostSpace, typename DeviceType::memory_space >::value ? 1 : 0; + ASSERT_EQ( equal_ptr_h_h2, 1 ); + ASSERT_EQ( equal_ptr_h_d, is_same_memspace ); + ASSERT_EQ( equal_ptr_h2_d, is_same_memspace ); + + ASSERT_EQ( a_h.dimension_0(), a_h2.dimension_0() ); + ASSERT_EQ( a_h.dimension_0(), a_d .dimension_0() ); + } void static testit() { - test_mirror<Kokkos::MemoryTraits<0>>(); - test_mirror<Kokkos::MemoryTraits<Kokkos::Unmanaged>>(); - test_mirror_view<Kokkos::MemoryTraits<0>>(); - test_mirror_view<Kokkos::MemoryTraits<Kokkos::Unmanaged>>(); + test_mirror< Kokkos::MemoryTraits<0> >(); + test_mirror< Kokkos::MemoryTraits<Kokkos::Unmanaged> >(); + test_mirror_view< Kokkos::MemoryTraits<0> >(); + test_mirror_view< Kokkos::MemoryTraits<Kokkos::Unmanaged> >(); } }; @@ -861,23 +813,21 @@ template< typename T, class DeviceType > class TestViewAPI { public: - typedef DeviceType device ; + typedef DeviceType device; - enum { N0 = 1000 , - N1 = 3 , - N2 = 5 , + enum { N0 = 1000, + N1 = 3, + N2 = 5, N3 = 7 }; - typedef Kokkos::View< T , device > dView0 ; - typedef Kokkos::View< T* , device > dView1 ; - typedef Kokkos::View< T*[N1] , device > dView2 ; - typedef Kokkos::View< T*[N1][N2] , device > dView3 ; - typedef Kokkos::View< T*[N1][N2][N3] , device > dView4 ; - typedef Kokkos::View< const T*[N1][N2][N3] , device > const_dView4 ; - - typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged ; - - typedef typename dView0::host_mirror_space host ; + typedef Kokkos::View< T, device > dView0; + typedef Kokkos::View< T*, device > dView1; + typedef Kokkos::View< T*[N1], device > dView2; + typedef Kokkos::View< T*[N1][N2], device > dView3; + typedef Kokkos::View< T*[N1][N2][N3], device > dView4; + typedef Kokkos::View< const T*[N1][N2][N3], device > const_dView4; + typedef Kokkos::View< T****, device, Kokkos::MemoryUnmanaged > dView4_unmanaged; + typedef typename dView0::host_mirror_space host; TestViewAPI() { @@ -889,41 +839,38 @@ public: run_test_subview_strided(); run_test_vector(); - TestViewOperator< T , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3][4] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4][2][3] , device >::testit(); - TestViewOperator_LeftAndRight< 
int[2][3][4][2] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3][4] , device >::testit(); - TestViewOperator_LeftAndRight< int[2][3] , device >::testit(); - TestViewOperator_LeftAndRight< int[2] , device >::testit(); - TestViewMirror<Kokkos::LayoutLeft, device >::testit(); - TestViewMirror<Kokkos::LayoutRight, device >::testit(); - + TestViewOperator< T, device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4][2], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3][4], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4][2], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3][4], device >::testit(); + TestViewOperator_LeftAndRight< int[2][3], device >::testit(); + TestViewOperator_LeftAndRight< int[2], device >::testit(); + TestViewMirror< Kokkos::LayoutLeft, device >::testit(); + TestViewMirror< Kokkos::LayoutRight, device >::testit(); } static void run_test_mirror() { - typedef Kokkos::View< int , host > view_type ; - typedef typename view_type::HostMirror mirror_type ; + typedef Kokkos::View< int, host > view_type; + typedef typename view_type::HostMirror mirror_type; - static_assert( std::is_same< typename view_type::memory_space - , typename mirror_type::memory_space - >::value , "" ); + static_assert( std::is_same< typename view_type::memory_space, typename mirror_type::memory_space >::value, "" ); - view_type a("a"); - mirror_type am = Kokkos::create_mirror_view(a); - mirror_type ax = Kokkos::create_mirror(a); - ASSERT_EQ( & a() , & am() ); + view_type a( "a" ); + mirror_type am = Kokkos::create_mirror_view( a ); + mirror_type ax = Kokkos::create_mirror( a ); + ASSERT_EQ( & a(), & am() ); } static void run_test_scalar() { - typedef typename dView0::HostMirror hView0 ; + typedef typename dView0::HostMirror hView0; - dView0 dx , dy ; - hView0 hx , hy ; + dView0 dx, dy; + hView0 hx, hy; dx = dView0( "dx" ); dy = dView0( "dy" ); @@ -931,11 +878,11 @@ public: hx = Kokkos::create_mirror( dx ); hy = Kokkos::create_mirror( dy ); - hx() = 1 ; + hx() = 1; - Kokkos::deep_copy( dx , hx ); - Kokkos::deep_copy( dy , dx ); - Kokkos::deep_copy( hy , dy ); + Kokkos::deep_copy( dx, hx ); + Kokkos::deep_copy( dy, dx ); + Kokkos::deep_copy( hy, dy ); ASSERT_EQ( hx(), hy() ); } @@ -948,11 +895,11 @@ public: // usual "(void)" marker to avoid compiler warnings for unused // variables. 
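// [Editor's note -- illustrative sketch, not part of this patch.] The test
// below relies on two idioms shown here: a default-constructed View manages
// no allocation (null data pointer, zero extents), and casting to (void)
// silences unused-variable warnings. The function name is hypothetical and
// Kokkos is assumed to be initialized.

#include <Kokkos_Core.hpp>

void default_view_sketch()
{
  Kokkos::View< double*[3] > empty;   // no label, no allocation yet
  (void) empty;                       // suppress "unused variable" warnings

  // At this point empty.ptr_on_device() == 0 and empty.dimension_0() == 0u,
  // which is exactly what the ASSERT_TRUE / ASSERT_EQ checks below verify.
}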
- typedef typename dView0::HostMirror hView0 ; - typedef typename dView1::HostMirror hView1 ; - typedef typename dView2::HostMirror hView2 ; - typedef typename dView3::HostMirror hView3 ; - typedef typename dView4::HostMirror hView4 ; + typedef typename dView0::HostMirror hView0; + typedef typename dView1::HostMirror hView1; + typedef typename dView2::HostMirror hView2; + typedef typename dView3::HostMirror hView3; + typedef typename dView4::HostMirror hView4; { hView0 thing; @@ -975,8 +922,8 @@ public: (void) thing; } - dView4 dx , dy , dz ; - hView4 hx , hy , hz ; + dView4 dx, dy, dz; + hView4 hx, hy, hz; ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); @@ -984,220 +931,239 @@ public: ASSERT_TRUE( hx.ptr_on_device() == 0 ); ASSERT_TRUE( hy.ptr_on_device() == 0 ); ASSERT_TRUE( hz.ptr_on_device() == 0 ); - ASSERT_EQ( dx.dimension_0() , 0u ); - ASSERT_EQ( dy.dimension_0() , 0u ); - ASSERT_EQ( dz.dimension_0() , 0u ); - ASSERT_EQ( hx.dimension_0() , 0u ); - ASSERT_EQ( hy.dimension_0() , 0u ); - ASSERT_EQ( hz.dimension_0() , 0u ); - ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dz.dimension_1() , unsigned(N1) ); - ASSERT_EQ( hx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( hy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( hz.dimension_1() , unsigned(N1) ); - - dx = dView4( "dx" , N0 ); - dy = dView4( "dy" , N0 ); - - ASSERT_EQ( dx.use_count() , size_t(1) ); + ASSERT_EQ( dx.dimension_0(), 0u ); + ASSERT_EQ( dy.dimension_0(), 0u ); + ASSERT_EQ( dz.dimension_0(), 0u ); + ASSERT_EQ( hx.dimension_0(), 0u ); + ASSERT_EQ( hy.dimension_0(), 0u ); + ASSERT_EQ( hz.dimension_0(), 0u ); + ASSERT_EQ( dx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dz.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( hz.dimension_1(), unsigned( N1 ) ); + + dx = dView4( "dx", N0 ); + dy = dView4( "dy", N0 ); + + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); dView4_unmanaged unmanaged_dx = dx; - ASSERT_EQ( dx.use_count() , size_t(1) ); + ASSERT_EQ( dx.use_count(), size_t( 1 ) ); - dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged(dx.ptr_on_device(), - dx.dimension_0(), - dx.dimension_1(), - dx.dimension_2(), - dx.dimension_3()); + dView4_unmanaged unmanaged_from_ptr_dx = dView4_unmanaged( dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), + dx.dimension_3() ); { - // Destruction of this view should be harmless - const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device() , - dx.dimension_0() , - dx.dimension_1() , - dx.dimension_2() , + // Destruction of this view should be harmless. 
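// [Editor's note -- hedged illustration, not part of this patch.] An
// unmanaged View simply wraps an existing pointer plus extents; it is not
// reference counted, so letting it go out of scope (as the const view
// constructed below does) never frees the memory it aliases. The helper
// function here is hypothetical.

#include <Kokkos_Core.hpp>
#include <cstddef>

// Assumes 'buffer' points to at least n doubles owned by the caller.
void wrap_existing_buffer( double * buffer, const std::size_t n )
{
  Kokkos::View< double*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged >
    alias( buffer, n );

  Kokkos::deep_copy( alias, 0.0 );  // writes through to the caller's memory

  // Destroying 'alias' deallocates nothing: it never owned 'buffer'.
}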
+ const_dView4 unmanaged_from_ptr_const_dx( dx.ptr_on_device(), + dx.dimension_0(), + dx.dimension_1(), + dx.dimension_2(), dx.dimension_3() ); } - const_dView4 const_dx = dx ; - ASSERT_EQ( dx.use_count() , size_t(2) ); + const_dView4 const_dx = dx; + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); { const_dView4 const_dx2; const_dx2 = const_dx; - ASSERT_EQ( dx.use_count() , size_t(3) ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); const_dx2 = dy; - ASSERT_EQ( dx.use_count() , size_t(2) ); + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); - const_dView4 const_dx3(dx); - ASSERT_EQ( dx.use_count() , size_t(3) ); - - dView4_unmanaged dx4_unmanaged(dx); - ASSERT_EQ( dx.use_count() , size_t(3) ); - } + const_dView4 const_dx3( dx ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); - ASSERT_EQ( dx.use_count() , size_t(2) ); + dView4_unmanaged dx4_unmanaged( dx ); + ASSERT_EQ( dx.use_count(), size_t( 3 ) ); + } + ASSERT_EQ( dx.use_count(), size_t( 2 ) ); ASSERT_FALSE( dx.ptr_on_device() == 0 ); ASSERT_FALSE( const_dx.ptr_on_device() == 0 ); ASSERT_FALSE( unmanaged_dx.ptr_on_device() == 0 ); ASSERT_FALSE( unmanaged_from_ptr_dx.ptr_on_device() == 0 ); ASSERT_FALSE( dy.ptr_on_device() == 0 ); - ASSERT_NE( dx , dy ); + ASSERT_NE( dx, dy ); - ASSERT_EQ( dx.dimension_0() , unsigned(N0) ); - ASSERT_EQ( dx.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dx.dimension_2() , unsigned(N2) ); - ASSERT_EQ( dx.dimension_3() , unsigned(N3) ); + ASSERT_EQ( dx.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( dx.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dx.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( dx.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( dy.dimension_0() , unsigned(N0) ); - ASSERT_EQ( dy.dimension_1() , unsigned(N1) ); - ASSERT_EQ( dy.dimension_2() , unsigned(N2) ); - ASSERT_EQ( dy.dimension_3() , unsigned(N3) ); + ASSERT_EQ( dy.dimension_0(), unsigned( N0 ) ); + ASSERT_EQ( dy.dimension_1(), unsigned( N1 ) ); + ASSERT_EQ( dy.dimension_2(), unsigned( N2 ) ); + ASSERT_EQ( dy.dimension_3(), unsigned( N3 ) ); - ASSERT_EQ( unmanaged_from_ptr_dx.capacity(),unsigned(N0)*unsigned(N1)*unsigned(N2)*unsigned(N3) ); + ASSERT_EQ( unmanaged_from_ptr_dx.capacity(), unsigned( N0 ) * unsigned( N1 ) * unsigned( N2 ) * unsigned( N3 ) ); hx = Kokkos::create_mirror( dx ); hy = Kokkos::create_mirror( dy ); - // T v1 = hx() ; // Generates compile error as intended - // T v2 = hx(0,0) ; // Generates compile error as intended - // hx(0,0) = v2 ; // Generates compile error as intended + // T v1 = hx(); // Generates compile error as intended. + // T v2 = hx( 0, 0 ); // Generates compile error as intended. + // hx( 0, 0 ) = v2; // Generates compile error as intended. 
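// [Editor's note -- sketch added for review purposes only, not part of this
// patch.] The three blocks that follow exercise deep_copy in two forms:
// passing an execution-space instance as the first argument selects the
// overload that may run asynchronously with respect to the caller, while the
// plain two-argument form fences. A minimal, hypothetical usage:

#include <Kokkos_Core.hpp>

void deep_copy_sketch()
{
  Kokkos::View< double* > dev( "dev", 100 );
  auto host = Kokkos::create_mirror_view( dev );

  Kokkos::deep_copy( Kokkos::DefaultHostExecutionSpace(), dev, host ); // may be asynchronous
  Kokkos::fence();                                                     // wait before using 'dev'

  Kokkos::deep_copy( dev, host );                                      // fencing (synchronous) form
}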
// Testing with asynchronous deep copy with respect to device { - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} - - - Kokkos::deep_copy(typename hView4::execution_space(), dx , hx ); - Kokkos::deep_copy(typename hView4::execution_space(), dy , dx ); - Kokkos::deep_copy(typename hView4::execution_space(), hy , dy ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} - - Kokkos::deep_copy(typename hView4::execution_space(), dx , T(0) ); - Kokkos::deep_copy(typename hView4::execution_space(), hx , dx ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + size_t count = 0; + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } + + Kokkos::deep_copy( typename hView4::execution_space(), dx, hx ); + Kokkos::deep_copy( typename hView4::execution_space(), dy, dx ); + Kokkos::deep_copy( typename hView4::execution_space(), hy, dy ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } + + Kokkos::deep_copy( typename hView4::execution_space(), dx, T( 0 ) ); + Kokkos::deep_copy( typename hView4::execution_space(), hx, dx ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - // Testing with asynchronous deep copy with respect to host + // Testing with asynchronous deep copy with respect to host. 
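// [Editor's note -- illustration only, not part of this patch.] Besides
// view-to-view copies, deep_copy also accepts a scalar source; the blocks in
// this test use that form to reset every entry before re-checking the
// mirrors. A hypothetical example:

#include <Kokkos_Core.hpp>

void scalar_fill_sketch()
{
  Kokkos::View< int*[3] > a( "a", 10 );
  Kokkos::deep_copy( a, 0 );  // all 10 x 3 entries become zero
}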
{ - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} - - Kokkos::deep_copy(typename dView4::execution_space(), dx , hx ); - Kokkos::deep_copy(typename dView4::execution_space(), dy , dx ); - Kokkos::deep_copy(typename dView4::execution_space(), hy , dy ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} - - Kokkos::deep_copy(typename dView4::execution_space(), dx , T(0) ); - Kokkos::deep_copy(typename dView4::execution_space(), hx , dx ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + size_t count = 0; + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } + + Kokkos::deep_copy( typename dView4::execution_space(), dx, hx ); + Kokkos::deep_copy( typename dView4::execution_space(), dy, dx ); + Kokkos::deep_copy( typename dView4::execution_space(), hy, dy ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } + + Kokkos::deep_copy( typename dView4::execution_space(), dx, T( 0 ) ); + Kokkos::deep_copy( typename dView4::execution_space(), hx, dx ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - // Testing with synchronous deep copy + // Testing with synchronous deep copy. 
{ - size_t count = 0 ; - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < hx.dimension_1() ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < hx.dimension_2() ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < hx.dimension_3() ; ++i3 ) { - hx(ip,i1,i2,i3) = ++count ; - }}}} - - Kokkos::deep_copy( dx , hx ); - Kokkos::deep_copy( dy , dx ); - Kokkos::deep_copy( hy , dy ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , hy(ip,i1,i2,i3) ); } - }}}} - - Kokkos::deep_copy( dx , T(0) ); - Kokkos::deep_copy( hx , dx ); - - for ( size_t ip = 0 ; ip < N0 ; ++ip ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - { ASSERT_EQ( hx(ip,i1,i2,i3) , T(0) ); } - }}}} + size_t count = 0; + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < hx.dimension_1(); ++i1 ) + for ( size_t i2 = 0; i2 < hx.dimension_2(); ++i2 ) + for ( size_t i3 = 0; i3 < hx.dimension_3(); ++i3 ) + { + hx( ip, i1, i2, i3 ) = ++count; + } + + Kokkos::deep_copy( dx, hx ); + Kokkos::deep_copy( dy, dx ); + Kokkos::deep_copy( hy, dy ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), hy( ip, i1, i2, i3 ) ); + } + + Kokkos::deep_copy( dx, T( 0 ) ); + Kokkos::deep_copy( hx, dx ); + + for ( size_t ip = 0; ip < N0; ++ip ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i3 = 0; i3 < N3; ++i3 ) + { + ASSERT_EQ( hx( ip, i1, i2, i3 ), T( 0 ) ); + } } - dz = dx ; ASSERT_EQ( dx, dz); ASSERT_NE( dy, dz); - dz = dy ; ASSERT_EQ( dy, dz); ASSERT_NE( dx, dz); + + dz = dx; + ASSERT_EQ( dx, dz ); + ASSERT_NE( dy, dz ); + + dz = dy; + ASSERT_EQ( dy, dz ); + ASSERT_NE( dx, dz ); dx = dView4(); ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_FALSE( dy.ptr_on_device() == 0 ); ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dy = dView4(); ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); ASSERT_FALSE( dz.ptr_on_device() == 0 ); + dz = dView4(); ASSERT_TRUE( dx.ptr_on_device() == 0 ); ASSERT_TRUE( dy.ptr_on_device() == 0 ); ASSERT_TRUE( dz.ptr_on_device() == 0 ); } - typedef T DataType[2] ; + typedef T DataType[2]; static void check_auto_conversion_to_const( - const Kokkos::View< const DataType , device > & arg_const , - const Kokkos::View< DataType , device > & arg ) + const Kokkos::View< const DataType, device > & arg_const, + const Kokkos::View< DataType, device > & arg ) { ASSERT_TRUE( arg_const == arg ); } static void run_test_const() { - typedef Kokkos::View< DataType , device > typeX ; - typedef Kokkos::View< const DataType , device > const_typeX ; - typedef Kokkos::View< const DataType , device , Kokkos::MemoryRandomAccess > const_typeR ; + typedef Kokkos::View< DataType, device > typeX; + typedef Kokkos::View< const DataType, device > const_typeX; + typedef Kokkos::View< const DataType, device, Kokkos::MemoryRandomAccess > const_typeR; + typeX x( "X" ); - const_typeX xc = x ; - const_typeR xr = x ; + const_typeX xc = x; + const_typeR xr = x; ASSERT_TRUE( xc == x ); ASSERT_TRUE( x == xc ); @@ -1206,144 +1172,142 @@ public: // an lvalue reference due to retrieving through texture cache // therefore not allowed to query the underlying pointer. #if defined( KOKKOS_ENABLE_CUDA ) - if ( ! 
std::is_same< typename device::execution_space , Kokkos::Cuda >::value ) + if ( !std::is_same< typename device::execution_space, Kokkos::Cuda >::value ) #endif { ASSERT_TRUE( x.ptr_on_device() == xr.ptr_on_device() ); } - // typeX xf = xc ; // setting non-const from const must not compile + // typeX xf = xc; // Setting non-const from const must not compile. - check_auto_conversion_to_const( x , x ); + check_auto_conversion_to_const( x, x ); } static void run_test_subview() { - typedef Kokkos::View< const T , device > sView ; + typedef Kokkos::View< const T, device > sView; dView0 d0( "d0" ); - dView1 d1( "d1" , N0 ); - dView2 d2( "d2" , N0 ); - dView3 d3( "d3" , N0 ); - dView4 d4( "d4" , N0 ); - - sView s0 = d0 ; - sView s1 = Kokkos::subview( d1 , 1 ); - sView s2 = Kokkos::subview( d2 , 1 , 1 ); - sView s3 = Kokkos::subview( d3 , 1 , 1 , 1 ); - sView s4 = Kokkos::subview( d4 , 1 , 1 , 1 , 1 ); + dView1 d1( "d1", N0 ); + dView2 d2( "d2", N0 ); + dView3 d3( "d3", N0 ); + dView4 d4( "d4", N0 ); + + sView s0 = d0; + sView s1 = Kokkos::subview( d1, 1 ); + sView s2 = Kokkos::subview( d2, 1, 1 ); + sView s3 = Kokkos::subview( d3, 1, 1, 1 ); + sView s4 = Kokkos::subview( d4, 1, 1, 1, 1 ); } static void run_test_subview_strided() { - typedef Kokkos::View< int **** , Kokkos::LayoutLeft , host > view_left_4 ; - typedef Kokkos::View< int **** , Kokkos::LayoutRight , host > view_right_4 ; - typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2 ; - typedef Kokkos::View< int ** , Kokkos::LayoutRight , host > view_right_2 ; - - typedef Kokkos::View< int * , Kokkos::LayoutStride , host > view_stride_1 ; - typedef Kokkos::View< int ** , Kokkos::LayoutStride , host > view_stride_2 ; - - view_left_2 xl2("xl2", 100 , 200 ); - view_right_2 xr2("xr2", 100 , 200 ); - view_stride_1 yl1 = Kokkos::subview( xl2 , 0 , Kokkos::ALL() ); - view_stride_1 yl2 = Kokkos::subview( xl2 , 1 , Kokkos::ALL() ); - view_stride_1 yr1 = Kokkos::subview( xr2 , 0 , Kokkos::ALL() ); - view_stride_1 yr2 = Kokkos::subview( xr2 , 1 , Kokkos::ALL() ); - - ASSERT_EQ( yl1.dimension_0() , xl2.dimension_1() ); - ASSERT_EQ( yl2.dimension_0() , xl2.dimension_1() ); - ASSERT_EQ( yr1.dimension_0() , xr2.dimension_1() ); - ASSERT_EQ( yr2.dimension_0() , xr2.dimension_1() ); - - ASSERT_EQ( & yl1(0) - & xl2(0,0) , 0 ); - ASSERT_EQ( & yl2(0) - & xl2(1,0) , 0 ); - ASSERT_EQ( & yr1(0) - & xr2(0,0) , 0 ); - ASSERT_EQ( & yr2(0) - & xr2(1,0) , 0 ); - - view_left_4 xl4( "xl4" , 10 , 20 , 30 , 40 ); - view_right_4 xr4( "xr4" , 10 , 20 , 30 , 40 ); - - view_stride_2 yl4 = Kokkos::subview( xl4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); - view_stride_2 yr4 = Kokkos::subview( xr4 , 1 , Kokkos::ALL() , 2 , Kokkos::ALL() ); - - ASSERT_EQ( yl4.dimension_0() , xl4.dimension_1() ); - ASSERT_EQ( yl4.dimension_1() , xl4.dimension_3() ); - ASSERT_EQ( yr4.dimension_0() , xr4.dimension_1() ); - ASSERT_EQ( yr4.dimension_1() , xr4.dimension_3() ); - - ASSERT_EQ( & yl4(4,4) - & xl4(1,4,2,4) , 0 ); - ASSERT_EQ( & yr4(4,4) - & xr4(1,4,2,4) , 0 ); + typedef Kokkos::View< int ****, Kokkos::LayoutLeft , host > view_left_4; + typedef Kokkos::View< int ****, Kokkos::LayoutRight, host > view_right_4; + typedef Kokkos::View< int ** , Kokkos::LayoutLeft , host > view_left_2; + typedef Kokkos::View< int ** , Kokkos::LayoutRight, host > view_right_2; + + typedef Kokkos::View< int * , Kokkos::LayoutStride, host > view_stride_1; + typedef Kokkos::View< int **, Kokkos::LayoutStride, host > view_stride_2; + + view_left_2 xl2( "xl2", 100, 200 ); + view_right_2 xr2( "xr2", 
100, 200 ); + view_stride_1 yl1 = Kokkos::subview( xl2, 0, Kokkos::ALL() ); + view_stride_1 yl2 = Kokkos::subview( xl2, 1, Kokkos::ALL() ); + view_stride_1 yr1 = Kokkos::subview( xr2, 0, Kokkos::ALL() ); + view_stride_1 yr2 = Kokkos::subview( xr2, 1, Kokkos::ALL() ); + + ASSERT_EQ( yl1.dimension_0(), xl2.dimension_1() ); + ASSERT_EQ( yl2.dimension_0(), xl2.dimension_1() ); + ASSERT_EQ( yr1.dimension_0(), xr2.dimension_1() ); + ASSERT_EQ( yr2.dimension_0(), xr2.dimension_1() ); + + ASSERT_EQ( & yl1( 0 ) - & xl2( 0, 0 ), 0 ); + ASSERT_EQ( & yl2( 0 ) - & xl2( 1, 0 ), 0 ); + ASSERT_EQ( & yr1( 0 ) - & xr2( 0, 0 ), 0 ); + ASSERT_EQ( & yr2( 0 ) - & xr2( 1, 0 ), 0 ); + + view_left_4 xl4( "xl4", 10, 20, 30, 40 ); + view_right_4 xr4( "xr4", 10, 20, 30, 40 ); + + view_stride_2 yl4 = Kokkos::subview( xl4, 1, Kokkos::ALL(), 2, Kokkos::ALL() ); + view_stride_2 yr4 = Kokkos::subview( xr4, 1, Kokkos::ALL(), 2, Kokkos::ALL() ); + + ASSERT_EQ( yl4.dimension_0(), xl4.dimension_1() ); + ASSERT_EQ( yl4.dimension_1(), xl4.dimension_3() ); + ASSERT_EQ( yr4.dimension_0(), xr4.dimension_1() ); + ASSERT_EQ( yr4.dimension_1(), xr4.dimension_3() ); + + ASSERT_EQ( & yl4( 4, 4 ) - & xl4( 1, 4, 2, 4 ), 0 ); + ASSERT_EQ( & yr4( 4, 4 ) - & xr4( 1, 4, 2, 4 ), 0 ); } static void run_test_vector() { - static const unsigned Length = 1000 , Count = 8 ; + static const unsigned Length = 1000, Count = 8; - typedef Kokkos::View< T* , Kokkos::LayoutLeft , host > vector_type ; - typedef Kokkos::View< T** , Kokkos::LayoutLeft , host > multivector_type ; + typedef Kokkos::View< T*, Kokkos::LayoutLeft, host > vector_type; + typedef Kokkos::View< T**, Kokkos::LayoutLeft, host > multivector_type; - typedef Kokkos::View< T* , Kokkos::LayoutRight , host > vector_right_type ; - typedef Kokkos::View< T** , Kokkos::LayoutRight , host > multivector_right_type ; + typedef Kokkos::View< T*, Kokkos::LayoutRight, host > vector_right_type; + typedef Kokkos::View< T**, Kokkos::LayoutRight, host > multivector_right_type; - typedef Kokkos::View< const T* , Kokkos::LayoutRight, host > const_vector_right_type ; - typedef Kokkos::View< const T* , Kokkos::LayoutLeft , host > const_vector_type ; - typedef Kokkos::View< const T** , Kokkos::LayoutLeft , host > const_multivector_type ; + typedef Kokkos::View< const T*, Kokkos::LayoutRight, host > const_vector_right_type; + typedef Kokkos::View< const T*, Kokkos::LayoutLeft, host > const_vector_type; + typedef Kokkos::View< const T**, Kokkos::LayoutLeft, host > const_multivector_type; - multivector_type mv = multivector_type( "mv" , Length , Count ); - multivector_right_type mv_right = multivector_right_type( "mv" , Length , Count ); + multivector_type mv = multivector_type( "mv", Length, Count ); + multivector_right_type mv_right = multivector_right_type( "mv", Length, Count ); - vector_type v1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); - vector_type v2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - vector_type v3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + vector_type v1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + vector_type v2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + vector_type v3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - vector_type rv1 = Kokkos::subview( mv_right , 0 , Kokkos::ALL() ); - vector_type rv2 = Kokkos::subview( mv_right , 1 , Kokkos::ALL() ); - vector_type rv3 = Kokkos::subview( mv_right , 2 , Kokkos::ALL() ); + vector_type rv1 = Kokkos::subview( mv_right, 0, Kokkos::ALL() ); + vector_type rv2 = Kokkos::subview( mv_right, 1, Kokkos::ALL() ); + vector_type rv3 = Kokkos::subview( 
mv_right, 2, Kokkos::ALL() ); - multivector_type mv1 = Kokkos::subview( mv , std::make_pair( 1 , 998 ) , - std::make_pair( 2 , 5 ) ); + multivector_type mv1 = Kokkos::subview( mv, std::make_pair( 1, 998 ), + std::make_pair( 2, 5 ) ); - multivector_right_type mvr1 = - Kokkos::subview( mv_right , - std::make_pair( 1 , 998 ) , - std::make_pair( 2 , 5 ) ); + multivector_right_type mvr1 = Kokkos::subview( mv_right, std::make_pair( 1, 998 ), + std::make_pair( 2, 5 ) ); - const_vector_type cv1 = Kokkos::subview( mv , Kokkos::ALL(), 0 ); - const_vector_type cv2 = Kokkos::subview( mv , Kokkos::ALL(), 1 ); - const_vector_type cv3 = Kokkos::subview( mv , Kokkos::ALL(), 2 ); + const_vector_type cv1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + const_vector_type cv2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + const_vector_type cv3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - vector_right_type vr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); - vector_right_type vr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - vector_right_type vr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + vector_right_type vr1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + vector_right_type vr2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + vector_right_type vr3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - const_vector_right_type cvr1 = Kokkos::subview( mv , Kokkos::ALL() , 0 ); - const_vector_right_type cvr2 = Kokkos::subview( mv , Kokkos::ALL() , 1 ); - const_vector_right_type cvr3 = Kokkos::subview( mv , Kokkos::ALL() , 2 ); + const_vector_right_type cvr1 = Kokkos::subview( mv, Kokkos::ALL(), 0 ); + const_vector_right_type cvr2 = Kokkos::subview( mv, Kokkos::ALL(), 1 ); + const_vector_right_type cvr3 = Kokkos::subview( mv, Kokkos::ALL(), 2 ); - ASSERT_TRUE( & v1[0] == & v1(0) ); - ASSERT_TRUE( & v1[0] == & mv(0,0) ); - ASSERT_TRUE( & v2[0] == & mv(0,1) ); - ASSERT_TRUE( & v3[0] == & mv(0,2) ); + ASSERT_TRUE( & v1[0] == & v1( 0 ) ); + ASSERT_TRUE( & v1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & v2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & v3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & cv1[0] == & mv(0,0) ); - ASSERT_TRUE( & cv2[0] == & mv(0,1) ); - ASSERT_TRUE( & cv3[0] == & mv(0,2) ); + ASSERT_TRUE( & cv1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & cv2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & cv3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & vr1[0] == & mv(0,0) ); - ASSERT_TRUE( & vr2[0] == & mv(0,1) ); - ASSERT_TRUE( & vr3[0] == & mv(0,2) ); + ASSERT_TRUE( & vr1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & vr2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & vr3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & cvr1[0] == & mv(0,0) ); - ASSERT_TRUE( & cvr2[0] == & mv(0,1) ); - ASSERT_TRUE( & cvr3[0] == & mv(0,2) ); + ASSERT_TRUE( & cvr1[0] == & mv( 0, 0 ) ); + ASSERT_TRUE( & cvr2[0] == & mv( 0, 1 ) ); + ASSERT_TRUE( & cvr3[0] == & mv( 0, 2 ) ); - ASSERT_TRUE( & mv1(0,0) == & mv( 1 , 2 ) ); - ASSERT_TRUE( & mv1(1,1) == & mv( 2 , 3 ) ); - ASSERT_TRUE( & mv1(3,2) == & mv( 4 , 4 ) ); - ASSERT_TRUE( & mvr1(0,0) == & mv_right( 1 , 2 ) ); - ASSERT_TRUE( & mvr1(1,1) == & mv_right( 2 , 3 ) ); - ASSERT_TRUE( & mvr1(3,2) == & mv_right( 4 , 4 ) ); + ASSERT_TRUE( & mv1( 0, 0 ) == & mv( 1, 2 ) ); + ASSERT_TRUE( & mv1( 1, 1 ) == & mv( 2, 3 ) ); + ASSERT_TRUE( & mv1( 3, 2 ) == & mv( 4, 4 ) ); + ASSERT_TRUE( & mvr1( 0, 0 ) == & mv_right( 1, 2 ) ); + ASSERT_TRUE( & mvr1( 1, 1 ) == & mv_right( 2, 3 ) ); + ASSERT_TRUE( & mvr1( 3, 2 ) == & mv_right( 4, 4 ) ); const_vector_type c_cv1( v1 ); typename vector_type::const_type c_cv2( v2 ); @@ -1356,6 +1320,3 @@ public: }; } // namespace Test - 
-/*--------------------------------------------------------------------------*/ - diff --git a/lib/kokkos/core/unit_test/TestViewMapping.hpp b/lib/kokkos/core/unit_test/TestViewMapping.hpp index 324f02e94730d99365804684776e48ac64c3a351..71604bed51d93e374c8de9776bb24d2135c95182 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -49,1126 +49,1140 @@ #include <Kokkos_Core.hpp> -/*--------------------------------------------------------------------------*/ - namespace Test { template< class Space > void test_view_mapping() { - typedef typename Space::execution_space ExecSpace ; - - typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2> dim_s2 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2,3> dim_s2_s3 ; - typedef Kokkos::Experimental::Impl::ViewDimension<2,3,4> dim_s2_s3_s4 ; - - typedef Kokkos::Experimental::Impl::ViewDimension<0> dim_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,3> dim_s0_s3 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,3,4> dim_s0_s3_s4 ; - - typedef Kokkos::Experimental::Impl::ViewDimension<0,0> dim_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,4> dim_s0_s0_s4 ; - - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dim_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0> dim_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0> dim_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0 ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0,0,0,0,0,0> dim_s0_s0_s0_s0_s0_s0_s0_s0 ; - - // Fully static dimensions should not be larger than an int - ASSERT_LE( sizeof(dim_0) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2_s3) , sizeof(int) ); - ASSERT_LE( sizeof(dim_s2_s3_s4) , sizeof(int) ); - - // Rank 1 is size_t - ASSERT_EQ( sizeof(dim_s0) , sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s3) , sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s3_s4) , sizeof(size_t) ); - - // Allow for padding - ASSERT_LE( sizeof(dim_s0_s0) , 2 * sizeof(size_t) ); - ASSERT_LE( sizeof(dim_s0_s0_s4) , 2 * sizeof(size_t) ); - - ASSERT_LE( sizeof(dim_s0_s0_s0) , 4 * sizeof(size_t) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0) , 4 * sizeof(unsigned) ); - ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0) , 6 * sizeof(unsigned) ); - ASSERT_LE( sizeof(dim_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); - ASSERT_EQ( sizeof(dim_s0_s0_s0_s0_s0_s0_s0_s0) , 8 * sizeof(unsigned) ); - - static_assert( int(dim_0::rank) == int(0) , "" ); - static_assert( 
int(dim_0::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_0::ArgN0) == 1 , "" ); - static_assert( int(dim_0::ArgN1) == 1 , "" ); - static_assert( int(dim_0::ArgN2) == 1 , "" ); - - static_assert( int(dim_s2::rank) == int(1) , "" ); - static_assert( int(dim_s2::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_s2::ArgN0) == 2 , "" ); - static_assert( int(dim_s2::ArgN1) == 1 , "" ); - - static_assert( int(dim_s2_s3::rank) == int(2) , "" ); - static_assert( int(dim_s2_s3::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_s2_s3::ArgN0) == 2 , "" ); - static_assert( int(dim_s2_s3::ArgN1) == 3 , "" ); - static_assert( int(dim_s2_s3::ArgN2) == 1 , "" ); - - static_assert( int(dim_s2_s3_s4::rank) == int(3) , "" ); - static_assert( int(dim_s2_s3_s4::rank_dynamic) == int(0) , "" ); - static_assert( int(dim_s2_s3_s4::ArgN0) == 2 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN1) == 3 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN2) == 4 , "" ); - static_assert( int(dim_s2_s3_s4::ArgN3) == 1 , "" ); - - static_assert( int(dim_s0::rank) == int(1) , "" ); - static_assert( int(dim_s0::rank_dynamic) == int(1) , "" ); - - static_assert( int(dim_s0_s3::rank) == int(2) , "" ); - static_assert( int(dim_s0_s3::rank_dynamic) == int(1) , "" ); - static_assert( int(dim_s0_s3::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s3::ArgN1) == 3 , "" ); - - static_assert( int(dim_s0_s3_s4::rank) == int(3) , "" ); - static_assert( int(dim_s0_s3_s4::rank_dynamic) == int(1) , "" ); - static_assert( int(dim_s0_s3_s4::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s3_s4::ArgN1) == 3 , "" ); - static_assert( int(dim_s0_s3_s4::ArgN2) == 4 , "" ); - - static_assert( int(dim_s0_s0_s4::rank) == int(3) , "" ); - static_assert( int(dim_s0_s0_s4::rank_dynamic) == int(2) , "" ); - static_assert( int(dim_s0_s0_s4::ArgN0) == 0 , "" ); - static_assert( int(dim_s0_s0_s4::ArgN1) == 0 , "" ); - static_assert( int(dim_s0_s0_s4::ArgN2) == 4 , "" ); - - static_assert( int(dim_s0_s0_s0::rank) == int(3) , "" ); - static_assert( int(dim_s0_s0_s0::rank_dynamic) == int(3) , "" ); - - static_assert( int(dim_s0_s0_s0_s0::rank) == int(4) , "" ); - static_assert( int(dim_s0_s0_s0_s0::rank_dynamic) == int(4) , "" ); - - static_assert( int(dim_s0_s0_s0_s0_s0::rank) == int(5) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0::rank_dynamic) == int(5) , "" ); - - static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank) == int(6) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(6) , "" ); - - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank) == int(7) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(7) , "" ); - - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank) == int(8) , "" ); - static_assert( int(dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic) == int(8) , "" ); - - dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); + typedef typename Space::execution_space ExecSpace; + + typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0; + typedef Kokkos::Experimental::Impl::ViewDimension< 2 > dim_s2; + typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3 > dim_s2_s3; + typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3, 4 > dim_s2_s3_s4; + + typedef Kokkos::Experimental::Impl::ViewDimension< 0 > dim_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3 > dim_s0_s3; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3, 4 > dim_s0_s3_s4; + + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0 > dim_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 4 > dim_s0_s0_s4; + + 
typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dim_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0 > dim_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0_s0; + + // Fully static dimensions should not be larger than an int. + ASSERT_LE( sizeof( dim_0 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2_s3 ), sizeof( int ) ); + ASSERT_LE( sizeof( dim_s2_s3_s4 ), sizeof( int ) ); + + // Rank 1 is size_t. + ASSERT_EQ( sizeof( dim_s0 ), sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s3 ), sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s3_s4 ), sizeof( size_t ) ); + + // Allow for padding. + ASSERT_LE( sizeof( dim_s0_s0 ), 2 * sizeof( size_t ) ); + ASSERT_LE( sizeof( dim_s0_s0_s4 ), 2 * sizeof( size_t ) ); + + ASSERT_LE( sizeof( dim_s0_s0_s0 ), 4 * sizeof( size_t ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0 ), 4 * sizeof( unsigned ) ); + ASSERT_LE( sizeof( dim_s0_s0_s0_s0_s0 ), 6 * sizeof( unsigned ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0_s0_s0 ), 6 * sizeof( unsigned ) ); + ASSERT_LE( sizeof( dim_s0_s0_s0_s0_s0_s0_s0 ), 8 * sizeof( unsigned ) ); + ASSERT_EQ( sizeof( dim_s0_s0_s0_s0_s0_s0_s0_s0 ), 8 * sizeof( unsigned ) ); + + static_assert( int( dim_0::rank ) == int( 0 ), "" ); + static_assert( int( dim_0::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_0::ArgN0 ) == 1, "" ); + static_assert( int( dim_0::ArgN1 ) == 1, "" ); + static_assert( int( dim_0::ArgN2 ) == 1, "" ); + + static_assert( int( dim_s2::rank ) == int( 1 ), "" ); + static_assert( int( dim_s2::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2::ArgN1 ) == 1, "" ); + + static_assert( int( dim_s2_s3::rank ) == int( 2 ), "" ); + static_assert( int( dim_s2_s3::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2_s3::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2_s3::ArgN1 ) == 3, "" ); + static_assert( int( dim_s2_s3::ArgN2 ) == 1, "" ); + + static_assert( int( dim_s2_s3_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s2_s3_s4::rank_dynamic ) == int( 0 ), "" ); + static_assert( int( dim_s2_s3_s4::ArgN0 ) == 2, "" ); + static_assert( int( dim_s2_s3_s4::ArgN1 ) == 3, "" ); + static_assert( int( dim_s2_s3_s4::ArgN2 ) == 4, "" ); + static_assert( int( dim_s2_s3_s4::ArgN3 ) == 1, "" ); + + static_assert( int( dim_s0::rank ) == int( 1 ), "" ); + static_assert( int( dim_s0::rank_dynamic ) == int( 1 ), "" ); + + static_assert( int( dim_s0_s3::rank ) == int( 2 ), "" ); + static_assert( int( dim_s0_s3::rank_dynamic ) == int( 1 ), "" ); + static_assert( int( dim_s0_s3::ArgN0 ) == 0, "" ); + static_assert( int( dim_s0_s3::ArgN1 ) == 3, "" ); + + static_assert( int( dim_s0_s3_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s3_s4::rank_dynamic ) == int( 1 ), "" ); + static_assert( int( dim_s0_s3_s4::ArgN0 ) == 0, "" ); + static_assert( int( dim_s0_s3_s4::ArgN1 ) == 3, "" ); + static_assert( int( dim_s0_s3_s4::ArgN2 ) == 4, "" ); + + static_assert( int( dim_s0_s0_s4::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s0_s4::rank_dynamic ) == int( 2 ), "" ); + static_assert( int( dim_s0_s0_s4::ArgN0 ) == 0, "" ); + 
static_assert( int( dim_s0_s0_s4::ArgN1 ) == 0, "" ); + static_assert( int( dim_s0_s0_s4::ArgN2 ) == 4, "" ); + + static_assert( int( dim_s0_s0_s0::rank ) == int( 3 ), "" ); + static_assert( int( dim_s0_s0_s0::rank_dynamic ) == int( 3 ), "" ); + + static_assert( int( dim_s0_s0_s0_s0::rank ) == int( 4 ), "" ); + static_assert( int( dim_s0_s0_s0_s0::rank_dynamic ) == int( 4 ), "" ); + + static_assert( int( dim_s0_s0_s0_s0_s0::rank ) == int( 5 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0::rank_dynamic ) == int( 5 ), "" ); + + static_assert( int( dim_s0_s0_s0_s0_s0_s0::rank ) == int( 6 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 6 ), "" ); + + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0::rank ) == int( 7 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 7 ), "" ); + + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0_s0::rank ) == int( 8 ), "" ); + static_assert( int( dim_s0_s0_s0_s0_s0_s0_s0_s0::rank_dynamic ) == int( 8 ), "" ); + + dim_s0 d1( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0 d2( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0_s0 d3( 2, 3, 4, 5, 6, 7, 8, 9 ); dim_s0_s0_s0_s0 d4( 2, 3, 4, 5, 6, 7, 8, 9 ); - ASSERT_EQ( d1.N0 , 2 ); - ASSERT_EQ( d2.N0 , 2 ); - ASSERT_EQ( d3.N0 , 2 ); - ASSERT_EQ( d4.N0 , 2 ); + ASSERT_EQ( d1.N0, 2 ); + ASSERT_EQ( d2.N0, 2 ); + ASSERT_EQ( d3.N0, 2 ); + ASSERT_EQ( d4.N0, 2 ); - ASSERT_EQ( d1.N1 , 1 ); - ASSERT_EQ( d2.N1 , 3 ); - ASSERT_EQ( d3.N1 , 3 ); - ASSERT_EQ( d4.N1 , 3 ); + ASSERT_EQ( d1.N1, 1 ); + ASSERT_EQ( d2.N1, 3 ); + ASSERT_EQ( d3.N1, 3 ); + ASSERT_EQ( d4.N1, 3 ); - ASSERT_EQ( d1.N2 , 1 ); - ASSERT_EQ( d2.N2 , 1 ); - ASSERT_EQ( d3.N2 , 4 ); - ASSERT_EQ( d4.N2 , 4 ); + ASSERT_EQ( d1.N2, 1 ); + ASSERT_EQ( d2.N2, 1 ); + ASSERT_EQ( d3.N2, 4 ); + ASSERT_EQ( d4.N2, 4 ); - ASSERT_EQ( d1.N3 , 1 ); - ASSERT_EQ( d2.N3 , 1 ); - ASSERT_EQ( d3.N3 , 1 ); - ASSERT_EQ( d4.N3 , 5 ); + ASSERT_EQ( d1.N3, 1 ); + ASSERT_EQ( d2.N3, 1 ); + ASSERT_EQ( d3.N3, 1 ); + ASSERT_EQ( d4.N3, 5 ); //---------------------------------------- - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0 , Kokkos::LayoutStride > stride_s0_s0_s0 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0, Kokkos::LayoutStride > stride_s0_s0_s0; //---------------------------------------- - // Static dimension + // Static dimension. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutLeft > left_s2_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutLeft > left_s2_s3_s4; - ASSERT_EQ( sizeof(left_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + ASSERT_EQ( sizeof( left_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); - left_s2_s3_s4 off3 ; + left_s2_s3_s4 off3; - stride_s0_s0_s0 stride3( off3 ); + stride_s0_s0_s0 stride3( off3 ); - ASSERT_EQ( off3.stride_0() , 1 ); - ASSERT_EQ( off3.stride_1() , 2 ); - ASSERT_EQ( off3.stride_2() , 6 ); - ASSERT_EQ( off3.span() , 24 ); + ASSERT_EQ( off3.stride_0(), 1 ); + ASSERT_EQ( off3.stride_1(), 2 ); + ASSERT_EQ( off3.stride_2(), 6 ); + ASSERT_EQ( off3.span(), 24 ); - ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( off3.span() , stride3.span() ); + ASSERT_EQ( off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int i = 0 ; i < 2 ; ++i , ++offset ){ - ASSERT_EQ( off3(i,j,k) , offset ); - ASSERT_EQ( stride3(i,j,k) , off3(i,j,k) ); - }}} + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < 3; ++j ) + for ( int i = 0; i < 2; ++i, ++offset ) + { + ASSERT_EQ( off3( i, j, k ), offset ); + ASSERT_EQ( stride3( i, j, k ), off3( i, j, k ) ); + } } //---------------------------------------- - // Small dimension is unpadded + // Small dimension is unpadded. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( 2, 3, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), 2 * 3 * 4 ); const Kokkos::LayoutLeft layout = dyn_off3.layout(); - ASSERT_EQ( layout.dimension[0] , 2 ); - ASSERT_EQ( layout.dimension[1] , 3 ); - ASSERT_EQ( layout.dimension[2] , 4 ); - ASSERT_EQ( layout.dimension[3] , 1 ); - ASSERT_EQ( layout.dimension[4] , 1 ); - ASSERT_EQ( layout.dimension[5] , 1 ); - ASSERT_EQ( layout.dimension[6] , 1 ); - ASSERT_EQ( layout.dimension[7] , 1 ); - - ASSERT_EQ( stride3.m_dim.rank , 3 ); - ASSERT_EQ( stride3.m_dim.N0 , 2 ); - ASSERT_EQ( stride3.m_dim.N1 , 3 ); - ASSERT_EQ( stride3.m_dim.N2 , 4 ); - ASSERT_EQ( stride3.m_dim.N3 , 1 ); - ASSERT_EQ( stride3.size() , 2 * 3 * 4 ); - - int offset = 0 ; - - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int i = 0 ; i < 2 ; ++i , ++offset ){ - ASSERT_EQ( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} - - ASSERT_EQ( dyn_off3.span() , offset ); - ASSERT_EQ( stride3.span() , dyn_off3.span() ); + ASSERT_EQ( 
layout.dimension[0], 2 ); + ASSERT_EQ( layout.dimension[1], 3 ); + ASSERT_EQ( layout.dimension[2], 4 ); + ASSERT_EQ( layout.dimension[3], 1 ); + ASSERT_EQ( layout.dimension[4], 1 ); + ASSERT_EQ( layout.dimension[5], 1 ); + ASSERT_EQ( layout.dimension[6], 1 ); + ASSERT_EQ( layout.dimension[7], 1 ); + + ASSERT_EQ( stride3.m_dim.rank, 3 ); + ASSERT_EQ( stride3.m_dim.N0, 2 ); + ASSERT_EQ( stride3.m_dim.N1, 3 ); + ASSERT_EQ( stride3.m_dim.N2, 4 ); + ASSERT_EQ( stride3.m_dim.N3, 1 ); + ASSERT_EQ( stride3.size(), 2 * 3 * 4 ); + + int offset = 0; + + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < 3; ++j ) + for ( int i = 0; i < 2; ++i, ++offset ) + { + ASSERT_EQ( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } + + ASSERT_EQ( dyn_off3.span(), offset ); + ASSERT_EQ( stride3.span(), dyn_off3.span() ); } - // Large dimension is likely padded + //---------------------------------------- + // Large dimension is likely padded. { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); - - ASSERT_EQ( stride3.m_dim.rank , 3 ); - ASSERT_EQ( stride3.m_dim.N0 , N0 ); - ASSERT_EQ( stride3.m_dim.N1 , N1 ); - ASSERT_EQ( stride3.m_dim.N2 , 4 ); - ASSERT_EQ( stride3.m_dim.N3 , 1 ); - ASSERT_EQ( stride3.size() , N0 * N1 * 4 ); - ASSERT_EQ( stride3.span() , dyn_off3.span() ); - - int offset = 0 ; - - for ( int k = 0 ; k < 4 ; ++k ){ - for ( int j = 0 ; j < N1 ; ++j ){ - for ( int i = 0 ; i < N0 ; ++i ){ - ASSERT_LE( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - offset = dyn_off3(i,j,k) + 1 ; - }}} - - ASSERT_LE( offset , dyn_off3.span() ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1, N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), N0 * N1 * 4 ); + + ASSERT_EQ( stride3.m_dim.rank, 3 ); + ASSERT_EQ( stride3.m_dim.N0, N0 ); + ASSERT_EQ( stride3.m_dim.N1, N1 ); + ASSERT_EQ( stride3.m_dim.N2, 4 ); + ASSERT_EQ( stride3.m_dim.N3, 1 ); + ASSERT_EQ( stride3.size(), N0 * N1 * 4 ); + ASSERT_EQ( stride3.span(), dyn_off3.span() ); + + int offset = 0; + + for ( int k = 0; k < 4; ++k ) + for ( int j = 0; j < N1; ++j ) + for ( int i = 0; i < N0; ++i ) + { + ASSERT_LE( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + offset = dyn_off3( i, j, k ) + 1; + } + + ASSERT_LE( offset, dyn_off3.span() ); } //---------------------------------------- - // Static dimension + // Static dimension. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4 , Kokkos::LayoutRight > right_s2_s3_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutRight > right_s2_s3_s4; - ASSERT_EQ( sizeof(right_s2_s3_s4) , sizeof(dim_s2_s3_s4) ); + ASSERT_EQ( sizeof( right_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); - right_s2_s3_s4 off3 ; + right_s2_s3_s4 off3; stride_s0_s0_s0 stride3( off3 ); - ASSERT_EQ( off3.stride_0() , 12 ); - ASSERT_EQ( off3.stride_1() , 4 ); - ASSERT_EQ( off3.stride_2() , 1 ); + ASSERT_EQ( off3.stride_0(), 12 ); + ASSERT_EQ( off3.stride_1(), 4 ); + ASSERT_EQ( off3.stride_2(), 1 ); - ASSERT_EQ( off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( off3.span() , stride3.span() ); + ASSERT_EQ( off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( off3.dimension_2(), stride3.dimension_2() ); + ASSERT_EQ( off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( off3.span(), stride3.span() ); - int offset = 0 ; + int offset = 0; - for ( int i = 0 ; i < 2 ; ++i ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k , ++offset ){ - ASSERT_EQ( off3(i,j,k) , offset ); - ASSERT_EQ( off3(i,j,k) , stride3(i,j,k) ); - }}} + for ( int i = 0; i < 2; ++i ) + for ( int j = 0; j < 3; ++j ) + for ( int k = 0; k < 4; ++k, ++offset ) + { + ASSERT_EQ( off3( i, j, k ), offset ); + ASSERT_EQ( off3( i, j, k ), stride3( i, j, k ) ); + } - ASSERT_EQ( off3.span() , offset ); + ASSERT_EQ( off3.span(), offset ); } //---------------------------------------- - // Small dimension is unpadded + // Small dimension is unpadded. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( 2, 3, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , 2 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , 2 * 3 * 4 ); - - ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( dyn_off3.span() , stride3.span() ); - - int offset = 0 ; - - for ( int i = 0 ; i < 2 ; ++i ){ - for ( int j = 0 ; j < 3 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k , ++offset ){ - ASSERT_EQ( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); - }}} - - ASSERT_EQ( dyn_off3.span() , offset ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, 2 ); + ASSERT_EQ( dyn_off3.m_dim.N1, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), 2 * 3 * 4 ); + + ASSERT_EQ( dyn_off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2(), stride3.dimension_2() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span(), stride3.span() ); + + int offset = 0; + + for ( int i = 0; i < 2; ++i ) + for ( int j = 0; j < 3; ++j ) + for ( int k = 0; k < 4; ++k, ++offset ) + { + ASSERT_EQ( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( dyn_off3( i, j, k ), stride3( i, j, k ) ); + } + + ASSERT_EQ( dyn_off3.span(), offset ); } - // Large dimension is likely padded + //---------------------------------------- + // Large dimension is likely padded. 
{ - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); stride_s0_s0_s0 stride3( dyn_off3 ); - ASSERT_EQ( dyn_off3.m_dim.rank , 3 ); - ASSERT_EQ( dyn_off3.m_dim.N0 , N0 ); - ASSERT_EQ( dyn_off3.m_dim.N1 , N1 ); - ASSERT_EQ( dyn_off3.m_dim.N2 , 4 ); - ASSERT_EQ( dyn_off3.m_dim.N3 , 1 ); - ASSERT_EQ( dyn_off3.size() , N0 * N1 * 4 ); - - ASSERT_EQ( dyn_off3.dimension_0() , stride3.dimension_0() ); - ASSERT_EQ( dyn_off3.dimension_1() , stride3.dimension_1() ); - ASSERT_EQ( dyn_off3.dimension_2() , stride3.dimension_2() ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_EQ( dyn_off3.span() , stride3.span() ); - - int offset = 0 ; - - for ( int i = 0 ; i < N0 ; ++i ){ - for ( int j = 0 ; j < N1 ; ++j ){ - for ( int k = 0 ; k < 4 ; ++k ){ - ASSERT_LE( offset , dyn_off3(i,j,k) ); - ASSERT_EQ( dyn_off3(i,j,k) , stride3(i,j,k) ); - offset = dyn_off3(i,j,k) + 1 ; - }}} - - ASSERT_LE( offset , dyn_off3.span() ); + ASSERT_EQ( dyn_off3.m_dim.rank, 3 ); + ASSERT_EQ( dyn_off3.m_dim.N0, N0 ); + ASSERT_EQ( dyn_off3.m_dim.N1, N1 ); + ASSERT_EQ( dyn_off3.m_dim.N2, 4 ); + ASSERT_EQ( dyn_off3.m_dim.N3, 1 ); + ASSERT_EQ( dyn_off3.size(), N0 * N1 * 4 ); + + ASSERT_EQ( dyn_off3.dimension_0(), stride3.dimension_0() ); + ASSERT_EQ( dyn_off3.dimension_1(), stride3.dimension_1() ); + ASSERT_EQ( dyn_off3.dimension_2(), stride3.dimension_2() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_EQ( dyn_off3.span(), stride3.span() ); + + int offset = 0; + + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < 4; ++k ) + { + ASSERT_LE( offset, dyn_off3( i, j, k ) ); + ASSERT_EQ( dyn_off3( i, j, k ), stride3( i, j, k ) ); + offset = dyn_off3( i, j, k ) + 1; + } + + ASSERT_LE( offset, dyn_off3.span() ); } //---------------------------------------- - // Subview + // Subview. 
{ // Mapping rank 4 to rank 3 - typedef Kokkos::Experimental::Impl::SubviewExtents<4,3> SubviewExtents ; + typedef Kokkos::Experimental::Impl::SubviewExtents< 4, 3 > SubviewExtents; - constexpr int N0 = 1000 ; - constexpr int N1 = 2000 ; - constexpr int N2 = 3000 ; - constexpr int N3 = 4000 ; + constexpr int N0 = 1000; + constexpr int N1 = 2000; + constexpr int N2 = 3000; + constexpr int N3 = 4000; - Kokkos::Experimental::Impl::ViewDimension<N0,N1,N2,N3> dim ; + Kokkos::Experimental::Impl::ViewDimension< N0, N1, N2, N3 > dim; SubviewExtents tmp( dim , N0 / 2 , Kokkos::Experimental::ALL - , std::pair<int,int>( N2 / 4 , 10 + N2 / 4 ) - , Kokkos::pair<int,int>( N3 / 4 , 20 + N3 / 4 ) + , std::pair< int, int >( N2 / 4, 10 + N2 / 4 ) + , Kokkos::pair< int, int >( N3 / 4, 20 + N3 / 4 ) ); - ASSERT_EQ( tmp.domain_offset(0) , N0 / 2 ); - ASSERT_EQ( tmp.domain_offset(1) , 0 ); - ASSERT_EQ( tmp.domain_offset(2) , N2 / 4 ); - ASSERT_EQ( tmp.domain_offset(3) , N3 / 4 ); + ASSERT_EQ( tmp.domain_offset( 0 ), N0 / 2 ); + ASSERT_EQ( tmp.domain_offset( 1 ), 0 ); + ASSERT_EQ( tmp.domain_offset( 2 ), N2 / 4 ); + ASSERT_EQ( tmp.domain_offset( 3 ), N3 / 4 ); - ASSERT_EQ( tmp.range_index(0) , 1 ); - ASSERT_EQ( tmp.range_index(1) , 2 ); - ASSERT_EQ( tmp.range_index(2) , 3 ); + ASSERT_EQ( tmp.range_index( 0 ), 1 ); + ASSERT_EQ( tmp.range_index( 1 ), 2 ); + ASSERT_EQ( tmp.range_index( 2 ), 3 ); - ASSERT_EQ( tmp.range_extent(0) , N1 ); - ASSERT_EQ( tmp.range_extent(1) , 10 ); - ASSERT_EQ( tmp.range_extent(2) , 20 ); + ASSERT_EQ( tmp.range_extent( 0 ), N1 ); + ASSERT_EQ( tmp.range_extent( 1 ), 10 ); + ASSERT_EQ( tmp.range_extent( 2 ), 20 ); } - //---------------------------------------- + { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - constexpr int sub_N0 = 1000 ; - constexpr int sub_N1 = 200 ; - constexpr int sub_N2 = 4 ; + constexpr int sub_N0 = 1000; + constexpr int sub_N1 = 200; + constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutLeft > left_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; - left_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim - , Kokkos::pair<int,int>(0,sub_N0) - , Kokkos::pair<int,int>(0,sub_N1) - , Kokkos::pair<int,int>(0,sub_N2) + , Kokkos::pair< int, int >( 0, sub_N0 ) + , Kokkos::pair< int, int >( 0, sub_N1 ) + , Kokkos::pair< int, int >( 0, sub_N2 ) ); - stride_s0_s0_s0 stride3( dyn_off3 , sub ); + stride_s0_s0_s0 stride3( dyn_off3, sub ); - ASSERT_EQ( stride3.dimension_0() , sub_N0 ); - ASSERT_EQ( stride3.dimension_1() , sub_N1 ); - ASSERT_EQ( stride3.dimension_2() , sub_N2 ); - ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + ASSERT_EQ( stride3.dimension_0(), sub_N0 ); + ASSERT_EQ( stride3.dimension_1(), sub_N1 ); + ASSERT_EQ( stride3.dimension_2(), sub_N2 ); + ASSERT_EQ( stride3.size(), sub_N0 * sub_N1 * sub_N2 ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_GE( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), 
stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); - for ( int k = 0 ; k < sub_N2 ; ++k ){ - for ( int j = 0 ; j < sub_N1 ; ++j ){ - for ( int i = 0 ; i < sub_N0 ; ++i ){ - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} + for ( int k = 0; k < sub_N2; ++k ) + for ( int j = 0; j < sub_N1; ++j ) + for ( int i = 0; i < sub_N0; ++i ) + { + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } } { - constexpr int N0 = 2000 ; - constexpr int N1 = 300 ; + constexpr int N0 = 2000; + constexpr int N1 = 300; - constexpr int sub_N0 = 1000 ; - constexpr int sub_N1 = 200 ; - constexpr int sub_N2 = 4 ; + constexpr int sub_N0 = 1000; + constexpr int sub_N1 = 200; + constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4 , Kokkos::LayoutRight > right_s0_s0_s4 ; + typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; - right_s0_s0_s4 dyn_off3( std::integral_constant<unsigned,sizeof(int)>() + right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3 , 3 > + Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim - , Kokkos::pair<int,int>(0,sub_N0) - , Kokkos::pair<int,int>(0,sub_N1) - , Kokkos::pair<int,int>(0,sub_N2) + , Kokkos::pair< int, int >( 0, sub_N0 ) + , Kokkos::pair< int, int >( 0, sub_N1 ) + , Kokkos::pair< int, int >( 0, sub_N2 ) ); - stride_s0_s0_s0 stride3( dyn_off3 , sub ); + stride_s0_s0_s0 stride3( dyn_off3, sub ); - ASSERT_EQ( stride3.dimension_0() , sub_N0 ); - ASSERT_EQ( stride3.dimension_1() , sub_N1 ); - ASSERT_EQ( stride3.dimension_2() , sub_N2 ); - ASSERT_EQ( stride3.size() , sub_N0 * sub_N1 * sub_N2 ); + ASSERT_EQ( stride3.dimension_0(), sub_N0 ); + ASSERT_EQ( stride3.dimension_1(), sub_N1 ); + ASSERT_EQ( stride3.dimension_2(), sub_N2 ); + ASSERT_EQ( stride3.size(), sub_N0 * sub_N1 * sub_N2 ); - ASSERT_EQ( dyn_off3.stride_0() , stride3.stride_0() ); - ASSERT_EQ( dyn_off3.stride_1() , stride3.stride_1() ); - ASSERT_EQ( dyn_off3.stride_2() , stride3.stride_2() ); - ASSERT_GE( dyn_off3.span() , stride3.span() ); + ASSERT_EQ( dyn_off3.stride_0(), stride3.stride_0() ); + ASSERT_EQ( dyn_off3.stride_1(), stride3.stride_1() ); + ASSERT_EQ( dyn_off3.stride_2(), stride3.stride_2() ); + ASSERT_GE( dyn_off3.span() , stride3.span() ); - for ( int i = 0 ; i < sub_N0 ; ++i ){ - for ( int j = 0 ; j < sub_N1 ; ++j ){ - for ( int k = 0 ; k < sub_N2 ; ++k ){ - ASSERT_EQ( stride3(i,j,k) , dyn_off3(i,j,k) ); - }}} + for ( int i = 0; i < sub_N0; ++i ) + for ( int j = 0; j < sub_N1; ++j ) + for ( int k = 0; k < sub_N2; ++k ) + { + ASSERT_EQ( stride3( i, j, k ), dyn_off3( i, j, k ) ); + } } //---------------------------------------- - // view data analysis + // View data analysis. 
{ - using namespace Kokkos::Experimental::Impl ; - static_assert( rank_dynamic<>::value == 0 , "" ); - static_assert( rank_dynamic<1>::value == 0 , "" ); - static_assert( rank_dynamic<0>::value == 1 , "" ); - static_assert( rank_dynamic<0,1>::value == 1 , "" ); - static_assert( rank_dynamic<0,0,1>::value == 2 , "" ); + using namespace Kokkos::Experimental::Impl; + + static_assert( rank_dynamic<>::value == 0, "" ); + static_assert( rank_dynamic< 1 >::value == 0, "" ); + static_assert( rank_dynamic< 0 >::value == 1, "" ); + static_assert( rank_dynamic< 0, 1 >::value == 1, "" ); + static_assert( rank_dynamic< 0, 0, 1 >::value == 2, "" ); } { - using namespace Kokkos::Experimental::Impl ; - - typedef ViewArrayAnalysis< int[] > a_int_r1 ; - typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5 ; - typedef ViewArrayAnalysis< const int[] > a_const_int_r1 ; - typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5 ; - - static_assert( a_int_r1::dimension::rank == 1 , "" ); - static_assert( a_int_r1::dimension::rank_dynamic == 1 , "" ); - static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN2 == 4 , "" ); - static_assert( a_int_r5::dimension::ArgN3 == 5 , "" ); - static_assert( a_int_r5::dimension::ArgN4 == 6 , "" ); - static_assert( a_int_r5::dimension::ArgN5 == 1 , "" ); - - static_assert( std::is_same< typename a_int_r1::dimension , ViewDimension<0> >::value , "" ); - static_assert( std::is_same< typename a_int_r1::non_const_value_type , int >::value , "" ); - - static_assert( a_const_int_r1::dimension::rank == 1 , "" ); - static_assert( a_const_int_r1::dimension::rank_dynamic == 1 , "" ); - static_assert( std::is_same< typename a_const_int_r1::dimension , ViewDimension<0> >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); - - static_assert( a_const_int_r5::dimension::rank == 5 , "" ); - static_assert( a_const_int_r5::dimension::rank_dynamic == 2 , "" ); - - static_assert( a_const_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_const_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_const_int_r5::dimension::ArgN2 == 4 , "" ); - static_assert( a_const_int_r5::dimension::ArgN3 == 5 , "" ); - static_assert( a_const_int_r5::dimension::ArgN4 == 6 , "" ); - static_assert( a_const_int_r5::dimension::ArgN5 == 1 , "" ); - - static_assert( std::is_same< typename a_const_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); - static_assert( std::is_same< typename a_const_int_r5::non_const_value_type , int >::value , "" ); - - static_assert( a_int_r5::dimension::rank == 5 , "" ); - static_assert( a_int_r5::dimension::rank_dynamic == 2 , "" ); - static_assert( std::is_same< typename a_int_r5::dimension , ViewDimension<0,0,4,5,6> >::value , "" ); - static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + using namespace Kokkos::Experimental::Impl; + + typedef ViewArrayAnalysis< int[] > a_int_r1; + typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5; + typedef ViewArrayAnalysis< const int[] > a_const_int_r1; + typedef ViewArrayAnalysis< const int**[4][5][6] > a_const_int_r5; + + static_assert( a_int_r1::dimension::rank == 1, "" ); + static_assert( a_int_r1::dimension::rank_dynamic == 1, "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN2 == 4, "" ); + static_assert( 
a_int_r5::dimension::ArgN3 == 5, "" ); + static_assert( a_int_r5::dimension::ArgN4 == 6, "" ); + static_assert( a_int_r5::dimension::ArgN5 == 1, "" ); + + static_assert( std::is_same< typename a_int_r1::dimension, ViewDimension<0> >::value, "" ); + static_assert( std::is_same< typename a_int_r1::non_const_value_type, int >::value, "" ); + + static_assert( a_const_int_r1::dimension::rank == 1, "" ); + static_assert( a_const_int_r1::dimension::rank_dynamic == 1, "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension, ViewDimension<0> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type, int >::value, "" ); + + static_assert( a_const_int_r5::dimension::rank == 5, "" ); + static_assert( a_const_int_r5::dimension::rank_dynamic == 2, "" ); + + static_assert( a_const_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_const_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_const_int_r5::dimension::ArgN2 == 4, "" ); + static_assert( a_const_int_r5::dimension::ArgN3 == 5, "" ); + static_assert( a_const_int_r5::dimension::ArgN4 == 6, "" ); + static_assert( a_const_int_r5::dimension::ArgN5 == 1, "" ); + + static_assert( std::is_same< typename a_const_int_r5::dimension, ViewDimension<0, 0, 4, 5, 6> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r5::non_const_value_type, int >::value, "" ); + + static_assert( a_int_r5::dimension::rank == 5, "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 2, "" ); + static_assert( std::is_same< typename a_int_r5::dimension, ViewDimension<0, 0, 4, 5, 6> >::value, "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type, int >::value, "" ); } { - using namespace Kokkos::Experimental::Impl ; + using namespace Kokkos::Experimental::Impl; - typedef int t_i4[4] ; + typedef int t_i4[4]; // Dimensions of t_i4 are appended to the multdimensional array. 
- typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5 ; - - static_assert( a_int_r5::dimension::rank == 5 , "" ); - static_assert( a_int_r5::dimension::rank_dynamic == 3 , "" ); - static_assert( a_int_r5::dimension::ArgN0 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN1 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN2 == 0 , "" ); - static_assert( a_int_r5::dimension::ArgN3 == 3 , "" ); - static_assert( a_int_r5::dimension::ArgN4 == 4 , "" ); - static_assert( std::is_same< typename a_int_r5::non_const_value_type , int >::value , "" ); + typedef ViewArrayAnalysis< t_i4 ***[3] > a_int_r5; + + static_assert( a_int_r5::dimension::rank == 5, "" ); + static_assert( a_int_r5::dimension::rank_dynamic == 3, "" ); + static_assert( a_int_r5::dimension::ArgN0 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN1 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN2 == 0, "" ); + static_assert( a_int_r5::dimension::ArgN3 == 3, "" ); + static_assert( a_int_r5::dimension::ArgN4 == 4, "" ); + static_assert( std::is_same< typename a_int_r5::non_const_value_type, int >::value, "" ); } { - using namespace Kokkos::Experimental::Impl ; + using namespace Kokkos::Experimental::Impl; - typedef ViewDataAnalysis< const int[] , void > a_const_int_r1 ; + typedef ViewDataAnalysis< const int[], void > a_const_int_r1; - static_assert( std::is_same< typename a_const_int_r1::specialize , void >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::dimension , Kokkos::Experimental::Impl::ViewDimension<0> >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::specialize, void >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension, Kokkos::Experimental::Impl::ViewDimension<0> >::value, "" ); - static_assert( std::is_same< typename a_const_int_r1::type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::value_type , const int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::value_type, const int >::value, "" ); - static_assert( std::is_same< typename a_const_int_r1::scalar_array_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::const_scalar_array_type , const int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_type , int * >::value , "" ); - static_assert( std::is_same< typename a_const_int_r1::non_const_value_type , int >::value , "" ); + static_assert( std::is_same< typename a_const_int_r1::scalar_array_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::const_scalar_array_type, const int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_type, int * >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::non_const_value_type, int >::value, "" ); - typedef ViewDataAnalysis< const int**[4] , void > a_const_int_r3 ; + typedef ViewDataAnalysis< const int**[4], void > a_const_int_r3; - static_assert( std::is_same< typename a_const_int_r3::specialize , 
void >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::specialize, void >::value, "" ); - static_assert( std::is_same< typename a_const_int_r3::dimension , Kokkos::Experimental::Impl::ViewDimension<0,0,4> >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::dimension, Kokkos::Experimental::Impl::ViewDimension<0, 0, 4> >::value, "" ); - static_assert( std::is_same< typename a_const_int_r3::type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::scalar_array_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_value_type , const int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::const_scalar_array_type , const int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_type , int**[4] >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_value_type , int >::value , "" ); - static_assert( std::is_same< typename a_const_int_r3::non_const_scalar_array_type , int**[4] >::value , "" ); + static_assert( std::is_same< typename a_const_int_r3::type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::scalar_array_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_value_type, const int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::const_scalar_array_type, const int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_type, int**[4] >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_value_type, int >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::non_const_scalar_array_type, int**[4] >::value, "" ); - - // std::cout << "typeid(const int**[4]).name() = " << typeid(const int**[4]).name() << std::endl ; + // std::cout << "typeid( const int**[4] ).name() = " << typeid( const int**[4] ).name() << std::endl; } //---------------------------------------- { - constexpr int N = 10 ; + constexpr int N = 10; - typedef Kokkos::View<int*,Space> T ; - typedef Kokkos::View<const int*,Space> C ; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; - int data[N] ; + int data[N]; - T vr1(data,N); // view of non-const - C cr1(vr1); // view of const from view of non-const - C cr2( (const int *) data , N ); + T vr1( data, N ); // View of non-const. + C cr1( vr1 ); // View of const from view of non-const. 
+ C cr2( (const int *) data, N ); // Generate static_assert error: // T tmp( cr1 ); - ASSERT_EQ( vr1.span() , N ); - ASSERT_EQ( cr1.span() , N ); - ASSERT_EQ( vr1.data() , & data[0] ); - ASSERT_EQ( cr1.data() , & data[0] ); + ASSERT_EQ( vr1.span(), N ); + ASSERT_EQ( cr1.span(), N ); + ASSERT_EQ( vr1.data(), & data[0] ); + ASSERT_EQ( cr1.data(), & data[0] ); - ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::memory_space, typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type, int & >::value ) ); - ASSERT_EQ( T::Rank , 1 ); + ASSERT_EQ( T::Rank, 1 ); - ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename 
C::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename C::reference_type , const int & >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::memory_space, typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename C::reference_type, const int & >::value ) ); - ASSERT_EQ( C::Rank , 1 ); + ASSERT_EQ( C::Rank, 1 ); - ASSERT_EQ( vr1.dimension_0() , N ); + ASSERT_EQ( vr1.dimension_0(), N ); - if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { - for ( int i = 0 ; i < N ; ++i ) data[i] = i + 1 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + for ( int i = 0; i < N; ++i ) data[i] = i + 1; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 1 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( cr1[i], i + 1 ); { T tmp( vr1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 1 ); + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 2; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 2 ); } - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 2 ); } } - { - constexpr int N = 10 ; - typedef Kokkos::View<int*,Space> T ; - typedef Kokkos::View<const int*,Space> C ; + constexpr int N = 10; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; + + T vr1( "vr1", N ); + C cr1( vr1 ); - T vr1("vr1",N); - C cr1(vr1); + ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::data_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_data_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_data_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type, int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::scalar_array_type , int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_scalar_array_type , const int* >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_scalar_array_type , int* >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type, int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::value_type , int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::const_value_type , const 
int >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::non_const_value_type , int >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::memory_space, typename Space::memory_space >::value ) ); + ASSERT_TRUE( ( std::is_same< typename T::reference_type, int & >::value ) ); + ASSERT_EQ( T::Rank, 1 ); - ASSERT_TRUE( ( std::is_same< typename T::memory_space , typename Space::memory_space >::value ) ); - ASSERT_TRUE( ( std::is_same< typename T::reference_type , int & >::value ) ); - ASSERT_EQ( T::Rank , 1 ); - - ASSERT_EQ( vr1.dimension_0() , N ); + ASSERT_EQ( vr1.dimension_0(), N ); - if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , typename Space::memory_space >::accessible ) { - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 1 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( cr1[i] , i + 1 ); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 1; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 1 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( cr1[i], i + 1 ); { T tmp( vr1 ); - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 1 ); - for ( int i = 0 ; i < N ; ++i ) vr1(i) = i + 2 ; - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( tmp[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 1 ); + for ( int i = 0; i < N; ++i ) vr1( i ) = i + 2; + for ( int i = 0; i < N; ++i ) ASSERT_EQ( tmp[i], i + 2 ); } - for ( int i = 0 ; i < N ; ++i ) ASSERT_EQ( vr1[i] , i + 2 ); + for ( int i = 0; i < N; ++i ) ASSERT_EQ( vr1[i], i + 2 ); } } - // Testing proper handling of zero-length allocations + // Testing proper handling of zero-length allocations. { - constexpr int N = 0 ; - typedef Kokkos::View<int*,Space> T ; - typedef Kokkos::View<const int*,Space> C ; + constexpr int N = 0; + typedef Kokkos::View< int*, Space > T; + typedef Kokkos::View< const int*, Space > C; - T vr1("vr1",N); - C cr1(vr1); + T vr1( "vr1", N ); + C cr1( vr1 ); - ASSERT_EQ( vr1.dimension_0() , 0 ); - ASSERT_EQ( cr1.dimension_0() , 0 ); + ASSERT_EQ( vr1.dimension_0(), 0 ); + ASSERT_EQ( cr1.dimension_0(), 0 ); } - // Testing using space instance for allocation. - // The execution space of the memory space must be available for view data initialization - - if ( std::is_same< ExecSpace , typename ExecSpace::memory_space::execution_space >::value ) { - - using namespace Kokkos::Experimental ; - - typedef typename ExecSpace::memory_space memory_space ; - typedef View<int*,memory_space> V ; - - constexpr int N = 10 ; - - memory_space mem_space ; - - V v( "v" , N ); - V va( view_alloc() , N ); - V vb( view_alloc( "vb" ) , N ); - V vc( view_alloc( "vc" , AllowPadding ) , N ); - V vd( view_alloc( "vd" , WithoutInitializing ) , N ); - V ve( view_alloc( "ve" , WithoutInitializing , AllowPadding ) , N ); - V vf( view_alloc( "vf" , mem_space , WithoutInitializing , AllowPadding ) , N ); - V vg( view_alloc( mem_space , "vg" , WithoutInitializing , AllowPadding ) , N ); - V vh( view_alloc( WithoutInitializing , AllowPadding ) , N ); - V vi( view_alloc( WithoutInitializing ) , N ); - V vj( view_alloc( std::string("vj") , AllowPadding ) , N ); - V vk( view_alloc( mem_space , std::string("vk") , AllowPadding ) , N ); + // The execution space of the memory space must be available for view data initialization. 
+ if ( std::is_same< ExecSpace, typename ExecSpace::memory_space::execution_space >::value ) { + + using namespace Kokkos::Experimental; + + typedef typename ExecSpace::memory_space memory_space; + typedef View< int*, memory_space > V; + + constexpr int N = 10; + + memory_space mem_space; + + V v( "v", N ); + V va( view_alloc(), N ); + V vb( view_alloc( "vb" ), N ); + V vc( view_alloc( "vc", AllowPadding ), N ); + V vd( view_alloc( "vd", WithoutInitializing ), N ); + V ve( view_alloc( "ve", WithoutInitializing, AllowPadding ), N ); + V vf( view_alloc( "vf", mem_space, WithoutInitializing, AllowPadding ), N ); + V vg( view_alloc( mem_space, "vg", WithoutInitializing, AllowPadding ), N ); + V vh( view_alloc( WithoutInitializing, AllowPadding ), N ); + V vi( view_alloc( WithoutInitializing ), N ); + V vj( view_alloc( std::string( "vj" ), AllowPadding ), N ); + V vk( view_alloc( mem_space, std::string( "vk" ), AllowPadding ), N ); } { - typedef Kokkos::ViewTraits<int***,Kokkos::LayoutStride,ExecSpace> traits_t ; - typedef Kokkos::Experimental::Impl::ViewDimension<0,0,0> dims_t ; - typedef Kokkos::Experimental::Impl::ViewOffset< dims_t , Kokkos::LayoutStride > offset_t ; + typedef Kokkos::ViewTraits< int***, Kokkos::LayoutStride, ExecSpace > traits_t; + typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dims_t; + typedef Kokkos::Experimental::Impl::ViewOffset< dims_t, Kokkos::LayoutStride > offset_t; - Kokkos::LayoutStride stride ; + Kokkos::LayoutStride stride; - stride.dimension[0] = 3 ; - stride.dimension[1] = 4 ; - stride.dimension[2] = 5 ; - stride.stride[0] = 4 ; - stride.stride[1] = 1 ; - stride.stride[2] = 12 ; + stride.dimension[0] = 3; + stride.dimension[1] = 4; + stride.dimension[2] = 5; + stride.stride[0] = 4; + stride.stride[1] = 1; + stride.stride[2] = 12; - const offset_t offset( std::integral_constant<unsigned,0>() , stride ); + const offset_t offset( std::integral_constant< unsigned, 0 >(), stride ); - ASSERT_EQ( offset.dimension_0() , 3 ); - ASSERT_EQ( offset.dimension_1() , 4 ); - ASSERT_EQ( offset.dimension_2() , 5 ); + ASSERT_EQ( offset.dimension_0(), 3 ); + ASSERT_EQ( offset.dimension_1(), 4 ); + ASSERT_EQ( offset.dimension_2(), 5 ); - ASSERT_EQ( offset.stride_0() , 4 ); - ASSERT_EQ( offset.stride_1() , 1 ); - ASSERT_EQ( offset.stride_2() , 12 ); + ASSERT_EQ( offset.stride_0(), 4 ); + ASSERT_EQ( offset.stride_1(), 1 ); + ASSERT_EQ( offset.stride_2(), 12 ); - ASSERT_EQ( offset.span() , 60 ); + ASSERT_EQ( offset.span(), 60 ); ASSERT_TRUE( offset.span_is_contiguous() ); - Kokkos::Experimental::Impl::ViewMapping< traits_t , void > - v( Kokkos::Experimental::Impl::ViewCtorProp<int*>((int*)0), stride ); + Kokkos::Experimental::Impl::ViewMapping< traits_t, void > + v( Kokkos::Experimental::Impl::ViewCtorProp< int* >( (int*) 0 ), stride ); } { - typedef Kokkos::View<int**,Space> V ; - typedef typename V::HostMirror M ; - typedef typename Kokkos::View<int**,Space>::array_layout layout_type; + typedef Kokkos::View< int**, Space > V; + typedef typename V::HostMirror M; + typedef typename Kokkos::View< int**, Space >::array_layout layout_type; - constexpr int N0 = 10 ; - constexpr int N1 = 11 ; + constexpr int N0 = 10; + constexpr int N1 = 11; - V a("a",N0,N1); - M b = Kokkos::Experimental::create_mirror(a); - M c = Kokkos::Experimental::create_mirror_view(a); - M d ; + V a( "a", N0, N1 ); + M b = Kokkos::Experimental::create_mirror( a ); + M c = Kokkos::Experimental::create_mirror_view( a ); + M d; - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) 
- b(i0,i1) = 1 + i0 + i1 * N0 ; + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - Kokkos::Experimental::deep_copy( a , b ); - Kokkos::Experimental::deep_copy( c , a ); + Kokkos::Experimental::deep_copy( a, b ); + Kokkos::Experimental::deep_copy( c, a ); - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + } - Kokkos::Experimental::resize( b , 5 , 6 ); + Kokkos::Experimental::resize( b, 5, 6 ); - for ( int i0 = 0 ; i0 < 5 ; ++i0 ) - for ( int i1 = 0 ; i1 < 6 ; ++i1 ) { + for ( int i0 = 0; i0 < 5; ++i0 ) + for ( int i1 = 0; i1 < 6; ++i1 ) + { int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); - ASSERT_EQ( b(i0,i1) , val ); + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c , 5 , 6 ); - Kokkos::Experimental::realloc( d , 5 , 6 ); + Kokkos::Experimental::realloc( c, 5, 6 ); + Kokkos::Experimental::realloc( d, 5, 6 ); - ASSERT_EQ( b.dimension_0() , 5 ); - ASSERT_EQ( b.dimension_1() , 6 ); - ASSERT_EQ( c.dimension_0() , 5 ); - ASSERT_EQ( c.dimension_1() , 6 ); - ASSERT_EQ( d.dimension_0() , 5 ); - ASSERT_EQ( d.dimension_1() , 6 ); + ASSERT_EQ( b.dimension_0(), 5 ); + ASSERT_EQ( b.dimension_1(), 6 ); + ASSERT_EQ( c.dimension_0(), 5 ); + ASSERT_EQ( c.dimension_1(), 6 ); + ASSERT_EQ( d.dimension_0(), 5 ); + ASSERT_EQ( d.dimension_1(), 6 ); - layout_type layout(7,8); - Kokkos::Experimental::resize( b , layout ); - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 6 ; i1 < 8 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + layout_type layout( 7, 8 ); + Kokkos::Experimental::resize( b, layout ); + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 6; i1 < 8; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - for ( int i0 = 5 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + for ( int i0 = 5; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , val ); + ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c , layout ); - Kokkos::Experimental::realloc( d , layout ); - - ASSERT_EQ( b.dimension_0() , 7 ); - ASSERT_EQ( b.dimension_1() , 8 ); - ASSERT_EQ( c.dimension_0() , 7 ); - ASSERT_EQ( c.dimension_1() , 8 ); - ASSERT_EQ( d.dimension_0() , 7 ); - ASSERT_EQ( d.dimension_1() , 8 ); + Kokkos::Experimental::realloc( c, layout ); + Kokkos::Experimental::realloc( d, layout ); + ASSERT_EQ( b.dimension_0(), 7 ); + ASSERT_EQ( b.dimension_1(), 8 ); + ASSERT_EQ( c.dimension_0(), 7 ); + ASSERT_EQ( c.dimension_1(), 8 ); + ASSERT_EQ( d.dimension_0(), 7 ); + ASSERT_EQ( d.dimension_1(), 8 ); } { - typedef Kokkos::View<int**,Kokkos::LayoutStride,Space> V ; - typedef typename V::HostMirror M ; - typedef typename Kokkos::View<int**,Kokkos::LayoutStride,Space>::array_layout layout_type; + typedef Kokkos::View< int**, Kokkos::LayoutStride, Space > V; + typedef typename V::HostMirror M; + typedef typename Kokkos::View< int**, Kokkos::LayoutStride, Space >::array_layout layout_type; - constexpr int N0 = 10 ; - constexpr int N1 = 11 ; + constexpr int N0 = 10; + constexpr int N1 = 11; - const int dimensions[] = {N0,N1}; - const int 
order[] = {1,0}; + const int dimensions[] = { N0, N1 }; + const int order[] = { 1, 0 }; - V a("a",Kokkos::LayoutStride::order_dimensions(2,order,dimensions)); - M b = Kokkos::Experimental::create_mirror(a); - M c = Kokkos::Experimental::create_mirror_view(a); - M d ; + V a( "a", Kokkos::LayoutStride::order_dimensions( 2, order, dimensions ) ); + M b = Kokkos::Experimental::create_mirror( a ); + M c = Kokkos::Experimental::create_mirror_view( a ); + M d; - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - b(i0,i1) = 1 + i0 + i1 * N0 ; + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + b( i0, i1 ) = 1 + i0 + i1 * N0; + } - Kokkos::Experimental::deep_copy( a , b ); - Kokkos::Experimental::deep_copy( c , a ); + Kokkos::Experimental::deep_copy( a, b ); + Kokkos::Experimental::deep_copy( c, a ); - for ( int i0 = 0 ; i0 < N0 ; ++i0 ) - for ( int i1 = 0 ; i1 < N1 ; ++i1 ) - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); + for ( int i0 = 0; i0 < N0; ++i0 ) + for ( int i1 = 0; i1 < N1; ++i1 ) + { + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + } - const int dimensions2[] = {7,8}; - const int order2[] = {1,0}; - layout_type layout = layout_type::order_dimensions(2,order2,dimensions2); - Kokkos::Experimental::resize( b , layout ); + const int dimensions2[] = { 7, 8 }; + const int order2[] = { 1, 0 }; + layout_type layout = layout_type::order_dimensions( 2, order2, dimensions2 ); + Kokkos::Experimental::resize( b, layout ); - for ( int i0 = 0 ; i0 < 7 ; ++i0 ) - for ( int i1 = 0 ; i1 < 8 ; ++i1 ) { + for ( int i0 = 0; i0 < 7; ++i0 ) + for ( int i1 = 0; i1 < 8; ++i1 ) + { int val = 1 + i0 + i1 * N0; - ASSERT_EQ( b(i0,i1) , c(i0,i1) ); - ASSERT_EQ( b(i0,i1) , val ); + ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); + ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c , layout ); - Kokkos::Experimental::realloc( d , layout ); + Kokkos::Experimental::realloc( c, layout ); + Kokkos::Experimental::realloc( d, layout ); - ASSERT_EQ( b.dimension_0() , 7 ); - ASSERT_EQ( b.dimension_1() , 8 ); - ASSERT_EQ( c.dimension_0() , 7 ); - ASSERT_EQ( c.dimension_1() , 8 ); - ASSERT_EQ( d.dimension_0() , 7 ); - ASSERT_EQ( d.dimension_1() , 8 ); + ASSERT_EQ( b.dimension_0(), 7 ); + ASSERT_EQ( b.dimension_1(), 8 ); + ASSERT_EQ( c.dimension_0(), 7 ); + ASSERT_EQ( c.dimension_1(), 8 ); + ASSERT_EQ( d.dimension_0(), 7 ); + ASSERT_EQ( d.dimension_1(), 8 ); } { - typedef Kokkos::View<int*,Space> V ; - typedef Kokkos::View<int*,Space,Kokkos::MemoryUnmanaged> U ; + typedef Kokkos::View< int*, Space > V; + typedef Kokkos::View< int*, Space, Kokkos::MemoryUnmanaged > U; + V a( "a", 10 ); - V a("a",10); + ASSERT_EQ( a.use_count(), 1 ); - ASSERT_EQ( a.use_count() , 1 ); + V b = a; - V b = a ; - - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); { - U c = b ; // 'c' is compile-time unmanaged + U c = b; // 'c' is compile-time unmanaged. - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); - ASSERT_EQ( c.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); + ASSERT_EQ( c.use_count(), 2 ); - V d = c ; // 'd' is run-time unmanaged + V d = c; // 'd' is run-time unmanaged. 
- ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); - ASSERT_EQ( c.use_count() , 2 ); - ASSERT_EQ( d.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); + ASSERT_EQ( c.use_count(), 2 ); + ASSERT_EQ( d.use_count(), 2 ); } - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( b.use_count() , 2 ); + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( b.use_count(), 2 ); b = V(); - ASSERT_EQ( a.use_count() , 1 ); - ASSERT_EQ( b.use_count() , 0 ); - -#if ! defined ( KOKKOS_ENABLE_CUDA_LAMBDA ) - /* Cannot launch host lambda when CUDA lambda is enabled */ - - typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space - host_exec_space ; - - Kokkos::parallel_for( - Kokkos::RangePolicy< host_exec_space >(0,10) , - KOKKOS_LAMBDA( int i ){ - // 'a' is captured by copy and the capture mechanism - // converts 'a' to an unmanaged copy. - // When the parallel dispatch accepts a move for the lambda - // this count should become 1 - ASSERT_EQ( a.use_count() , 2 ); - V x = a ; - ASSERT_EQ( a.use_count() , 2 ); - ASSERT_EQ( x.use_count() , 2 ); - }); -#endif /* #if ! defined ( KOKKOS_ENABLE_CUDA_LAMBDA ) */ + ASSERT_EQ( a.use_count(), 1 ); + ASSERT_EQ( b.use_count(), 0 ); + +#if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) + // Cannot launch host lambda when CUDA lambda is enabled. + + typedef typename Kokkos::Impl::HostMirror< Space >::Space::execution_space host_exec_space; + + Kokkos::parallel_for( Kokkos::RangePolicy< host_exec_space >( 0, 10 ), KOKKOS_LAMBDA ( int i ) { + // 'a' is captured by copy, and the capture mechanism converts 'a' to an + // unmanaged copy. When the parallel dispatch accepts a move for the + // lambda, this count should become 1. + ASSERT_EQ( a.use_count(), 2 ); + V x = a; + ASSERT_EQ( a.use_count(), 2 ); + ASSERT_EQ( x.use_count(), 2 ); + }); +#endif // #if !defined( KOKKOS_ENABLE_CUDA_LAMBDA ) } } template< class Space > struct TestViewMappingSubview { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; - typedef Kokkos::pair<int,int> range ; + typedef Kokkos::pair< int, int > range; enum { AN = 10 }; - typedef Kokkos::View<int*,ExecSpace> AT ; - typedef Kokkos::View<const int*,ExecSpace> ACT ; - typedef Kokkos::Subview< AT , range > AS ; + typedef Kokkos::View< int*, ExecSpace > AT; + typedef Kokkos::View< const int*, ExecSpace > ACT; + typedef Kokkos::Subview< AT, range > AS; - enum { BN0 = 10 , BN1 = 11 , BN2 = 12 }; - typedef Kokkos::View<int***,ExecSpace> BT ; - typedef Kokkos::Subview< BT , range , range , range > BS ; + enum { BN0 = 10, BN1 = 11, BN2 = 12 }; + typedef Kokkos::View< int***, ExecSpace > BT; + typedef Kokkos::Subview< BT, range, range, range > BS; - enum { CN0 = 10 , CN1 = 11 , CN2 = 12 }; - typedef Kokkos::View<int***[13][14],ExecSpace> CT ; - typedef Kokkos::Subview< CT , range , range , range , int , int > CS ; + enum { CN0 = 10, CN1 = 11, CN2 = 12 }; + typedef Kokkos::View< int***[13][14], ExecSpace > CT; + typedef Kokkos::Subview< CT, range, range, range, int, int > CS; - enum { DN0 = 10 , DN1 = 11 , DN2 = 12 , DN3 = 13 , DN4 = 14 }; - typedef Kokkos::View<int***[DN3][DN4],ExecSpace> DT ; - typedef Kokkos::Subview< DT , int , range , range , range , int > DS ; + enum { DN0 = 10, DN1 = 11, DN2 = 12, DN3 = 13, DN4 = 14 }; + typedef Kokkos::View< int***[DN3][DN4], ExecSpace > DT; + typedef Kokkos::Subview< DT, int, range, range, range, int > DS; + typedef 
Kokkos::View< int***[13][14], Kokkos::LayoutLeft, ExecSpace > DLT; + typedef Kokkos::Subview< DLT, range, int, int, int, int > DLS1; - typedef Kokkos::View<int***[13][14],Kokkos::LayoutLeft,ExecSpace> DLT ; - typedef Kokkos::Subview< DLT , range , int , int , int , int > DLS1 ; - - static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout , Kokkos::LayoutLeft >::value + static_assert( DLS1::rank == 1 && std::is_same< typename DLS1::array_layout, Kokkos::LayoutLeft >::value , "Subview layout error for rank 1 subview of left-most range of LayoutLeft" ); - typedef Kokkos::View<int***[13][14],Kokkos::LayoutRight,ExecSpace> DRT ; - typedef Kokkos::Subview< DRT , int , int , int , int , range > DRS1 ; + typedef Kokkos::View< int***[13][14], Kokkos::LayoutRight, ExecSpace > DRT; + typedef Kokkos::Subview< DRT, int, int, int, int, range > DRS1; - static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout , Kokkos::LayoutRight >::value + static_assert( DRS1::rank == 1 && std::is_same< typename DRS1::array_layout, Kokkos::LayoutRight >::value , "Subview layout error for rank 1 subview of right-most range of LayoutRight" ); - AT Aa ; - AS Ab ; - ACT Ac ; - BT Ba ; - BS Bb ; - CT Ca ; - CS Cb ; - DT Da ; - DS Db ; + AT Aa; + AS Ab; + ACT Ac; + BT Ba; + BS Bb; + CT Ca; + CS Cb; + DT Da; + DS Db; TestViewMappingSubview() - : Aa("Aa",AN) - , Ab( Kokkos::Experimental::subview( Aa , std::pair<int,int>(1,AN-1) ) ) - , Ac( Aa , std::pair<int,int>(1,AN-1) ) - , Ba("Ba",BN0,BN1,BN2) + : Aa( "Aa", AN ) + , Ab( Kokkos::Experimental::subview( Aa, std::pair< int, int >( 1, AN - 1 ) ) ) + , Ac( Aa, std::pair< int, int >( 1, AN - 1 ) ) + , Ba( "Ba", BN0, BN1, BN2 ) , Bb( Kokkos::Experimental::subview( Ba - , std::pair<int,int>(1,BN0-1) - , std::pair<int,int>(1,BN1-1) - , std::pair<int,int>(1,BN2-1) + , std::pair< int, int >( 1, BN0 - 1 ) + , std::pair< int, int >( 1, BN1 - 1 ) + , std::pair< int, int >( 1, BN2 - 1 ) ) ) - , Ca("Ca",CN0,CN1,CN2) + , Ca( "Ca", CN0, CN1, CN2 ) , Cb( Kokkos::Experimental::subview( Ca - , std::pair<int,int>(1,CN0-1) - , std::pair<int,int>(1,CN1-1) - , std::pair<int,int>(1,CN2-1) + , std::pair< int, int >( 1, CN0 - 1 ) + , std::pair< int, int >( 1, CN1 - 1 ) + , std::pair< int, int >( 1, CN2 - 1 ) , 1 , 2 ) ) - , Da("Da",DN0,DN1,DN2) + , Da( "Da", DN0, DN1, DN2 ) , Db( Kokkos::Experimental::subview( Da , 1 - , std::pair<int,int>(1,DN1-1) - , std::pair<int,int>(1,DN2-1) - , std::pair<int,int>(1,DN3-1) + , std::pair< int, int >( 1, DN1 - 1 ) + , std::pair< int, int >( 1, DN2 - 1 ) + , std::pair< int, int >( 1, DN3 - 1 ) , 2 ) ) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int, long & error_count ) const + { + auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa, Kokkos::pair< int, int >( 1, AN - 1 ) ); + + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ab[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ac[i - 1] ) ++error_count; + for ( int i = 1; i < AN - 1; ++i ) if( & Aa[i] != & Ad[i - 1] ) ++error_count; + + for ( int i2 = 1; i2 < BN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < BN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < BN0 - 1; ++i0 ) { + if ( & Ba( i0, i1, i2 ) != & Bb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; } + for ( int i2 = 1; i2 < CN2 - 1; ++i2 ) + for ( int i1 = 1; i1 < CN1 - 1; ++i1 ) + for ( int i0 = 1; i0 < CN0 - 1; ++i0 ) + { + if ( & Ca( i0, i1, i2, 1, 2 ) != & Cb( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; + } - KOKKOS_INLINE_FUNCTION - void operator()( const int , long & 
error_count ) const + for ( int i2 = 1; i2 < DN3 - 1; ++i2 ) + for ( int i1 = 1; i1 < DN2 - 1; ++i1 ) + for ( int i0 = 1; i0 < DN1 - 1; ++i0 ) { - auto Ad = Kokkos::Experimental::subview< Kokkos::MemoryUnmanaged >( Aa , Kokkos::pair<int,int>(1,AN-1) ); - - for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ab[i-1] ) ++error_count ; - for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ac[i-1] ) ++error_count ; - for ( int i = 1 ; i < AN-1 ; ++i ) if( & Aa[i] != & Ad[i-1] ) ++error_count ; - - for ( int i2 = 1 ; i2 < BN2-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < BN1-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < BN0-1 ; ++i0 ) { - if ( & Ba(i0,i1,i2) != & Bb(i0-1,i1-1,i2-1) ) ++error_count ; - }}} - - for ( int i2 = 1 ; i2 < CN2-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < CN1-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < CN0-1 ; ++i0 ) { - if ( & Ca(i0,i1,i2,1,2) != & Cb(i0-1,i1-1,i2-1) ) ++error_count ; - }}} - - for ( int i2 = 1 ; i2 < DN3-1 ; ++i2 ) { - for ( int i1 = 1 ; i1 < DN2-1 ; ++i1 ) { - for ( int i0 = 1 ; i0 < DN1-1 ; ++i0 ) { - if ( & Da(1,i0,i1,i2,2) != & Db(i0-1,i1-1,i2-1) ) ++error_count ; - }}} + if ( & Da( 1, i0, i1, i2, 2 ) != & Db( i0 - 1, i1 - 1, i2 - 1 ) ) ++error_count; } + } static void run() { - TestViewMappingSubview self ; - - ASSERT_EQ( self.Aa.dimension_0() , AN ); - ASSERT_EQ( self.Ab.dimension_0() , AN - 2 ); - ASSERT_EQ( self.Ac.dimension_0() , AN - 2 ); - ASSERT_EQ( self.Ba.dimension_0() , BN0 ); - ASSERT_EQ( self.Ba.dimension_1() , BN1 ); - ASSERT_EQ( self.Ba.dimension_2() , BN2 ); - ASSERT_EQ( self.Bb.dimension_0() , BN0 - 2 ); - ASSERT_EQ( self.Bb.dimension_1() , BN1 - 2 ); - ASSERT_EQ( self.Bb.dimension_2() , BN2 - 2 ); - - ASSERT_EQ( self.Ca.dimension_0() , CN0 ); - ASSERT_EQ( self.Ca.dimension_1() , CN1 ); - ASSERT_EQ( self.Ca.dimension_2() , CN2 ); - ASSERT_EQ( self.Ca.dimension_3() , 13 ); - ASSERT_EQ( self.Ca.dimension_4() , 14 ); - ASSERT_EQ( self.Cb.dimension_0() , CN0 - 2 ); - ASSERT_EQ( self.Cb.dimension_1() , CN1 - 2 ); - ASSERT_EQ( self.Cb.dimension_2() , CN2 - 2 ); - - ASSERT_EQ( self.Da.dimension_0() , DN0 ); - ASSERT_EQ( self.Da.dimension_1() , DN1 ); - ASSERT_EQ( self.Da.dimension_2() , DN2 ); - ASSERT_EQ( self.Da.dimension_3() , DN3 ); - ASSERT_EQ( self.Da.dimension_4() , DN4 ); - - ASSERT_EQ( self.Db.dimension_0() , DN1 - 2 ); - ASSERT_EQ( self.Db.dimension_1() , DN2 - 2 ); - ASSERT_EQ( self.Db.dimension_2() , DN3 - 2 ); - - ASSERT_EQ( self.Da.stride_1() , self.Db.stride_0() ); - ASSERT_EQ( self.Da.stride_2() , self.Db.stride_1() ); - ASSERT_EQ( self.Da.stride_3() , self.Db.stride_2() ); - - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >(0,1) , self , error_count ); - ASSERT_EQ( error_count , 0 ); + TestViewMappingSubview self; + + ASSERT_EQ( self.Aa.dimension_0(), AN ); + ASSERT_EQ( self.Ab.dimension_0(), AN - 2 ); + ASSERT_EQ( self.Ac.dimension_0(), AN - 2 ); + ASSERT_EQ( self.Ba.dimension_0(), BN0 ); + ASSERT_EQ( self.Ba.dimension_1(), BN1 ); + ASSERT_EQ( self.Ba.dimension_2(), BN2 ); + ASSERT_EQ( self.Bb.dimension_0(), BN0 - 2 ); + ASSERT_EQ( self.Bb.dimension_1(), BN1 - 2 ); + ASSERT_EQ( self.Bb.dimension_2(), BN2 - 2 ); + + ASSERT_EQ( self.Ca.dimension_0(), CN0 ); + ASSERT_EQ( self.Ca.dimension_1(), CN1 ); + ASSERT_EQ( self.Ca.dimension_2(), CN2 ); + ASSERT_EQ( self.Ca.dimension_3(), 13 ); + ASSERT_EQ( self.Ca.dimension_4(), 14 ); + ASSERT_EQ( self.Cb.dimension_0(), CN0 - 2 ); + ASSERT_EQ( self.Cb.dimension_1(), CN1 - 2 ); + ASSERT_EQ( self.Cb.dimension_2(), CN2 - 2 ); + + ASSERT_EQ( self.Da.dimension_0(), DN0 ); + 
ASSERT_EQ( self.Da.dimension_1(), DN1 ); + ASSERT_EQ( self.Da.dimension_2(), DN2 ); + ASSERT_EQ( self.Da.dimension_3(), DN3 ); + ASSERT_EQ( self.Da.dimension_4(), DN4 ); + + ASSERT_EQ( self.Db.dimension_0(), DN1 - 2 ); + ASSERT_EQ( self.Db.dimension_1(), DN2 - 2 ); + ASSERT_EQ( self.Db.dimension_2(), DN3 - 2 ); + + ASSERT_EQ( self.Da.stride_1(), self.Db.stride_0() ); + ASSERT_EQ( self.Da.stride_2(), self.Db.stride_1() ); + ASSERT_EQ( self.Da.stride_3(), self.Db.stride_2() ); + + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace >( 0, 1 ), self, error_count ); + ASSERT_EQ( error_count, 0 ); } - }; template< class Space > void test_view_mapping_subview() { - typedef typename Space::execution_space ExecSpace ; + typedef typename Space::execution_space ExecSpace; TestViewMappingSubview< ExecSpace >::run(); } @@ -1181,214 +1195,228 @@ struct TestViewMapOperator { static_assert( ViewType::reference_type_is_lvalue_reference , "Test only valid for lvalue reference type" ); - const ViewType v ; + const ViewType v; KOKKOS_INLINE_FUNCTION - void test_left( size_t i0 , long & error_count ) const + void test_left( size_t i0, long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v( 0, 0, 0, 0, 0, 0, 0, 0 ); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0; + + for ( size_t i7 = 0; i7 < n7; ++i7 ) + for ( size_t i6 = 0; i6 < n6; ++i6 ) + for ( size_t i5 = 0; i5 < n5; ++i5 ) + for ( size_t i4 = 0; i4 < n4; ++i4 ) + for ( size_t i3 = 0; i3 < n3; ++i3 ) + for ( size_t i2 = 0; i2 < n2; ++i2 ) + for ( size_t i1 = 0; i1 < n1; ++i1 ) { - typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); - const size_t n1 = v.dimension_1(); - const size_t n2 = v.dimension_2(); - const size_t n3 = v.dimension_3(); - const size_t n4 = v.dimension_4(); - const size_t n5 = v.dimension_5(); - const size_t n6 = v.dimension_6(); - const size_t n7 = v.dimension_7(); - - long offset = 0 ; - - for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) - for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) - for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) - for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) - for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) - for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) - for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) - { - const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; - if ( d < offset ) ++error_count ; - offset = d ; - } - - if ( v.span() <= size_t(offset) ) ++error_count ; + const long d = & v( i0, i1, i2, i3, i4, i5, i6, i7 ) - base_ptr; + if ( d < offset ) ++error_count; + offset = d; } + if ( v.span() <= size_t( offset ) ) ++error_count; + } + KOKKOS_INLINE_FUNCTION - void test_right( size_t i0 , long & error_count ) const + void test_right( size_t i0, long & error_count ) const + { + typename ViewType::value_type * const base_ptr = & v( 0, 0, 0, 0, 0, 0, 0, 0 ); + const size_t n1 = v.dimension_1(); + const size_t n2 = v.dimension_2(); + const size_t n3 = v.dimension_3(); + const size_t n4 = v.dimension_4(); + const size_t n5 = v.dimension_5(); + const size_t n6 = v.dimension_6(); + const size_t n7 = v.dimension_7(); + + long offset = 0; + + for ( size_t i1 = 0; i1 < n1; ++i1 ) + for ( size_t i2 = 0; i2 < n2; ++i2 ) + for ( size_t i3 = 0; i3 < n3; ++i3 ) + for ( size_t i4 = 0; i4 < n4; ++i4 ) + for ( size_t i5 = 0; i5 < n5; ++i5 ) + for ( size_t i6 = 0; i6 < 
n6; ++i6 ) + for ( size_t i7 = 0; i7 < n7; ++i7 ) { - typename ViewType::value_type * const base_ptr = & v(0,0,0,0,0,0,0,0); - const size_t n1 = v.dimension_1(); - const size_t n2 = v.dimension_2(); - const size_t n3 = v.dimension_3(); - const size_t n4 = v.dimension_4(); - const size_t n5 = v.dimension_5(); - const size_t n6 = v.dimension_6(); - const size_t n7 = v.dimension_7(); - - long offset = 0 ; - - for ( size_t i1 = 0 ; i1 < n1 ; ++i1 ) - for ( size_t i2 = 0 ; i2 < n2 ; ++i2 ) - for ( size_t i3 = 0 ; i3 < n3 ; ++i3 ) - for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) - for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) - for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) - for ( size_t i7 = 0 ; i7 < n7 ; ++i7 ) - { - const long d = & v(i0,i1,i2,i3,i4,i5,i6,i7) - base_ptr ; - if ( d < offset ) ++error_count ; - offset = d ; - } - - if ( v.span() <= size_t(offset) ) ++error_count ; + const long d = & v( i0, i1, i2, i3, i4, i5, i6, i7 ) - base_ptr; + if ( d < offset ) ++error_count; + offset = d; } + if ( v.span() <= size_t( offset ) ) ++error_count; + } + KOKKOS_INLINE_FUNCTION - void operator()( size_t i , long & error_count ) const - { - if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutLeft >::value ) - test_left(i,error_count); - else if ( std::is_same< typename ViewType::array_layout , Kokkos::LayoutRight >::value ) - test_right(i,error_count); + void operator()( size_t i, long & error_count ) const + { + if ( std::is_same< typename ViewType::array_layout, Kokkos::LayoutLeft >::value ) { + test_left( i, error_count ); } + else if ( std::is_same< typename ViewType::array_layout, Kokkos::LayoutRight >::value ) { + test_right( i, error_count ); + } + } - constexpr static size_t N0 = 10 ; - constexpr static size_t N1 = 9 ; - constexpr static size_t N2 = 8 ; - constexpr static size_t N3 = 7 ; - constexpr static size_t N4 = 6 ; - constexpr static size_t N5 = 5 ; - constexpr static size_t N6 = 4 ; - constexpr static size_t N7 = 3 ; + constexpr static size_t N0 = 10; + constexpr static size_t N1 = 9; + constexpr static size_t N2 = 8; + constexpr static size_t N3 = 7; + constexpr static size_t N4 = 6; + constexpr static size_t N5 = 5; + constexpr static size_t N6 = 4; + constexpr static size_t N7 = 3; - TestViewMapOperator() : v( "Test" , N0, N1, N2, N3, N4, N5, N6, N7 ) {} + TestViewMapOperator() : v( "Test", N0, N1, N2, N3, N4, N5, N6, N7 ) {} static void run() - { - TestViewMapOperator self ; - - ASSERT_EQ( self.v.dimension_0() , ( 0 < ViewType::rank ? N0 : 1 ) ); - ASSERT_EQ( self.v.dimension_1() , ( 1 < ViewType::rank ? N1 : 1 ) ); - ASSERT_EQ( self.v.dimension_2() , ( 2 < ViewType::rank ? N2 : 1 ) ); - ASSERT_EQ( self.v.dimension_3() , ( 3 < ViewType::rank ? N3 : 1 ) ); - ASSERT_EQ( self.v.dimension_4() , ( 4 < ViewType::rank ? N4 : 1 ) ); - ASSERT_EQ( self.v.dimension_5() , ( 5 < ViewType::rank ? N5 : 1 ) ); - ASSERT_EQ( self.v.dimension_6() , ( 6 < ViewType::rank ? N6 : 1 ) ); - ASSERT_EQ( self.v.dimension_7() , ( 7 < ViewType::rank ? N7 : 1 ) ); - - ASSERT_LE( self.v.dimension_0()* - self.v.dimension_1()* - self.v.dimension_2()* - self.v.dimension_3()* - self.v.dimension_4()* - self.v.dimension_5()* - self.v.dimension_6()* - self.v.dimension_7() - , self.v.span() ); - - long error_count ; - Kokkos::RangePolicy< typename ViewType::execution_space > range(0,self.v.dimension_0()); - Kokkos::parallel_reduce( range , self , error_count ); - ASSERT_EQ( 0 , error_count ); - } + { + TestViewMapOperator self; + + ASSERT_EQ( self.v.dimension_0(), ( 0 < ViewType::rank ? 
N0 : 1 ) ); + ASSERT_EQ( self.v.dimension_1(), ( 1 < ViewType::rank ? N1 : 1 ) ); + ASSERT_EQ( self.v.dimension_2(), ( 2 < ViewType::rank ? N2 : 1 ) ); + ASSERT_EQ( self.v.dimension_3(), ( 3 < ViewType::rank ? N3 : 1 ) ); + ASSERT_EQ( self.v.dimension_4(), ( 4 < ViewType::rank ? N4 : 1 ) ); + ASSERT_EQ( self.v.dimension_5(), ( 5 < ViewType::rank ? N5 : 1 ) ); + ASSERT_EQ( self.v.dimension_6(), ( 6 < ViewType::rank ? N6 : 1 ) ); + ASSERT_EQ( self.v.dimension_7(), ( 7 < ViewType::rank ? N7 : 1 ) ); + + ASSERT_LE( self.v.dimension_0() * + self.v.dimension_1() * + self.v.dimension_2() * + self.v.dimension_3() * + self.v.dimension_4() * + self.v.dimension_5() * + self.v.dimension_6() * + self.v.dimension_7() + , self.v.span() ); + + long error_count; + Kokkos::RangePolicy< typename ViewType::execution_space > range( 0, self.v.dimension_0() ); + Kokkos::parallel_reduce( range, self, error_count ); + ASSERT_EQ( 0, error_count ); + } }; - template< class Space > void test_view_mapping_operator() { - typedef typename Space::execution_space ExecSpace ; - - TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int****,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutLeft,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutLeft,ExecSpace> >::run(); - - TestViewMapOperator< Kokkos::View<int,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int**,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int***,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int****,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*****,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int******,Kokkos::LayoutRight,ExecSpace> >::run(); - TestViewMapOperator< Kokkos::View<int*******,Kokkos::LayoutRight,ExecSpace> >::run(); + typedef typename Space::execution_space ExecSpace; + + TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int**, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutLeft, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutLeft, ExecSpace> >::run(); + + TestViewMapOperator< Kokkos::View<int, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int**, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int***, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int****, Kokkos::LayoutRight, 
ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*****, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int******, Kokkos::LayoutRight, ExecSpace> >::run(); + TestViewMapOperator< Kokkos::View<int*******, Kokkos::LayoutRight, ExecSpace> >::run(); } /*--------------------------------------------------------------------------*/ template< class Space > struct TestViewMappingAtomic { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; - typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait ; + typedef Kokkos::MemoryTraits< Kokkos::Atomic > mem_trait; - typedef Kokkos::View< int * , ExecSpace > T ; - typedef Kokkos::View< int * , ExecSpace , mem_trait > T_atom ; + typedef Kokkos::View< int *, ExecSpace > T; + typedef Kokkos::View< int *, ExecSpace, mem_trait > T_atom; - T x ; - T_atom x_atom ; + T x; + T_atom x_atom; - constexpr static size_t N = 100000 ; + constexpr static size_t N = 100000; struct TagInit {}; struct TagUpdate {}; struct TagVerify {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const - { x(i) = i ; } + void operator()( const TagInit &, const int i ) const + { x( i ) = i; } KOKKOS_INLINE_FUNCTION - void operator()( const TagUpdate & , const int i ) const - { x_atom(i%2) += 1 ; } + void operator()( const TagUpdate &, const int i ) const + { x_atom( i % 2 ) += 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagVerify & , const int i , long & error_count ) const - { - if ( i < 2 ) { if ( x(i) != int(i + N / 2) ) ++error_count ; } - else { if ( x(i) != int(i) ) ++error_count ; } - } + void operator()( const TagVerify &, const int i, long & error_count ) const + { + if ( i < 2 ) { if ( x( i ) != int( i + N / 2 ) ) ++error_count; } + else { if ( x( i ) != int( i ) ) ++error_count; } + } TestViewMappingAtomic() - : x("x",N) + : x( "x", N ) , x_atom( x ) {} static void run() + { + ASSERT_TRUE( T::reference_type_is_lvalue_reference ); + ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); + + TestViewMappingAtomic self; + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagInit >( 0, N ), self ); + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, TagUpdate >( 0, N ), self ); + + long error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagVerify >( 0, N ), self, error_count ); + + ASSERT_EQ( 0, error_count ); + + typename TestViewMappingAtomic::T_atom::HostMirror x_host = Kokkos::create_mirror_view( self.x ); + Kokkos::deep_copy( x_host, self.x ); + + error_count = -1; + + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::DefaultHostExecutionSpace, TagVerify >( 0, N ), + [=] ( const TagVerify &, const int i, long & tmp_error_count ) { - ASSERT_TRUE( T::reference_type_is_lvalue_reference ); - ASSERT_FALSE( T_atom::reference_type_is_lvalue_reference ); - - TestViewMappingAtomic self ; - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagInit >(0,N) , self ); - Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace , TagUpdate >(0,N) , self ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagVerify >(0,N) , self , error_count ); - ASSERT_EQ( 0 , error_count ); - typename TestViewMappingAtomic::T_atom::HostMirror x_host = Kokkos::create_mirror_view(self.x); - Kokkos::deep_copy(x_host,self.x); - error_count = -1; - Kokkos::parallel_reduce( Kokkos::RangePolicy< 
Kokkos::DefaultHostExecutionSpace, TagVerify>(0,N), - [=] ( const TagVerify & , const int i , long & tmp_error_count ) { - if ( i < 2 ) { if ( x_host(i) != int(i + N / 2) ) ++tmp_error_count ; } - else { if ( x_host(i) != int(i) ) ++tmp_error_count ; } - }, error_count); - ASSERT_EQ( 0 , error_count ); - Kokkos::deep_copy(self.x,x_host); - } + if ( i < 2 ) { + if ( x_host( i ) != int( i + N / 2 ) ) ++tmp_error_count ; + } + else { + if ( x_host( i ) != int( i ) ) ++tmp_error_count ; + } + }, error_count); + + ASSERT_EQ( 0 , error_count ); + Kokkos::deep_copy( self.x, x_host ); + } }; /*--------------------------------------------------------------------------*/ template< class Space > struct TestViewMappingClassValue { - typedef typename Space::execution_space ExecSpace ; - typedef typename Space::memory_space MemSpace ; + typedef typename Space::execution_space ExecSpace; + typedef typename Space::memory_space MemSpace; struct ValueType { KOKKOS_INLINE_FUNCTION @@ -1396,11 +1424,11 @@ struct TestViewMappingClassValue { { #if 0 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf("TestViewMappingClassValue construct on Cuda\n"); + printf( "TestViewMappingClassValue construct on Cuda\n" ); #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf("TestViewMappingClassValue construct on Host\n"); + printf( "TestViewMappingClassValue construct on Host\n" ); #else - printf("TestViewMappingClassValue construct unknown\n"); + printf( "TestViewMappingClassValue construct unknown\n" ); #endif #endif } @@ -1409,11 +1437,11 @@ struct TestViewMappingClassValue { { #if 0 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) - printf("TestViewMappingClassValue destruct on Cuda\n"); + printf( "TestViewMappingClassValue destruct on Cuda\n" ); #elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - printf("TestViewMappingClassValue destruct on Host\n"); + printf( "TestViewMappingClassValue destruct on Host\n" ); #else - printf("TestViewMappingClassValue destruct unknown\n"); + printf( "TestViewMappingClassValue destruct unknown\n" ); #endif #endif } @@ -1421,17 +1449,15 @@ struct TestViewMappingClassValue { static void run() { - using namespace Kokkos::Experimental ; + using namespace Kokkos::Experimental; + ExecSpace::fence(); { - View< ValueType , ExecSpace > a("a"); + View< ValueType, ExecSpace > a( "a" ); ExecSpace::fence(); } ExecSpace::fence(); } }; -} /* namespace Test */ - -/*--------------------------------------------------------------------------*/ - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewOfClass.hpp b/lib/kokkos/core/unit_test/TestViewOfClass.hpp index 381b8786bc740dfcfb922eb6ddf5443ffa7136cd..d624c5dda2034b04b5b1a427614f38186aa032d8 100644 --- a/lib/kokkos/core/unit_test/TestViewOfClass.hpp +++ b/lib/kokkos/core/unit_test/TestViewOfClass.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,34 +48,29 @@ #include <sstream> #include <iostream> -/*--------------------------------------------------------------------------*/ - namespace Test { template< class Space > struct NestedView { - - Kokkos::View<int*,Space> member ; + Kokkos::View< int*, Space > member; public: - KOKKOS_INLINE_FUNCTION - NestedView() : member() - {} + NestedView() : member() {} KOKKOS_INLINE_FUNCTION - NestedView & operator = ( const Kokkos::View<int*,Space> & lhs ) - { - member = lhs ; - if ( member.dimension_0() ) Kokkos::atomic_add( & member(0) , 1 ); - return *this ; - } + NestedView & operator=( const Kokkos::View< int*, Space > & lhs ) + { + member = lhs; + if ( member.dimension_0() ) Kokkos::atomic_add( & member( 0 ), 1 ); + return *this; + } KOKKOS_INLINE_FUNCTION ~NestedView() - { + { if ( member.dimension_0() ) { - Kokkos::atomic_add( & member(0) , -1 ); + Kokkos::atomic_add( & member( 0 ), -1 ); } } }; @@ -83,49 +78,44 @@ public: template< class Space > struct NestedViewFunctor { - Kokkos::View< NestedView<Space> * , Space > nested ; - Kokkos::View<int*,Space> array ; + Kokkos::View< NestedView<Space> *, Space > nested; + Kokkos::View< int*, Space > array; - NestedViewFunctor( - const Kokkos::View< NestedView<Space> * , Space > & arg_nested , - const Kokkos::View<int*,Space> & arg_array ) + NestedViewFunctor( + const Kokkos::View< NestedView<Space> *, Space > & arg_nested, + const Kokkos::View< int*, Space > & arg_array ) : nested( arg_nested ) , array( arg_array ) {} KOKKOS_INLINE_FUNCTION - void operator()( int i ) const - { nested[i] = array ; } + void operator()( int i ) const { nested[i] = array; } }; - template< class Space > void view_nested_view() { - Kokkos::View<int*,Space> tracking("tracking",1); + Kokkos::View< int*, Space > tracking( "tracking", 1 ); - typename Kokkos::View<int*,Space>::HostMirror - host_tracking = Kokkos::create_mirror( tracking ); + typename Kokkos::View< int*, Space >::HostMirror host_tracking = Kokkos::create_mirror( tracking ); { - Kokkos::View< NestedView<Space> * , Space > a("a_nested_view",2); + Kokkos::View< NestedView<Space> *, Space > a( "a_nested_view", 2 ); - Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( a , tracking ) ); - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 2 , host_tracking(0) ); + Kokkos::parallel_for( Kokkos::RangePolicy< Space >( 0, 2 ), NestedViewFunctor< Space >( a, tracking ) ); + Kokkos::deep_copy( host_tracking, tracking ); + ASSERT_EQ( 2, host_tracking( 0 ) ); - Kokkos::View< NestedView<Space> * , Space > b("b_nested_view",2); - Kokkos::parallel_for( Kokkos::RangePolicy<Space>(0,2) , NestedViewFunctor<Space>( b , tracking ) ); - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 4 , host_tracking(0) ); + Kokkos::View< NestedView<Space> *, Space > b( "b_nested_view", 2 ); + Kokkos::parallel_for( Kokkos::RangePolicy< Space >( 0, 2 ), NestedViewFunctor< Space >( b, tracking ) ); + Kokkos::deep_copy( host_tracking, tracking ); + ASSERT_EQ( 4, host_tracking( 0 ) ); } - Kokkos::deep_copy( host_tracking , tracking ); - ASSERT_EQ( 0 , host_tracking(0) ); -} + Kokkos::deep_copy( host_tracking, tracking ); + ASSERT_EQ( 0, host_tracking( 0 ) ); } -/*--------------------------------------------------------------------------*/ - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp 
b/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp index 09141e582c48423341029bae51c09fe51d14c893..21ae92e93ccdc09c3e42057f706c7bec383239eb 100644 --- a/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp +++ b/lib/kokkos/core/unit_test/TestViewSpaceAssign.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,35 +48,29 @@ #include <sstream> #include <iostream> -/*--------------------------------------------------------------------------*/ - namespace Test { -template< typename SpaceDst , typename SpaceSrc > +template< typename SpaceDst, typename SpaceSrc > void view_space_assign() { - Kokkos::View<double*,SpaceDst> a = - Kokkos::View<double*,SpaceSrc>("a",1); + Kokkos::View< double*, SpaceDst > a = + Kokkos::View< double*, SpaceSrc >( "a", 1 ); - Kokkos::View<double*,Kokkos::LayoutLeft,SpaceDst> b = - Kokkos::View<double*,Kokkos::LayoutLeft,SpaceSrc>("b",1); + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceDst > b = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "b", 1 ); - Kokkos::View<double*,Kokkos::LayoutRight,SpaceDst> c = - Kokkos::View<double*,Kokkos::LayoutRight,SpaceSrc>("c",1); + Kokkos::View< double*, Kokkos::LayoutRight, SpaceDst > c = + Kokkos::View< double*, Kokkos::LayoutRight, SpaceSrc >( "c", 1 ); - Kokkos::View<double*,SpaceDst,Kokkos::MemoryRandomAccess> d = - Kokkos::View<double*,SpaceSrc>("d",1); + Kokkos::View< double*, SpaceDst, Kokkos::MemoryRandomAccess > d = + Kokkos::View< double*, SpaceSrc >( "d", 1 ); - Kokkos::View<double*,Kokkos::LayoutLeft,SpaceDst,Kokkos::MemoryRandomAccess> e = - Kokkos::View<double*,Kokkos::LayoutLeft,SpaceSrc>("e",1); + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceDst, Kokkos::MemoryRandomAccess > e = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "e", 1 ); // Rank-one layout can assign: - Kokkos::View<double*,Kokkos::LayoutRight,SpaceDst> f = - Kokkos::View<double*,Kokkos::LayoutLeft,SpaceSrc>("f",1); + Kokkos::View< double*, Kokkos::LayoutRight, SpaceDst > f = + Kokkos::View< double*, Kokkos::LayoutLeft, SpaceSrc >( "f", 1 ); } - } // namespace Test - -/*--------------------------------------------------------------------------*/ - diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index 1c2575b6f61c9fa11b28963852085960ecc420aa..386301b45dbc9f9d6bb5770133d818a7eccba40e 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,64 +48,68 @@ #include <sstream> #include <iostream> -/*--------------------------------------------------------------------------*/ - namespace TestViewSubview { -template<class Layout, class Space> +template< class Layout, class Space > struct getView { static - Kokkos::View<double**,Layout,Space> get(int n, int m) { - return Kokkos::View<double**,Layout,Space>("G",n,m); + Kokkos::View< double**, Layout, Space > get( int n, int m ) { + return Kokkos::View< double**, Layout, Space >( "G", n, m ); } }; -template<class Space> -struct getView<Kokkos::LayoutStride,Space> { +template< class Space > +struct getView< Kokkos::LayoutStride, Space > { static - Kokkos::View<double**,Kokkos::LayoutStride,Space> get(int n, int m) { - const int rank = 2 ; + Kokkos::View< double**, Kokkos::LayoutStride, Space > get( int n, int m ) { + const int rank = 2; const int order[] = { 0, 1 }; - const unsigned dim[] = { unsigned(n), unsigned(m) }; - Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank , order , dim ); - return Kokkos::View<double**,Kokkos::LayoutStride,Space>("G",stride); + const unsigned dim[] = { unsigned( n ), unsigned( m ) }; + Kokkos::LayoutStride stride = Kokkos::LayoutStride::order_dimensions( rank, order, dim ); + + return Kokkos::View< double**, Kokkos::LayoutStride, Space >( "G", stride ); } }; -template<class ViewType, class Space> +template< class ViewType, class Space > struct fill_1D { typedef typename Space::execution_space execution_space; typedef typename ViewType::size_type size_type; + ViewType a; double val; - fill_1D(ViewType a_, double val_):a(a_),val(val_) { - } + + fill_1D( ViewType a_, double val_ ) : a( a_ ), val( val_ ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int i) const { - a(i) = val; - } + void operator()( const int i ) const { a( i ) = val; } }; -template<class ViewType, class Space> +template< class ViewType, class Space > struct fill_2D { typedef typename Space::execution_space execution_space; typedef typename ViewType::size_type size_type; + ViewType a; double val; - fill_2D(ViewType a_, double val_):a(a_),val(val_) { - } + + fill_2D( ViewType a_, double val_ ) : a( a_ ), val( val_ ) {} + KOKKOS_INLINE_FUNCTION - void operator() (const int i) const{ - for(int j = 0; j < static_cast<int>(a.dimension_1()); j++) - a(i,j) = val; + void operator()( const int i ) const + { + for ( int j = 0; j < static_cast< int >( a.dimension_1() ); j++ ) { + a( i, j ) = val; + } } }; -template<class Layout, class Space> +template< class Layout, class Space > void test_auto_1d () { - typedef Kokkos::View<double**, Layout, Space> mv_type; + typedef Kokkos::View< double**, Layout, Space > mv_type; typedef typename mv_type::size_type size_type; + const double ZERO = 0.0; const double ONE = 1.0; const double TWO = 2.0; @@ -113,359 +117,359 @@ void test_auto_1d () const size_type numRows = 10; const size_type numCols = 3; - mv_type X = getView<Layout,Space>::get(numRows, numCols); - typename mv_type::HostMirror X_h = Kokkos::create_mirror_view (X); + mv_type X = getView< Layout, Space >::get( numRows, numCols ); + typename mv_type::HostMirror X_h = 
Kokkos::create_mirror_view( X ); - fill_2D<mv_type,Space> f1(X, ONE); - Kokkos::parallel_for(X.dimension_0(),f1); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ONE); + fill_2D< mv_type, Space > f1( X, ONE ); + Kokkos::parallel_for( X.dimension_0(), f1 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) { + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ONE ); } } - fill_2D<mv_type,Space> f2(X, 0.0); - Kokkos::parallel_for(X.dimension_0(),f2); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ZERO); + fill_2D< mv_type, Space > f2( X, 0.0 ); + Kokkos::parallel_for( X.dimension_0(), f2 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) { + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ZERO ); } } - fill_2D<mv_type,Space> f3(X, TWO); - Kokkos::parallel_for(X.dimension_0(),f3); - Kokkos::deep_copy (X_h, X); - for (size_type j = 0; j < numCols; ++j) { - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == TWO); + fill_2D< mv_type, Space > f3( X, TWO ); + Kokkos::parallel_for( X.dimension_0(), f3 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type j = 0; j < numCols; ++j ) { + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == TWO ); } } - for (size_type j = 0; j < numCols; ++j) { - auto X_j = Kokkos::subview (X, Kokkos::ALL, j); + for ( size_type j = 0; j < numCols; ++j ) { + auto X_j = Kokkos::subview( X, Kokkos::ALL, j ); - fill_1D<decltype(X_j),Space> f4(X_j, ZERO); - Kokkos::parallel_for(X_j.dimension_0(),f4); - Kokkos::deep_copy (X_h, X); - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,j) == ZERO); + fill_1D< decltype( X_j ), Space > f4( X_j, ZERO ); + Kokkos::parallel_for( X_j.dimension_0(), f4 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, j ) == ZERO ); } - for (size_type jj = 0; jj < numCols; ++jj) { - auto X_jj = Kokkos::subview (X, Kokkos::ALL, jj); - fill_1D<decltype(X_jj),Space> f5(X_jj, ONE); - Kokkos::parallel_for(X_jj.dimension_0(),f5); - Kokkos::deep_copy (X_h, X); - for (size_type i = 0; i < numRows; ++i) { - ASSERT_TRUE(X_h(i,jj) == ONE); + for ( size_type jj = 0; jj < numCols; ++jj ) { + auto X_jj = Kokkos::subview ( X, Kokkos::ALL, jj ); + fill_1D< decltype( X_jj ), Space > f5( X_jj, ONE ); + Kokkos::parallel_for( X_jj.dimension_0(), f5 ); + Kokkos::deep_copy( X_h, X ); + for ( size_type i = 0; i < numRows; ++i ) { + ASSERT_TRUE( X_h( i, jj ) == ONE ); } } } } -template<class LD, class LS, class Space> -void test_1d_strided_assignment_impl(bool a, bool b, bool c, bool d, int n, int m) { - Kokkos::View<double**,LS,Space> l2d("l2d",n,m); +template< class LD, class LS, class Space > +void test_1d_strided_assignment_impl( bool a, bool b, bool c, bool d, int n, int m ) { + Kokkos::View< double**, LS, Space > l2d( "l2d", n, m ); - int col = n>2?2:0; - int row = m>2?2:0; + int col = n > 2 ? 2 : 0; + int row = m > 2 ? 
2 : 0; - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { - if(a) { - Kokkos::View<double*,LD,Space> l1da = Kokkos::subview(l2d,Kokkos::ALL,row); - ASSERT_TRUE( & l1da(0) == & l2d(0,row) ); - if(n>1) - ASSERT_TRUE( & l1da(1) == & l2d(1,row) ); - } - if(b && n>13) { - Kokkos::View<double*,LD,Space> l1db = Kokkos::subview(l2d,std::pair<unsigned,unsigned>(2,13),row); - ASSERT_TRUE( & l1db(0) == & l2d(2,row) ); - ASSERT_TRUE( & l1db(1) == & l2d(3,row) ); - } - if(c) { - Kokkos::View<double*,LD,Space> l1dc = Kokkos::subview(l2d,col,Kokkos::ALL); - ASSERT_TRUE( & l1dc(0) == & l2d(col,0) ); - if(m>1) - ASSERT_TRUE( & l1dc(1) == & l2d(col,1) ); - } - if(d && m>13) { - Kokkos::View<double*,LD,Space> l1dd = Kokkos::subview(l2d,col,std::pair<unsigned,unsigned>(2,13)); - ASSERT_TRUE( & l1dd(0) == & l2d(col,2) ); - ASSERT_TRUE( & l1dd(1) == & l2d(col,3) ); - } + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + if ( a ) { + Kokkos::View< double*, LD, Space > l1da = Kokkos::subview( l2d, Kokkos::ALL, row ); + ASSERT_TRUE( & l1da( 0 ) == & l2d( 0, row ) ); + if ( n > 1 ) { + ASSERT_TRUE( & l1da( 1 ) == & l2d( 1, row ) ); + } + } + + if ( b && n > 13 ) { + Kokkos::View< double*, LD, Space > l1db = Kokkos::subview( l2d, std::pair< unsigned, unsigned >( 2, 13 ), row ); + ASSERT_TRUE( & l1db( 0 ) == & l2d( 2, row ) ); + ASSERT_TRUE( & l1db( 1 ) == & l2d( 3, row ) ); + } + + if ( c ) { + Kokkos::View< double*, LD, Space > l1dc = Kokkos::subview( l2d, col, Kokkos::ALL ); + ASSERT_TRUE( & l1dc( 0 ) == & l2d( col, 0 ) ); + if( m > 1 ) { + ASSERT_TRUE( & l1dc( 1 ) == & l2d( col, 1 ) ); + } + } + + if ( d && m > 13 ) { + Kokkos::View< double*, LD, Space > l1dd = Kokkos::subview( l2d, col, std::pair< unsigned, unsigned >( 2, 13 ) ); + ASSERT_TRUE( & l1dd( 0 ) == & l2d( col, 2 ) ); + ASSERT_TRUE( & l1dd( 1 ) == & l2d( col, 3 ) ); + } } } -template<class Space > +template< class Space > void test_1d_strided_assignment() { - test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutLeft,Space>(true,true,true,true,17,3); - test_1d_strided_assignment_impl<Kokkos::LayoutStride,Kokkos::LayoutRight,Space>(true,true,true,true,17,3); - - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3); - test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,3); - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,17,3); - test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,17,3); - - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1); - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17); - test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,true,true,1,17); - test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutLeft,Space>(true,true,false,false,17,1); - - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(true,true,true,true,17,1); - test_1d_strided_assignment_impl<Kokkos::LayoutLeft,Kokkos::LayoutRight,Space>(false,false,true,true,1,17); - test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(false,false,true,true,1,17); - 
test_1d_strided_assignment_impl<Kokkos::LayoutRight,Kokkos::LayoutRight,Space>(true,true,true,true,17,1); + test_1d_strided_assignment_impl< Kokkos::LayoutStride, Kokkos::LayoutLeft, Space >( true, true, true, true, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutStride, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 3 ); + + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( false, false, true, true, 17, 3 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( false, false, true, true, 17, 3 ); + + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 1 ); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutLeft, Space >( true, true, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutLeft, Space >( true, true, false, false, 17, 1 ); + + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 1 ); + test_1d_strided_assignment_impl< Kokkos::LayoutLeft, Kokkos::LayoutRight, Space >( false, false, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( false, false, true, true, 1, 17 ); + test_1d_strided_assignment_impl< Kokkos::LayoutRight, Kokkos::LayoutRight, Space >( true, true, true, true, 17, 1 ); } template< class Space > void test_left_0() { - typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutLeft , Space > - view_static_8_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { + typedef Kokkos::View< int [2][3][4][5][2][3][4][5], Kokkos::LayoutLeft, Space > view_static_8_type; - view_static_8_type x_static_8("x_static_left_8"); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_static_8_type x_static_8( "x_static_left_8" ); - ASSERT_TRUE( x_static_8.is_contiguous() ); + ASSERT_TRUE( x_static_8.is_contiguous() ); - Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x_static_8, 0, 0, 0, 0, 0, 0, 0, 0 ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x_static_8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = - Kokkos::subview( x_static_8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x_static_8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3, 0, 1, 2, 3 ); - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x_static_8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = - Kokkos::subview( x_static_8, 
Kokkos::pair<int,int>(0,2), 1, 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x_static_8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,3,1,1,2,3) ); - ASSERT_TRUE( & x2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x_static_8( 0, 1, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = - Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = - Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - ASSERT_TRUE( ! sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x_static_8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x_static_8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = - Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ - , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ - ); + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); - ASSERT_TRUE( ! sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); - } + ASSERT_TRUE( ! 
sx4.is_contiguous() ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x_static_8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_left_1() { - typedef Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutLeft , Space > - view_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { + typedef Kokkos::View< int ****[2][3][4][5], Kokkos::LayoutLeft, Space > view_type; - view_type x8("x_left_8",2,3,4,5); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type x8( "x_left_8", 2, 3, 4, 5 ); - ASSERT_TRUE( x8.is_contiguous() ); + ASSERT_TRUE( x8.is_contiguous() ); - Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x8, 0, 0, 0, 0, 0, 0, 0, 0 ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = - Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3, 0, 1, 2, 3 ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3, 0, 1, 2, 3 ); - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x8(1,1,2,3,0,1,2,3) ); + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = - Kokkos::subview( x8, Kokkos::pair<int,int>(0,2), 1, 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x8, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,3,1,1,2,3) ); - ASSERT_TRUE( & x2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x8( 0, 1, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = - Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = - Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - ASSERT_TRUE( ! sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + ASSERT_TRUE( ! 
sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = - Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ - , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ - ); + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); - ASSERT_TRUE( ! sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 1,0+i2, 2,2+i3) ); - } + ASSERT_TRUE( ! sx4.is_contiguous() ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_left_2() { - typedef Kokkos::View< int **** , Kokkos::LayoutLeft , Space > view_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { - - view_type x4("x4",2,3,4,5); - - ASSERT_TRUE( x4.is_contiguous() ); - - Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( x4 , 0, 0, 0, 0 ); - - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & x4(0,0,0,0) ); - - Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = - Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, 2, 3 ); - - ASSERT_TRUE( x1.is_contiguous() ); - ASSERT_TRUE( & x1(0) == & x4(0,1,2,3) ); - ASSERT_TRUE( & x1(1) == & x4(1,1,2,3) ); - - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = - Kokkos::subview( x4, Kokkos::pair<int,int>(0,2), 1, Kokkos::pair<int,int>(1,3), 2 ); - - ASSERT_TRUE( ! x2.is_contiguous() ); - ASSERT_TRUE( & x2(0,0) == & x4(0,1,1,2) ); - ASSERT_TRUE( & x2(1,0) == & x4(1,1,1,2) ); - ASSERT_TRUE( & x2(0,1) == & x4(0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x4(1,1,2,2) ); - - // Kokkos::View<int**,Kokkos::LayoutLeft,Space> error_2 = - Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = - Kokkos::subview( x4, 1, Kokkos::pair<int,int>(0,2) - , 2, Kokkos::pair<int,int>(1,4) ); - - ASSERT_TRUE( ! sx2.is_contiguous() ); - ASSERT_TRUE( & sx2(0,0) == & x4(1,0,2,1) ); - ASSERT_TRUE( & sx2(1,0) == & x4(1,1,2,1) ); - ASSERT_TRUE( & sx2(0,1) == & x4(1,0,2,2) ); - ASSERT_TRUE( & sx2(1,1) == & x4(1,1,2,2) ); - ASSERT_TRUE( & sx2(0,2) == & x4(1,0,2,3) ); - ASSERT_TRUE( & sx2(1,2) == & x4(1,1,2,3) ); - - Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = - Kokkos::subview( x4, Kokkos::pair<int,int>(1,2) /* of [2] */ - , Kokkos::pair<int,int>(1,3) /* of [3] */ - , Kokkos::pair<int,int>(0,4) /* of [4] */ - , Kokkos::pair<int,int>(2,4) /* of [5] */ - ); - - ASSERT_TRUE( ! 
sx4.is_contiguous() ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x4( 1+i0, 1+i1, 0+i2, 2+i3 ) ); - } - + typedef Kokkos::View< int ****, Kokkos::LayoutLeft, Space > view_type; + + if ( Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace, typename Space::memory_space>::accessible ) { + view_type x4( "x4", 2, 3, 4, 5 ); + + ASSERT_TRUE( x4.is_contiguous() ); + + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( x4, 0, 0, 0, 0 ); + + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & x4( 0, 0, 0, 0 ) ); + + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); + + ASSERT_TRUE( x1.is_contiguous() ); + ASSERT_TRUE( & x1( 0 ) == & x4( 0, 1, 2, 3 ) ); + ASSERT_TRUE( & x1( 1 ) == & x4( 1, 1, 2, 3 ) ); + + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 0, 2 ), 1 + , Kokkos::pair< int, int >( 1, 3 ), 2 ); + + ASSERT_TRUE( ! x2.is_contiguous() ); + ASSERT_TRUE( & x2( 0, 0 ) == & x4( 0, 1, 1, 2 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x4( 1, 1, 1, 2 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x4( 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x4( 1, 1, 2, 2 ) ); + + // Kokkos::View< int**, Kokkos::LayoutLeft, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x4, 1, Kokkos::pair< int, int >( 0, 2 ) + , 2, Kokkos::pair< int, int >( 1, 4 ) ); + + ASSERT_TRUE( ! sx2.is_contiguous() ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x4( 1, 0, 2, 1 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x4( 1, 1, 2, 1 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x4( 1, 0, 2, 2 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x4( 1, 1, 2, 2 ) ); + ASSERT_TRUE( & sx2( 0, 2 ) == & x4( 1, 0, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 2 ) == & x4( 1, 1, 2, 3 ) ); + + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x4, Kokkos::pair< int, int >( 1, 2 ) /* of [2] */ + , Kokkos::pair< int, int >( 1, 3 ) /* of [3] */ + , Kokkos::pair< int, int >( 0, 4 ) /* of [4] */ + , Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); + + ASSERT_TRUE( ! 
sx4.is_contiguous() ); + + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x4( 1 + i0, 1 + i1, 0 + i2, 2 + i3 ) ); + } } } template< class Space > void test_left_3() { - typedef Kokkos::View< int ** , Kokkos::LayoutLeft , Space > view_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { + typedef Kokkos::View< int **, Kokkos::LayoutLeft, Space > view_type; - view_type xm("x4",10,5); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type xm( "x4", 10, 5 ); - ASSERT_TRUE( xm.is_contiguous() ); + ASSERT_TRUE( xm.is_contiguous() ); - Kokkos::View<int,Kokkos::LayoutLeft,Space> x0 = Kokkos::subview( xm , 5, 3 ); + Kokkos::View< int, Kokkos::LayoutLeft, Space > x0 = Kokkos::subview( xm, 5, 3 ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & xm(5,3) ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm( 5, 3 ) ); - Kokkos::View<int*,Kokkos::LayoutLeft,Space> x1 = - Kokkos::subview( xm, Kokkos::ALL, 3 ); + Kokkos::View< int*, Kokkos::LayoutLeft, Space > x1 = Kokkos::subview( xm, Kokkos::ALL, 3 ); - ASSERT_TRUE( x1.is_contiguous() ); - for ( int i = 0 ; i < int(xm.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x1(i) == & xm(i,3) ); - } - - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2 = - Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL ); + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0; i < int( xm.dimension_0() ); ++i ) { + ASSERT_TRUE( & x1( i ) == & xm( i, 3 ) ); + } - ASSERT_TRUE( ! x2.is_contiguous() ); - for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2(i,j) == & xm(1+i,j) ); - } + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2 = + Kokkos::subview( xm, Kokkos::pair< int, int >( 1, 9 ), Kokkos::ALL ); - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2c = - Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) ); + ASSERT_TRUE( ! 
x2.is_contiguous() ); + for ( int j = 0; j < int( x2.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2( i, j ) == & xm( 1 + i, j ) ); + } - ASSERT_TRUE( x2c.is_contiguous() ); - for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2c(i,j) == & xm(i,2+j) ); - } + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2c = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 2, 4 ) ); - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n1 = - Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL ); + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0; j < int( x2c.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2c.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2c( i, j ) == & xm( i, 2 + j ) ); + } - ASSERT_TRUE( x2_n1.dimension_0() == 0 ); - ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2_n1 = + Kokkos::subview( xm, std::pair< int, int >( 1, 1 ), Kokkos::ALL ); - Kokkos::View<int**,Kokkos::LayoutLeft,Space> x2_n2 = - Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) ); + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); - ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + Kokkos::View< int**, Kokkos::LayoutLeft, Space > x2_n2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 1, 1 ) ); + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); } } @@ -474,766 +478,814 @@ void test_left_3() template< class Space > void test_right_0() { - typedef Kokkos::View< int [2][3][4][5][2][3][4][5] , Kokkos::LayoutRight , Space > - view_static_8_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { - - view_static_8_type x_static_8("x_static_right_8"); - - Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x_static_8 , 0, 0, 0, 0, 0, 0, 0, 0 ); - - ASSERT_TRUE( & x0() == & x_static_8(0,0,0,0,0,0,0,0) ); - - Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = - Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) ); - - ASSERT_TRUE( x1.dimension_0() == 2 ); - ASSERT_TRUE( & x1(0) == & x_static_8(0,1,2,3,0,1,2,1) ); - ASSERT_TRUE( & x1(1) == & x_static_8(0,1,2,3,0,1,2,2) ); - - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = - Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair<int,int>(1,3) - , 0, 1, 2, Kokkos::pair<int,int>(1,3) ); - - ASSERT_TRUE( x2.dimension_0() == 2 ); - ASSERT_TRUE( x2.dimension_1() == 2 ); - ASSERT_TRUE( & x2(0,0) == & x_static_8(0,1,2,1,0,1,2,1) ); - ASSERT_TRUE( & x2(1,0) == & x_static_8(0,1,2,2,0,1,2,1) ); - ASSERT_TRUE( & x2(0,1) == & x_static_8(0,1,2,1,0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x_static_8(0,1,2,2,0,1,2,2) ); - - // Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 = - Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = - Kokkos::subview( x_static_8, 1, Kokkos::pair<int,int>(0,2), 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); - - ASSERT_TRUE( sx2.dimension_0() == 2 ); - ASSERT_TRUE( sx2.dimension_1() == 2 ); - ASSERT_TRUE( & sx2(0,0) == & x_static_8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x_static_8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x_static_8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x_static_8(1,1,2,3,1,1,2,3) ); - - Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = 
- Kokkos::subview( x_static_8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ - , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ - ); - - ASSERT_TRUE( sx4.dimension_0() == 2 ); - ASSERT_TRUE( sx4.dimension_1() == 2 ); - ASSERT_TRUE( sx4.dimension_2() == 2 ); - ASSERT_TRUE( sx4.dimension_3() == 2 ); - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x_static_8(0, 0+i0, 1, 1+i1, 1, 0+i2, 2, 2+i3) ); - } - + typedef Kokkos::View< int [2][3][4][5][2][3][4][5], Kokkos::LayoutRight, Space > view_static_8_type; + + if ( Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace, typename Space::memory_space>::accessible ) { + view_static_8_type x_static_8( "x_static_right_8" ); + + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( x_static_8, 0, 0, 0, 0, 0, 0, 0, 0 ); + + ASSERT_TRUE( & x0() == & x_static_8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); + + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = + Kokkos::subview( x_static_8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); + + ASSERT_TRUE( x1.dimension_0() == 2 ); + ASSERT_TRUE( & x1( 0 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x1( 1 ) == & x_static_8( 0, 1, 2, 3, 0, 1, 2, 2 ) ); + + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( x_static_8, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) + , 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); + + ASSERT_TRUE( x2.dimension_0() == 2 ); + ASSERT_TRUE( x2.dimension_1() == 2 ); + ASSERT_TRUE( & x2( 0, 0 ) == & x_static_8( 0, 1, 2, 1, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x_static_8( 0, 1, 2, 2, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x_static_8( 0, 1, 2, 1, 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x_static_8( 0, 1, 2, 2, 0, 1, 2, 2 ) ); + + // Kokkos::View< int**, Kokkos::LayoutRight, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x_static_8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); + + ASSERT_TRUE( sx2.dimension_0() == 2 ); + ASSERT_TRUE( sx2.dimension_1() == 2 ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x_static_8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x_static_8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x_static_8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x_static_8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); + + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x_static_8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); + + ASSERT_TRUE( sx4.dimension_0() == 2 ); + ASSERT_TRUE( sx4.dimension_1() == 2 ); + ASSERT_TRUE( sx4.dimension_2() == 2 ); + ASSERT_TRUE( sx4.dimension_3() == 2 ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x_static_8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_right_1() { - typedef 
Kokkos::View< int ****[2][3][4][5] , Kokkos::LayoutRight , Space > - view_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { + typedef Kokkos::View< int ****[2][3][4][5], Kokkos::LayoutRight, Space > view_type; - view_type x8("x_right_8",2,3,4,5); + if ( Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace, typename Space::memory_space>::accessible ) { + view_type x8( "x_right_8", 2, 3, 4, 5 ); - Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( x8 , 0, 0, 0, 0, 0, 0, 0, 0 ); + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( x8, 0, 0, 0, 0, 0, 0, 0, 0 ); - ASSERT_TRUE( & x0() == & x8(0,0,0,0,0,0,0,0) ); + ASSERT_TRUE( & x0() == & x8( 0, 0, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = - Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = + Kokkos::subview( x8, 0, 1, 2, 3, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - ASSERT_TRUE( & x1(0) == & x8(0,1,2,3,0,1,2,1) ); - ASSERT_TRUE( & x1(1) == & x8(0,1,2,3,0,1,2,2) ); + ASSERT_TRUE( & x1( 0 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x1( 1 ) == & x8( 0, 1, 2, 3, 0, 1, 2, 2 ) ); - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = - Kokkos::subview( x8, 0, 1, 2, Kokkos::pair<int,int>(1,3) - , 0, 1, 2, Kokkos::pair<int,int>(1,3) ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( x8, 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) + , 0, 1, 2, Kokkos::pair< int, int >( 1, 3 ) ); - ASSERT_TRUE( & x2(0,0) == & x8(0,1,2,1,0,1,2,1) ); - ASSERT_TRUE( & x2(1,0) == & x8(0,1,2,2,0,1,2,1) ); - ASSERT_TRUE( & x2(0,1) == & x8(0,1,2,1,0,1,2,2) ); - ASSERT_TRUE( & x2(1,1) == & x8(0,1,2,2,0,1,2,2) ); + ASSERT_TRUE( & x2( 0, 0 ) == & x8( 0, 1, 2, 1, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 1, 0 ) == & x8( 0, 1, 2, 2, 0, 1, 2, 1 ) ); + ASSERT_TRUE( & x2( 0, 1 ) == & x8( 0, 1, 2, 1, 0, 1, 2, 2 ) ); + ASSERT_TRUE( & x2( 1, 1 ) == & x8( 0, 1, 2, 2, 0, 1, 2, 2 ) ); - // Kokkos::View<int**,Kokkos::LayoutRight,Space> error_2 = - Kokkos::View<int**,Kokkos::LayoutStride,Space> sx2 = - Kokkos::subview( x8, 1, Kokkos::pair<int,int>(0,2), 2, 3 - , Kokkos::pair<int,int>(0,2), 1, 2, 3 ); + // Kokkos::View< int**, Kokkos::LayoutRight, Space > error_2 = + Kokkos::View< int**, Kokkos::LayoutStride, Space > sx2 = + Kokkos::subview( x8, 1, Kokkos::pair< int, int >( 0, 2 ), 2, 3 + , Kokkos::pair< int, int >( 0, 2 ), 1, 2, 3 ); - ASSERT_TRUE( & sx2(0,0) == & x8(1,0,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(1,0) == & x8(1,1,2,3,0,1,2,3) ); - ASSERT_TRUE( & sx2(0,1) == & x8(1,0,2,3,1,1,2,3) ); - ASSERT_TRUE( & sx2(1,1) == & x8(1,1,2,3,1,1,2,3) ); + ASSERT_TRUE( & sx2( 0, 0 ) == & x8( 1, 0, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 0 ) == & x8( 1, 1, 2, 3, 0, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 0, 1 ) == & x8( 1, 0, 2, 3, 1, 1, 2, 3 ) ); + ASSERT_TRUE( & sx2( 1, 1 ) == & x8( 1, 1, 2, 3, 1, 1, 2, 3 ) ); - Kokkos::View<int****,Kokkos::LayoutStride,Space> sx4 = - Kokkos::subview( x8, 0, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 1, Kokkos::pair<int,int>(1,3) /* of [5] */ - , 1, Kokkos::pair<int,int>(0,2) /* of [3] */ - , 2, Kokkos::pair<int,int>(2,4) /* of [5] */ - ); - - for ( int i0 = 0 ; i0 < (int) sx4.dimension_0() ; ++i0 ) - for ( int i1 = 0 ; i1 < (int) sx4.dimension_1() ; ++i1 ) - for ( int i2 = 0 ; i2 < (int) sx4.dimension_2() ; ++i2 ) - for ( int i3 = 0 ; i3 < (int) sx4.dimension_3() ; ++i3 ) { - ASSERT_TRUE( & sx4(i0,i1,i2,i3) == & x8(0,0+i0, 1,1+i1, 
1,0+i2, 2,2+i3) ); - } + Kokkos::View< int****, Kokkos::LayoutStride, Space > sx4 = + Kokkos::subview( x8, 0, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 1, Kokkos::pair< int, int >( 1, 3 ) /* of [5] */ + , 1, Kokkos::pair< int, int >( 0, 2 ) /* of [3] */ + , 2, Kokkos::pair< int, int >( 2, 4 ) /* of [5] */ + ); + for ( int i0 = 0; i0 < (int) sx4.dimension_0(); ++i0 ) + for ( int i1 = 0; i1 < (int) sx4.dimension_1(); ++i1 ) + for ( int i2 = 0; i2 < (int) sx4.dimension_2(); ++i2 ) + for ( int i3 = 0; i3 < (int) sx4.dimension_3(); ++i3 ) + { + ASSERT_TRUE( & sx4( i0, i1, i2, i3 ) == & x8( 0, 0 + i0, 1, 1 + i1, 1, 0 + i2, 2, 2 + i3 ) ); + } } } template< class Space > void test_right_3() { - typedef Kokkos::View< int ** , Kokkos::LayoutRight , Space > view_type ; - - if(Kokkos::Impl::SpaceAccessibility<Kokkos::HostSpace,typename Space::memory_space>::accessible) { + typedef Kokkos::View< int **, Kokkos::LayoutRight, Space > view_type; - view_type xm("x4",10,5); + if ( Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, typename Space::memory_space >::accessible ) { + view_type xm( "x4", 10, 5 ); - ASSERT_TRUE( xm.is_contiguous() ); + ASSERT_TRUE( xm.is_contiguous() ); - Kokkos::View<int,Kokkos::LayoutRight,Space> x0 = Kokkos::subview( xm , 5, 3 ); + Kokkos::View< int, Kokkos::LayoutRight, Space > x0 = Kokkos::subview( xm, 5, 3 ); - ASSERT_TRUE( x0.is_contiguous() ); - ASSERT_TRUE( & x0() == & xm(5,3) ); + ASSERT_TRUE( x0.is_contiguous() ); + ASSERT_TRUE( & x0() == & xm( 5, 3 ) ); - Kokkos::View<int*,Kokkos::LayoutRight,Space> x1 = - Kokkos::subview( xm, 3, Kokkos::ALL ); - - ASSERT_TRUE( x1.is_contiguous() ); - for ( int i = 0 ; i < int(xm.dimension_1()) ; ++i ) { - ASSERT_TRUE( & x1(i) == & xm(3,i) ); - } + Kokkos::View< int*, Kokkos::LayoutRight, Space > x1 = Kokkos::subview( xm, 3, Kokkos::ALL ); - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2c = - Kokkos::subview( xm, Kokkos::pair<int,int>(1,9), Kokkos::ALL ); + ASSERT_TRUE( x1.is_contiguous() ); + for ( int i = 0; i < int( xm.dimension_1() ); ++i ) { + ASSERT_TRUE( & x1( i ) == & xm( 3, i ) ); + } - ASSERT_TRUE( x2c.is_contiguous() ); - for ( int j = 0 ; j < int(x2c.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2c.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2c(i,j) == & xm(1+i,j) ); - } + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2c = + Kokkos::subview( xm, Kokkos::pair< int, int >( 1, 9 ), Kokkos::ALL ); - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2 = - Kokkos::subview( xm, Kokkos::ALL, std::pair<int,int>(2,4) ); + ASSERT_TRUE( x2c.is_contiguous() ); + for ( int j = 0; j < int( x2c.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2c.dimension_0() ); ++i ) { + ASSERT_TRUE( & x2c( i, j ) == & xm( 1 + i, j ) ); + } - ASSERT_TRUE( ! x2.is_contiguous() ); - for ( int j = 0 ; j < int(x2.dimension_1()) ; ++j ) - for ( int i = 0 ; i < int(x2.dimension_0()) ; ++i ) { - ASSERT_TRUE( & x2(i,j) == & xm(i,2+j) ); - } + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 2, 4 ) ); - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n1 = - Kokkos::subview( xm , std::pair<int,int>(1,1) , Kokkos::ALL ); + ASSERT_TRUE( ! 
x2.is_contiguous() ); + for ( int j = 0; j < int( x2.dimension_1() ); ++j ) + for ( int i = 0; i < int( x2.dimension_0() ); ++i ) + { + ASSERT_TRUE( & x2( i, j ) == & xm( i, 2 + j ) ); + } - ASSERT_TRUE( x2_n1.dimension_0() == 0 ); - ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2_n1 = + Kokkos::subview( xm, std::pair< int, int >( 1, 1 ), Kokkos::ALL ); - Kokkos::View<int**,Kokkos::LayoutRight,Space> x2_n2 = - Kokkos::subview( xm , Kokkos::ALL , std::pair<int,int>(1,1) ); + ASSERT_TRUE( x2_n1.dimension_0() == 0 ); + ASSERT_TRUE( x2_n1.dimension_1() == xm.dimension_1() ); - ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); - ASSERT_TRUE( x2_n2.dimension_1() == 0 ); + Kokkos::View< int**, Kokkos::LayoutRight, Space > x2_n2 = + Kokkos::subview( xm, Kokkos::ALL, std::pair< int, int >( 1, 1 ) ); + ASSERT_TRUE( x2_n2.dimension_0() == xm.dimension_0() ); + ASSERT_TRUE( x2_n2.dimension_1() == 0 ); } } namespace Impl { -constexpr int N0=113; -constexpr int N1=11; -constexpr int N2=17; -constexpr int N3=5; -constexpr int N4=7; +constexpr int N0 = 113; +constexpr int N1 = 11; +constexpr int N2 = 17; +constexpr int N3 = 5; +constexpr int N4 = 7; -template<class SubView,class View> -void test_Check1D(SubView a, View b, std::pair<int,int> range) { +template< class SubView, class View > +void test_Check1D( SubView a, View b, std::pair< int, int > range ) { int errors = 0; - for(int i=0;i<range.second-range.first;i++) { - if(a(i)!=b(i+range.first)) - errors++; + + for ( int i = 0; i < range.second - range.first; i++ ) { + if ( a( i ) != b( i + range.first ) ) errors++; + } + + if ( errors > 0 ) { + std::cout << "Error Suviews test_Check1D: " << errors << std::endl; } - if(errors>0) - std::cout << "Error Suviews test_Check1D: " << errors <<std::endl; + ASSERT_TRUE( errors == 0 ); } -template<class SubView,class View> -void test_Check1D2D(SubView a, View b, int i0, std::pair<int,int> range) { +template< class SubView, class View > +void test_Check1D2D( SubView a, View b, int i0, std::pair< int, int > range ) { int errors = 0; - for(int i1=0;i1<range.second-range.first;i1++) { - if(a(i1)!=b(i0,i1+range.first)) - errors++; + + for ( int i1 = 0; i1 < range.second - range.first; i1++ ) { + if ( a( i1 ) != b( i0, i1 + range.first ) ) errors++; } - if(errors>0) - std::cout << "Error Suviews test_Check1D2D: " << errors <<std::endl; + + if ( errors > 0 ) { + std::cout << "Error Suviews test_Check1D2D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template<class SubView,class View> -void test_Check2D3D(SubView a, View b, int i0, std::pair<int,int> range1, std::pair<int,int> range2) { +template< class SubView, class View > +void test_Check2D3D( SubView a, View b, int i0, std::pair< int, int > range1 + , std::pair< int, int > range2 ) +{ int errors = 0; - for(int i1=0;i1<range1.second-range1.first;i1++) { - for(int i2=0;i2<range2.second-range2.first;i2++) { - if(a(i1,i2)!=b(i0,i1+range1.first,i2+range2.first)) - errors++; + + for ( int i1 = 0; i1 < range1.second - range1.first; i1++ ) { + for ( int i2 = 0; i2 < range2.second - range2.first; i2++ ) { + if ( a( i1, i2 ) != b( i0, i1 + range1.first, i2 + range2.first ) ) errors++; } } - if(errors>0) - std::cout << "Error Suviews test_Check2D3D: " << errors <<std::endl; + + if ( errors > 0 ) { + std::cout << "Error Suviews test_Check2D3D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template<class SubView,class View> -void test_Check3D5D(SubView a, View b, int i0, int 
i1, std::pair<int,int> range2, std::pair<int,int> range3, std::pair<int,int> range4) { +template<class SubView, class View> +void test_Check3D5D( SubView a, View b, int i0, int i1, std::pair< int, int > range2 + , std::pair< int, int > range3, std::pair< int, int > range4 ) +{ int errors = 0; - for(int i2=0;i2<range2.second-range2.first;i2++) { - for(int i3=0;i3<range3.second-range3.first;i3++) { - for(int i4=0;i4<range4.second-range4.first;i4++) { - if(a(i2,i3,i4)!=b(i0,i1,i2+range2.first,i3+range3.first,i4+range4.first)) + + for ( int i2 = 0; i2 < range2.second - range2.first; i2++ ) { + for ( int i3 = 0; i3 < range3.second - range3.first; i3++ ) { + for ( int i4 = 0; i4 < range4.second - range4.first; i4++ ) { + if ( a( i2, i3, i4 ) != b( i0, i1, i2 + range2.first, i3 + range3.first, i4 + range4.first ) ) { errors++; + } } } } - if(errors>0) - std::cout << "Error Suviews test_Check3D5D: " << errors <<std::endl; + + if ( errors > 0 ) { + std::cout << "Error Suviews test_Check3D5D: " << errors << std::endl; + } + ASSERT_TRUE( errors == 0 ); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_1d_assign_impl() { - - { //Breaks - Kokkos::View<int*,LayoutOrg,Space> a_org("A",N0); - Kokkos::View<int*,LayoutOrg,Space,MemTraits> a(a_org); + { // Breaks. + Kokkos::View< int*, LayoutOrg, Space > a_org( "A", N0 ); + Kokkos::View< int*, LayoutOrg, Space, MemTraits > a( a_org ); Kokkos::fence(); - for(int i=0; i<N0; i++) - a_org(i) = i; + for ( int i = 0; i < N0; i++ ) a_org( i ) = i; - Kokkos::View<int[N0],Layout,Space,MemTraits> a1(a); + Kokkos::View< int[N0], Layout, Space, MemTraits > a1( a ); Kokkos::fence(); - test_Check1D(a1,a,std::pair<int,int>(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); - Kokkos::View<int[N0],LayoutSub,Space,MemTraits> a2(a1); + Kokkos::View< int[N0], LayoutSub, Space, MemTraits > a2( a1 ); Kokkos::fence(); - test_Check1D(a2,a,std::pair<int,int>(0,N0)); + test_Check1D( a2, a, std::pair< int, int >( 0, N0 ) ); a1 = a; - test_Check1D(a1,a,std::pair<int,int>(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); - //Runtime Fail expected - //Kokkos::View<int[N1]> afail1(a); + // Runtime Fail expected. + //Kokkos::View< int[N1] > afail1( a ); - //Compile Time Fail expected - //Kokkos::View<int[N1]> afail2(a1); + // Compile Time Fail expected. + //Kokkos::View< int[N1] > afail2( a1 ); } - { // Works - Kokkos::View<int[N0],LayoutOrg,Space,MemTraits> a("A"); - Kokkos::View<int*,Layout,Space,MemTraits> a1(a); + { // Works. 
+ Kokkos::View< int[N0], LayoutOrg, Space, MemTraits > a( "A" ); + Kokkos::View< int*, Layout, Space, MemTraits > a1( a ); Kokkos::fence(); - test_Check1D(a1,a,std::pair<int,int>(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); a1 = a; Kokkos::fence(); - test_Check1D(a1,a,std::pair<int,int>(0,N0)); + test_Check1D( a1, a, std::pair< int, int >( 0, N0 ) ); } } -template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg,class MemTraits> +template< class Space, class Type, class TypeSub, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_2d_subview_3d_impl_type() { - Kokkos::View<int***,LayoutOrg,Space> a_org("A",N0,N1,N2); - Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); - for(int i0=0; i0<N0; i0++) - for(int i1=0; i1<N1; i1++) - for(int i2=0; i2<N2; i2++) - a_org(i0,i1,i2) = i0*1000000+i1*1000+i2; - Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1; - a1 = Kokkos::subview(a,3,Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int***, LayoutOrg, Space > a_org( "A", N0, N1, N2 ); + Kokkos::View< Type, Layout, Space, MemTraits > a( a_org ); + + for ( int i0 = 0; i0 < N0; i0++ ) + for ( int i1 = 0; i1 < N1; i1++ ) + for ( int i2 = 0; i2 < N2; i2++ ) + { + a_org( i0, i1, i2 ) = i0 * 1000000 + i1 * 1000 + i2; + } + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a1; + a1 = Kokkos::subview( a, 3, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check2D3D(a1,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); + test_Check2D3D( a1, a, 3, std::pair< int, int >( 0, N1 ), std::pair< int, int >( 0, N2 ) ); - Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,Kokkos::ALL,Kokkos::ALL); + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a2( a, 3, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check2D3D(a2,a,3,std::pair<int,int>(0,N1),std::pair<int,int>(0,N2)); + test_Check2D3D( a2, a, 3, std::pair< int, int >( 0, N1 ), std::pair< int, int >( 0, N2 ) ); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_2d_subview_3d_impl_layout() { - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int[N0][N1][N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int[N0][N1][N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,int* [N1][N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int* [N1][N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int* [N1][N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int* [N1][N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,int** 
[N2],int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int** [N2],int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int** [N2],int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, int** [N2], int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int** [N2], int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int** [N2], int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,int*** ,int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int*** ,int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,int*** ,int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, int*** , int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int*** , int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, int*** , int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int[N0][N1][N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int[N0][N1][N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int* [N1][N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int* [N1][N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,const int** [N2],const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int** [N2],const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int** [N2],const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int** [N2], const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); - test_2d_subview_3d_impl_type<Space,const int*** ,const int[N1][N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_2d_subview_3d_impl_type<Space,const int*** ,const int* [N2],LayoutSub, Layout, LayoutOrg, MemTraits>(); - 
test_2d_subview_3d_impl_type<Space,const int*** ,const int** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_2d_subview_3d_impl_type< Space, const int*** , const int[N1][N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int*** , const int* [N2], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_2d_subview_3d_impl_type< Space, const int*** , const int** , LayoutSub, Layout, LayoutOrg, MemTraits >(); } -template<class Space, class Type, class TypeSub,class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +template< class Space, class Type, class TypeSub, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_3d_subview_5d_impl_type() { - Kokkos::View<int*****,LayoutOrg,Space> a_org("A",N0,N1,N2,N3,N4); - Kokkos::View<Type,Layout,Space,MemTraits> a(a_org); - for(int i0=0; i0<N0; i0++) - for(int i1=0; i1<N1; i1++) - for(int i2=0; i2<N2; i2++) - for(int i3=0; i3<N3; i3++) - for(int i4=0; i4<N4; i4++) - a_org(i0,i1,i2,i3,i4) = i0*1000000+i1*10000+i2*100+i3*10+i4; - Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a1; - a1 = Kokkos::subview(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int*****, LayoutOrg, Space > a_org( "A", N0, N1, N2, N3, N4 ); + Kokkos::View< Type, Layout, Space, MemTraits > a( a_org ); + + for ( int i0 = 0; i0 < N0; i0++ ) + for ( int i1 = 0; i1 < N1; i1++ ) + for ( int i2 = 0; i2 < N2; i2++ ) + for ( int i3 = 0; i3 < N3; i3++ ) + for ( int i4 = 0; i4 < N4; i4++ ) + { + a_org( i0, i1, i2, i3, i4 ) = i0 * 1000000 + i1 * 10000 + i2 * 100 + i3 * 10 + i4; + } + + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a1; + a1 = Kokkos::subview( a, 3, 5, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check3D5D(a1,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); + test_Check3D5D( a1, a, 3, 5, std::pair< int, int >( 0, N2 ), std::pair< int, int >( 0, N3 ), std::pair< int, int >( 0, N4 ) ); - Kokkos::View<TypeSub,LayoutSub,Space,MemTraits> a2(a,3,5,Kokkos::ALL,Kokkos::ALL,Kokkos::ALL); + Kokkos::View< TypeSub, LayoutSub, Space, MemTraits > a2( a, 3, 5, Kokkos::ALL, Kokkos::ALL, Kokkos::ALL ); Kokkos::fence(); - test_Check3D5D(a2,a,3,5,std::pair<int,int>(0,N2),std::pair<int,int>(0,N3),std::pair<int,int>(0,N4)); + test_Check3D5D( a2, a, 3, 5, std::pair< int, int >( 0, N2 ), std::pair< int, int >( 0, N3 ), std::pair< int, int >( 0, N4 ) ); } -template<class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits> +template< class Space, class LayoutSub, class Layout, class LayoutOrg, class MemTraits > void test_3d_subview_5d_impl_layout() { - test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int[N0][N1][N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int* [N1][N2][N3][N4],int*** 
,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int** [N2][N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int*** [N3][N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, int**** [N4],int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int**** [N4],int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int**** [N4],int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int**** [N4],int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, int***** ,int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int***** ,int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int***** ,int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, int***** ,int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int[N0][N1][N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int* [N1][N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int** [N2][N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int* [N3][N4],LayoutSub, 
Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int*** [N3][N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int**** [N4],const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int**** [N4],const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int**** [N4],const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int**** [N4],const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); - - test_3d_subview_5d_impl_type<Space, const int***** ,const int[N2][N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int***** ,const int* [N3][N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int***** ,const int** [N4],LayoutSub, Layout, LayoutOrg, MemTraits>(); - test_3d_subview_5d_impl_type<Space, const int***** ,const int*** ,LayoutSub, Layout, LayoutOrg, MemTraits>(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int[N0][N1][N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int* [N1][N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int** [N2][N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int*** [N3][N4], int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, int**** [N4], int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int**** [N4], int*** , LayoutSub, Layout, LayoutOrg, 
MemTraits >(); + + test_3d_subview_5d_impl_type< Space, int***** , int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, int***** , int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int[N0][N1][N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int* [N1][N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int** [N2][N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int*** [N3][N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int**** [N4], const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); + + test_3d_subview_5d_impl_type< Space, const int***** , const int[N2][N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int* [N3][N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int** [N4], LayoutSub, Layout, LayoutOrg, MemTraits >(); + test_3d_subview_5d_impl_type< Space, const int***** , const int*** , LayoutSub, Layout, LayoutOrg, MemTraits >(); } inline void 
test_subview_legal_args_right() { - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutRight,Kokkos::LayoutRight,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, 
int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, 
Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); } inline void test_subview_legal_args_left() { - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,int>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>,int>::value)); - 
- ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,int,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,int,Kokkos::Impl::ALL_t>::value)); - - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - 
ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,5,0,int,int,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(1,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::Impl::ALL_t,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::Impl::ALL_t>::value)); - ASSERT_EQ(0,(Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime<Kokkos::LayoutLeft,Kokkos::LayoutLeft,3,3,0,Kokkos::pair<int,int>,Kokkos::pair<int,int>,Kokkos::pair<int,int>>::value)); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, int >::value ) ); + 
ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 
0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int>, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, int, Kokkos::Impl::ALL_t >::value ) ); + + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::pair<int, int> >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair<int, int>, Kokkos::pair<int, 
int> >::value ) );
+ ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::Impl::ALL_t >::value ) );
+ ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair<int, int>, Kokkos::pair<int, int>, Kokkos::pair<int, int> >::value ) );
 }

-}
+} // namespace Impl

-template< class Space, class MemTraits = void>
+template< class Space, class MemTraits = void >
 void test_1d_assign() {
- Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft, MemTraits>();
- //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutLeft ,Kokkos::LayoutLeft >();
- Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft ,Kokkos::LayoutLeft, MemTraits>();
- //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutRight ,Kokkos::LayoutLeft >();
- Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
- Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutRight ,Kokkos::LayoutRight, MemTraits>();
- //Impl::test_1d_assign_impl<Space,Kokkos::LayoutLeft ,Kokkos::LayoutStride,Kokkos::LayoutLeft >();
- //Impl::test_1d_assign_impl<Space,Kokkos::LayoutRight ,Kokkos::LayoutStride,Kokkos::LayoutLeft >();
- Impl::test_1d_assign_impl<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
+ Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >();
+ //Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, Kokkos::LayoutLeft, Kokkos::LayoutLeft >();
+ Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >();
+ //Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutRight, Kokkos::LayoutLeft >();
+ Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >();
+ Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >();
+ //Impl::test_1d_assign_impl< Space, Kokkos::LayoutLeft, Kokkos::LayoutStride, Kokkos::LayoutLeft >();
+ //Impl::test_1d_assign_impl< Space, Kokkos::LayoutRight, Kokkos::LayoutStride, Kokkos::LayoutLeft >();
+ Impl::test_1d_assign_impl< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >();
 }

-template<class Space, class MemTraits = void>
+template< class Space, class MemTraits = void >
 void test_2d_subview_3d() {
- Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutRight ,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
- Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
- Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
- Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits>();
- Impl::test_2d_subview_3d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
+ Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutRight, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >();
+ Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >();
+ Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutRight, MemTraits >();
+ Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >();
+ Impl::test_2d_subview_3d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >();
 }

-template<class Space, class MemTraits = void>
+template< class Space, class MemTraits = void >
 void test_3d_subview_5d_right() {
- Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits>();
- Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutRight, MemTraits>();
+ Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutRight, Kokkos::LayoutRight, MemTraits >();
+ Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutRight, MemTraits >();
 }

-template<class Space, class MemTraits = void>
+template< class Space, class MemTraits = void >
 void test_3d_subview_5d_left() {
- Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits>();
- Impl::test_3d_subview_5d_impl_layout<Space,Kokkos::LayoutStride,Kokkos::LayoutStride,Kokkos::LayoutLeft, MemTraits>();
+ Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutLeft, Kokkos::LayoutLeft, MemTraits >();
+ Impl::test_3d_subview_5d_impl_layout< Space, Kokkos::LayoutStride, Kokkos::LayoutStride, Kokkos::LayoutLeft, MemTraits >();
 }

+namespace Impl {
+template< class Layout, class Space >
+struct FillView_3D {
+ Kokkos::View< int***, Layout, Space > a;

-namespace Impl {

+ KOKKOS_INLINE_FUNCTION
+ void operator()( const int & ii ) const
+ {
+ const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ii % a.dimension_0()
+ : ii / ( a.dimension_1() * a.dimension_2() );
+
+ const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ( ii / a.dimension_0() ) % a.dimension_1()
+ : ( ii / a.dimension_2() ) % a.dimension_1();
+
+ const int k = std::is_same< Layout, Kokkos::LayoutRight >::value
+ ? ii / ( a.dimension_0() * a.dimension_1() )
+ : ii % a.dimension_2();

- template<class Layout, class Space>
- struct FillView_3D {
- Kokkos::View<int***,Layout,Space> a;
-
- KOKKOS_INLINE_FUNCTION
- void operator() (const int& ii) const {
- const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
- ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2());
- const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
- (ii / a.dimension_0()) % a.dimension_1() : (ii / a.dimension_2()) % a.dimension_1();
- const int k = std::is_same<Layout,Kokkos::LayoutRight>::value ?
- ii / (a.dimension_0() * a.dimension_1()) : ii % a.dimension_2();
- a(i,j,k) = 1000000 * i + 1000 * j + k;
+ a( i, j, k ) = 1000000 * i + 1000 * j + k;
+ }
+};
+
+template< class Layout, class Space >
+struct FillView_4D {
+ Kokkos::View< int****, Layout, Space > a;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( const int & ii ) const {
+ const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ii % a.dimension_0()
+ : ii / ( a.dimension_1() * a.dimension_2() * a.dimension_3() );
+
+ const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ( ii / a.dimension_0() ) % a.dimension_1()
+ : ( ii / ( a.dimension_2() * a.dimension_3() ) % a.dimension_1() );
+
+ const int k = std::is_same< Layout, Kokkos::LayoutRight >::value
+ ? ( ii / ( a.dimension_0() * a.dimension_1() ) ) % a.dimension_2()
+ : ( ii / a.dimension_3() ) % a.dimension_2();
+
+ const int l = std::is_same< Layout, Kokkos::LayoutRight >::value
+ ? ii / ( a.dimension_0() * a.dimension_1() * a.dimension_2() )
+ : ii % a.dimension_3();
+
+ a( i, j, k, l ) = 1000000 * i + 10000 * j + 100 * k + l;
+ }
+};
+
+template< class Layout, class Space, class MemTraits >
+struct CheckSubviewCorrectness_3D_3D {
+ Kokkos::View< const int***, Layout, Space, MemTraits > a;
+ Kokkos::View< const int***, Layout, Space, MemTraits > b;
+ int offset_0, offset_2;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( const int & ii ) const
+ {
+ const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ii % b.dimension_0()
+ : ii / ( b.dimension_1() * b.dimension_2() );
+
+ const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ( ii / b.dimension_0() ) % b.dimension_1()
+ : ( ii / b.dimension_2() ) % b.dimension_1();
+
+ const int k = std::is_same< Layout, Kokkos::LayoutRight >::value
+ ? ii / ( b.dimension_0() * b.dimension_1() )
+ : ii % b.dimension_2();
+
+ if ( a( i + offset_0, j, k + offset_2 ) != b( i, j, k ) ) {
+ Kokkos::abort( "Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)" );
 }
- };
-
- template<class Layout, class Space>
- struct FillView_4D {
- Kokkos::View<int****,Layout,Space> a;
-
- KOKKOS_INLINE_FUNCTION
- void operator() (const int& ii) const {
- const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
- ii % a.dimension_0(): ii / (a.dimension_1()*a.dimension_2()*a.dimension_3());
- const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
- (ii / a.dimension_0()) % a.dimension_1() : (ii / (a.dimension_2()*a.dimension_3()) % a.dimension_1());
- const int k = std::is_same<Layout,Kokkos::LayoutRight>::value ?
- (ii / (a.dimension_0() * a.dimension_1())) % a.dimension_2() : (ii / a.dimension_3()) % a.dimension_2();
- const int l = std::is_same<Layout,Kokkos::LayoutRight>::value ?
- ii / (a.dimension_0() * a.dimension_1() * a.dimension_2()) : ii % a.dimension_3();
- a(i,j,k,l) = 1000000 * i + 10000 * j + 100 * k + l;
+ }
+};
+
+template< class Layout, class Space, class MemTraits >
+struct CheckSubviewCorrectness_3D_4D {
+ Kokkos::View< const int****, Layout, Space, MemTraits > a;
+ Kokkos::View< const int***, Layout, Space, MemTraits > b;
+ int offset_0, offset_2, index;
+
+ KOKKOS_INLINE_FUNCTION
+ void operator()( const int & ii ) const {
+ const int i = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ii % b.dimension_0()
+ : ii / ( b.dimension_1() * b.dimension_2() );
+
+ const int j = std::is_same< Layout, Kokkos::LayoutLeft >::value
+ ? ( ii / b.dimension_0() ) % b.dimension_1()
+ : ( ii / b.dimension_2() ) % b.dimension_1();
+
+ const int k = std::is_same< Layout, Kokkos::LayoutRight >::value
+ ? ii / ( b.dimension_0() * b.dimension_1() )
+ : ii % b.dimension_2();
+
+ int i0, i1, i2, i3;
+
+ if ( std::is_same< Layout, Kokkos::LayoutLeft >::value ) {
+ i0 = i + offset_0;
+ i1 = j;
+ i2 = k + offset_2;
+ i3 = index;
 }
- };
-
- template<class Layout, class Space, class MemTraits>
- struct CheckSubviewCorrectness_3D_3D {
- Kokkos::View<const int***,Layout,Space,MemTraits> a;
- Kokkos::View<const int***,Layout,Space,MemTraits> b;
- int offset_0,offset_2;
-
- KOKKOS_INLINE_FUNCTION
- void operator() (const int& ii) const {
- const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ?
- ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2()); - const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ? - (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1(); - const int k = std::is_same<Layout,Kokkos::LayoutRight>::value ? - ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2(); - if( a(i+offset_0,j,k+offset_2) != b(i,j,k)) - Kokkos::abort("Error: check_subview_correctness 3D-3D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)"); + else { + i0 = index; + i1 = i + offset_0; + i2 = j; + i3 = k + offset_2; } - }; - - template<class Layout, class Space, class MemTraits> - struct CheckSubviewCorrectness_3D_4D { - Kokkos::View<const int****,Layout,Space,MemTraits> a; - Kokkos::View<const int***,Layout,Space,MemTraits> b; - int offset_0,offset_2,index; - - KOKKOS_INLINE_FUNCTION - void operator() (const int& ii) const { - const int i = std::is_same<Layout,Kokkos::LayoutLeft>::value ? - ii % b.dimension_0(): ii / (b.dimension_1()*b.dimension_2()); - const int j = std::is_same<Layout,Kokkos::LayoutLeft>::value ? - (ii / b.dimension_0()) % b.dimension_1() : (ii / b.dimension_2()) % b.dimension_1(); - const int k = std::is_same<Layout,Kokkos::LayoutRight>::value ? - ii / (b.dimension_0() * b.dimension_1()) : ii % b.dimension_2(); - - int i0,i1,i2,i3; - if(std::is_same<Layout,Kokkos::LayoutLeft>::value) { - i0 = i + offset_0; - i1 = j; - i2 = k + offset_2; - i3 = index; - } else { - i0 = index; - i1 = i + offset_0; - i2 = j; - i3 = k + offset_2; - } - if( a(i0,i1,i2,i3) != b(i,j,k)) - Kokkos::abort("Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)"); + + if ( a( i0, i1, i2, i3 ) != b( i, j, k ) ) { + Kokkos::abort( "Error: check_subview_correctness 3D-4D (LayoutLeft -> LayoutLeft or LayoutRight -> LayoutRight)" ); } - }; -} + } +}; -template<class Space, class MemTraits = void> +} // namespace Impl + +template< class Space, class MemTraits = void > void test_layoutleft_to_layoutleft() { Impl::test_subview_legal_args_left(); { - Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,3); - Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > a( "A", 100, 4, 3 ); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::ALL ); - Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill; + Impl::FillView_3D< Kokkos::LayoutLeft, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 0; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } + { - Kokkos::View<int***,Kokkos::LayoutLeft,Space> a("A",100,4,5); - Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3)); + Kokkos::View< int***, Kokkos::LayoutLeft, 
Space > a( "A", 100, 4, 5 ); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::pair< int, int >( 1, 3 ) ); - Impl::FillView_3D<Kokkos::LayoutLeft,Space> fill; + Impl::FillView_3D<Kokkos::LayoutLeft, Space> fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutLeft,Space,MemTraits> check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 1; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } + { - Kokkos::View<int****,Kokkos::LayoutLeft,Space> a("A",100,4,5,3); - Kokkos::View<int***,Kokkos::LayoutLeft,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::pair<int,int>(1,3),1); + Kokkos::View< int****, Kokkos::LayoutLeft, Space > a( "A", 100, 4, 5, 3 ); + Kokkos::View< int***, Kokkos::LayoutLeft, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::pair< int, int >( 1, 3 ), 1 ); - Impl::FillView_4D<Kokkos::LayoutLeft,Space> fill; + Impl::FillView_4D< Kokkos::LayoutLeft, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) * a.extent( 3 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutLeft,Space,MemTraits> check; + Impl::CheckSubviewCorrectness_3D_4D< Kokkos::LayoutLeft, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; check.offset_2 = 1; check.index = 1; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } } -template<class Space, class MemTraits = void> +template< class Space, class MemTraits = void > void test_layoutright_to_layoutright() { Impl::test_subview_legal_args_right(); { - Kokkos::View<int***,Kokkos::LayoutRight,Space> a("A",100,4,3); - Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,Kokkos::pair<int,int>(16,32),Kokkos::ALL,Kokkos::ALL); + Kokkos::View< int***, Kokkos::LayoutRight, Space > a( "A", 100, 4, 3 ); + Kokkos::View< int***, Kokkos::LayoutRight, Space > b( a, Kokkos::pair< int, int >( 16, 32 ), Kokkos::ALL, Kokkos::ALL ); - Impl::FillView_3D<Kokkos::LayoutRight,Space> fill; + Impl::FillView_3D<Kokkos::LayoutRight, Space> fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_3D<Kokkos::LayoutRight,Space,MemTraits> check; + Impl::CheckSubviewCorrectness_3D_3D< Kokkos::LayoutRight, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 16; 
check.offset_2 = 0; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } - { - Kokkos::View<int****,Kokkos::LayoutRight,Space> a("A",3,4,5,100); - Kokkos::View<int***,Kokkos::LayoutRight,Space> b(a,1,Kokkos::pair<int,int>(1,3),Kokkos::ALL,Kokkos::ALL); + { + Kokkos::View< int****, Kokkos::LayoutRight, Space > a( "A", 3, 4, 5, 100 ); + Kokkos::View< int***, Kokkos::LayoutRight, Space > b( a, 1, Kokkos::pair< int, int >( 1, 3 ), Kokkos::ALL, Kokkos::ALL ); - Impl::FillView_4D<Kokkos::LayoutRight,Space> fill; + Impl::FillView_4D< Kokkos::LayoutRight, Space > fill; fill.a = a; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,a.extent(0)*a.extent(1)*a.extent(2)*a.extent(3)), fill); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, a.extent( 0 ) * a.extent( 1 ) * a.extent( 2 ) * a.extent( 3 ) ), fill ); - Impl::CheckSubviewCorrectness_3D_4D<Kokkos::LayoutRight,Space,MemTraits> check; + Impl::CheckSubviewCorrectness_3D_4D< Kokkos::LayoutRight, Space, MemTraits > check; check.a = a; check.b = b; check.offset_0 = 1; check.offset_2 = 0; check.index = 1; - Kokkos::parallel_for(Kokkos::RangePolicy<typename Space::execution_space>(0,b.extent(0)*b.extent(1)*b.extent(2)), check); + Kokkos::parallel_for( Kokkos::RangePolicy< typename Space::execution_space >( 0, b.extent( 0 ) * b.extent( 1 ) * b.extent( 2 ) ), check ); } } - -} -//---------------------------------------------------------------------------- - +} // namespace TestViewSubview diff --git a/lib/kokkos/core/unit_test/UnitTestMain.cpp b/lib/kokkos/core/unit_test/UnitTestMain.cpp index f952ab3db51028aff0a0ebfe313b2639e353ab87..4f52fc956707147761dd60354d9cade69b37bb9a 100644 --- a/lib/kokkos/core/unit_test/UnitTestMain.cpp +++ b/lib/kokkos/core/unit_test/UnitTestMain.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,15 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ #include <gtest/gtest.h> -int main(int argc, char *argv[]) { - ::testing::InitGoogleTest(&argc,argv); +int main( int argc, char *argv[] ) { + ::testing::InitGoogleTest( &argc, argv ); return RUN_ALL_TESTS(); } - diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp index 36b9b0688ba239ec2f6bf2b847184e95b07f84a3..768b0392048184a4e26c320f16329c07bb8caba5 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda.hpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda.hpp @@ -40,31 +40,25 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_CUDA_HPP #define KOKKOS_TEST_CUDA_HPP + #include <gtest/gtest.h> #include <Kokkos_Macros.hpp> - #include <Kokkos_Core.hpp> #include <TestTile.hpp> - -//---------------------------------------------------------------------------- - #include <TestSharedAlloc.hpp> #include <TestViewMapping.hpp> - - #include <TestViewAPI.hpp> #include <TestViewOfClass.hpp> #include <TestViewSubview.hpp> #include <TestViewSpaceAssign.hpp> #include <TestAtomic.hpp> #include <TestAtomicOperations.hpp> - #include <TestAtomicViews.hpp> - #include <TestRange.hpp> #include <TestTeam.hpp> #include <TestReduce.hpp> @@ -73,20 +67,16 @@ #include <TestCompilerMacros.hpp> #include <TestTaskScheduler.hpp> #include <TestMemoryPool.hpp> - - #include <TestCXX11.hpp> #include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> #include <TestTemplateMetaFunctions.hpp> - #include <TestPolicyConstruction.hpp> - #include <TestMDRange.hpp> namespace Test { -// For Some Reason I can only have the definition of SetUp and TearDown in one cpp file ... +// For some reason I can only have the definition of SetUp and TearDown in one cpp file ... 
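The guard macro mentioned in the comment above is worth spelling out, since the fixture declared next depends on it. A minimal sketch of the pattern, using the same names this patch uses: exactly one translation unit (TestCuda_Other.cpp, later in this patch) defines TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN before including the fixture header, so the static SetUpTestCase()/TearDownTestCase() bodies are compiled once; every other TestCuda_*.cpp includes the header without the macro and only sees the declarations.

// In exactly one .cpp file (TestCuda_Other.cpp in this patch):
#define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN
#include <cuda/TestCuda.hpp>

// In every other TestCuda_*.cpp: include the header without defining the
// macro, so the fixture class is declared but SetUp/TearDown bodies are
// not duplicated across translation units.
#include <cuda/TestCuda.hpp>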
class cuda : public ::testing::Test { protected: static void SetUpTestCase(); @@ -95,17 +85,19 @@ protected: #ifdef TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN void cuda::SetUpTestCase() - { - Kokkos::Cuda::print_configuration( std::cout ); - Kokkos::HostSpace::execution_space::initialize(); - Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); - } +{ + Kokkos::print_configuration( std::cout ); + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice( 0 ) ); +} void cuda::TearDownTestCase() - { - Kokkos::Cuda::finalize(); - Kokkos::HostSpace::execution_space::finalize(); - } -#endif +{ + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); } #endif + +} // namespace Test + +#endif diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp index ff379dc805ddcbadcd4e6b135d03beda683d8d5b..7cf19b26d1b3ebe6a73f2614aab51dda9d9bd88c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Atomics.cpp @@ -40,164 +40,164 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , atomics ) +TEST_F( cuda, atomics ) { - const int loop_count = 1e3 ; + const int loop_count = 1e3; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Cuda>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Cuda>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Cuda>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Cuda>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Cuda>(loop_count,3) ) ); 
+ ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Cuda>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Cuda >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Cuda>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Cuda >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Cuda>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Cuda >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Cuda>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Cuda >( 100, 3 ) ) ); } -TEST_F( cuda , atomic_operations ) +TEST_F( cuda, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. 
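As an aside, a minimal sketch of the concurrent-update pattern the atomic loop and operation tests in this file exercise: many iterations update the same element and the final value must match the serial result. The helper name atomic_sum_sketch is hypothetical; it assumes Kokkos has already been initialized by the test harness and that CUDA lambda support is enabled (the default in this patch's Makefile).

#include <Kokkos_Core.hpp>

void atomic_sum_sketch()
{
  // A single element updated concurrently; atomic_add keeps it race-free.
  Kokkos::View< long*, Kokkos::Cuda > sum( "sum", 1 );

  Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0, 1000 ),
    KOKKOS_LAMBDA( const int i ) {
      Kokkos::atomic_add( &sum( 0 ), static_cast< long >( i ) );
    } );

  // Copy back and compare with the serial result 0 + 1 + ... + 999.
  auto host_sum = Kokkos::create_mirror_view( sum );
  Kokkos::deep_copy( host_sum, sum );
  // host_sum( 0 ) should equal 999 * 1000 / 2.
}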
const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Cuda>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Cuda>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Cuda>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Cuda>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Cuda>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Cuda >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Cuda >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Cuda >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long 
long int, Kokkos::Cuda >( start, end -i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Cuda >( start, end -i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Cuda >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Cuda >( start, end - i, 4 ) ) ); } } -TEST_F( cuda , atomic_views_integral ) +TEST_F( cuda, atomic_views_integral ) { const long length = 1000000; + { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Cuda>(length, 8 ) ) ); + // Integral Types. 
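A minimal sketch of what the AtomicViewsTest* helpers below exercise (the function name atomic_view_sketch is hypothetical; a live Kokkos runtime is assumed): a View aliased with the Atomic memory trait turns every element access into an atomic update, so plain operator() arithmetic from concurrent threads does not race.

#include <Kokkos_Core.hpp>

void atomic_view_sketch()
{
  Kokkos::View< long*, Kokkos::Cuda > data( "data", 1 );

  // Alias the same allocation with the Atomic memory trait.
  Kokkos::View< long*, Kokkos::Cuda,
                Kokkos::MemoryTraits< Kokkos::Atomic > > atomic_data = data;

  Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0, 1000 ),
    KOKKOS_LAMBDA( const int ) {
      atomic_data( 0 ) += 1;  // Each increment is performed atomically.
    } );
}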
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Cuda >( length, 8 ) ) ); } } -TEST_F( cuda , atomic_views_nonintegral ) +TEST_F( cuda, atomic_views_nonintegral ) { const long length = 1000000; - { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Cuda>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Cuda>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Cuda>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Cuda>(length, 4 ) ) ); + { + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Cuda >( length, 4 ) ) ); } } - -TEST_F( cuda , atomic_view_api ) +TEST_F( cuda, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI<int, Kokkos::Cuda>(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Cuda >(); } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp index aeaa2a0e81d8114d95fed6566891fecf98d2feb2..e655193a51f513dd390a5545aebe66ebb44f2c11 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -40,63 +40,68 @@ // ************************************************************************ //@HEADER */ + #define TEST_CUDA_INSTANTIATE_SETUP_TEARDOWN + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , init ) { +TEST_F( cuda, init ) +{ ; } -TEST_F( cuda , md_range ) { - TestMDRange_2D< Kokkos::Cuda >::test_for2(100,100); - - TestMDRange_3D< Kokkos::Cuda >::test_for3(100,100,100); +TEST_F( cuda , mdrange_for ) { + TestMDRange_2D< Kokkos::Cuda >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Cuda >::test_for3( 100, 100, 100 ); + TestMDRange_4D< Kokkos::Cuda >::test_for4( 100, 10, 100, 10 ); + TestMDRange_5D< Kokkos::Cuda >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Cuda >::test_for6( 100, 10, 5, 2, 10, 5 ); } -TEST_F( cuda, policy_construction) { +TEST_F( cuda, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::Cuda >(); TestTeamPolicyConstruction< Kokkos::Cuda >(); } -TEST_F( cuda , range_tag ) +TEST_F( cuda, range_tag ) { - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestRange< 
Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); - - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); - - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); - - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); + + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); + + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); + + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); + + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); + TestRange< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); } - //---------------------------------------------------------------------------- -TEST_F( cuda , compiler_macros ) +TEST_F( cuda, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Cuda >() ) ); } //---------------------------------------------------------------------------- -TEST_F( cuda , memory_pool ) +TEST_F( cuda, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Cuda >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -110,24 +115,24 @@ TEST_F( cuda , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( cuda , task_fib ) +TEST_F( cuda, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Cuda >::run(i, (i+1)*(i+1)*10000 ); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Cuda >::run( i, ( i + 1 ) * ( i + 1 ) 
* 10000 ); } } -TEST_F( cuda , task_depend ) +TEST_F( cuda, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Cuda >::run( i ); } } -TEST_F( cuda , task_team ) +TEST_F( cuda, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Cuda >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Cuda >::run( 1000 ); // Put back after testing. } #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -135,55 +140,55 @@ TEST_F( cuda , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) -TEST_F( cuda , cxx11 ) +TEST_F( cuda, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Cuda >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Cuda >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Cuda >( 4 ) ) ); } } #endif TEST_F( cuda, tile_layout ) { - TestTile::test< Kokkos::Cuda , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 1 , 1 >( 9 , 10 ); - - TestTile::test< Kokkos::Cuda , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 2 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Cuda , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 4 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Cuda , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::Cuda , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Cuda , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Cuda, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::Cuda, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Cuda, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Cuda, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::Cuda, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Cuda, 8, 8 >( 9, 11 ); } -#if defined (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -#if defined (KOKKOS_COMPILER_CLANG) -TEST_F( cuda , dispatch ) +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +#if defined( KOKKOS_COMPILER_CLANG ) +TEST_F( cuda, dispatch ) { - const 
int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } } #endif #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp index b9ab9fe72d494a672cefe07f770ea38663e2ffec..01eed4e023447acb953c27ce2e8aa2ab18d155a4 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_a.cpp @@ -40,17 +40,17 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , reducers ) +TEST_F( cuda, reducers ) { - TestReducers<int, Kokkos::Cuda>::execute_integer(); - TestReducers<size_t, Kokkos::Cuda>::execute_integer(); - TestReducers<double, Kokkos::Cuda>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Cuda>::execute_basic(); + TestReducers< int, Kokkos::Cuda >::execute_integer(); + TestReducers< size_t, Kokkos::Cuda >::execute_integer(); + TestReducers< double, Kokkos::Cuda >::execute_float(); + TestReducers< Kokkos::complex<double>, Kokkos::Cuda >::execute_basic(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp index c588d752dd21ef2135d1e4fa52c37f5dba0c37a9..7f4e0973e7a512a5e855ba30c9e65e5a539c123d 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp @@ -40,38 +40,44 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, long_reduce) { - TestReduce< long , Kokkos::Cuda >( 0 ); - TestReduce< long , Kokkos::Cuda >( 1000000 ); +TEST_F( cuda, long_reduce ) +{ + TestReduce< long, Kokkos::Cuda >( 0 ); + TestReduce< long, Kokkos::Cuda >( 1000000 ); } -TEST_F( cuda, double_reduce) { - TestReduce< double , Kokkos::Cuda >( 0 ); - TestReduce< double , Kokkos::Cuda >( 1000000 ); +TEST_F( cuda, double_reduce ) +{ + TestReduce< double, Kokkos::Cuda >( 0 ); + TestReduce< double, Kokkos::Cuda >( 1000000 ); } -TEST_F( cuda, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Cuda >( 0 ); - TestReduceDynamic< long , Kokkos::Cuda >( 1000000 ); +TEST_F( cuda, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Cuda >( 0 ); + TestReduceDynamic< long, Kokkos::Cuda >( 1000000 ); } -TEST_F( cuda, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Cuda >( 0 ); - TestReduceDynamic< double , Kokkos::Cuda >( 1000000 ); +TEST_F( cuda, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Cuda >( 0 ); + TestReduceDynamic< double, Kokkos::Cuda >( 1000000 ); } -TEST_F( cuda, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Cuda >( 0 ); - TestReduceDynamicView< long , Kokkos::Cuda >( 1000000 ); +TEST_F( cuda, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Cuda >( 0 ); + TestReduceDynamicView< long, Kokkos::Cuda >( 1000000 ); } -TEST_F( cuda , scan ) +TEST_F( cuda, scan ) { - TestScan< Kokkos::Cuda 
>::test_range( 1 , 1000 ); + TestScan< Kokkos::Cuda >::test_range( 1, 1000 ); TestScan< Kokkos::Cuda >( 0 ); TestScan< Kokkos::Cuda >( 100000 ); TestScan< Kokkos::Cuda >( 10000000 ); @@ -79,10 +85,11 @@ TEST_F( cuda , scan ) } #if 0 -TEST_F( cuda , scan_small ) +TEST_F( cuda, scan_small ) { - typedef TestScan< Kokkos::Cuda , Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::Cuda, Kokkos::Impl::CudaExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -93,38 +100,39 @@ TEST_F( cuda , scan_small ) } #endif -TEST_F( cuda , team_scan ) +TEST_F( cuda, team_scan ) { - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -TEST_F( cuda , team_long_reduce) { - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( cuda, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( cuda , team_double_reduce) { - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( cuda, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, Kokkos::Cuda, 
Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( cuda , reduction_deduction ) +TEST_F( cuda, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp index f3cbc3b8897625f07f7c4fc810662b68cfe907e9..5bed7640daa114879f789e67807946e0dc2343f4 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Spaces.cpp @@ -40,6 +40,7 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { @@ -47,353 +48,338 @@ namespace Test { __global__ void test_abort() { - Kokkos::abort("test_abort"); + Kokkos::abort( "test_abort" ); } __global__ void test_cuda_spaces_int_value( int * ptr ) { - if ( *ptr == 42 ) { *ptr = 2 * 42 ; } + if ( *ptr == 42 ) { *ptr = 2 * 42; } } -TEST_F( cuda , space_access ) +TEST_F( cuda, space_access ) { - //-------------------------------------- - static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::HostSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::assignable , "" ); + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace , Kokkos::HostSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaSpace, Kokkos::HostSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::HostSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::HostSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); //-------------------------------------- static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaHostPinnedSpace >::assignable , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaHostPinnedSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::assignable , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace >::assignable, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::assignable , "" ); + ! 
Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace >::assignable, "" ); static_assert( - Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::MemorySpaceAccess< Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace >::accessible, "" ); //-------------------------------------- static_assert( - ! Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::HostSpace >::accessible , "" ); + ! Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::HostSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaUVMSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda , Kokkos::CudaHostPinnedSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::Cuda, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( - ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaSpace >::accessible , "" ); + ! Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaUVMSpace >::accessible , "" ); + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaUVMSpace >::accessible, "" ); static_assert( - Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >::accessible , "" ); - + Kokkos::Impl::SpaceAccessibility< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >::accessible, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space - , Kokkos::HostSpace >::value , "" ); + , Kokkos::HostSpace >::value, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space , Kokkos::Device< Kokkos::HostSpace::execution_space - , Kokkos::CudaUVMSpace > >::value , "" ); + , Kokkos::CudaUVMSpace > >::value, "" ); static_assert( std::is_same< Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space - , Kokkos::CudaHostPinnedSpace >::value , "" ); + , Kokkos::CudaHostPinnedSpace >::value, "" ); static_assert( std::is_same< Kokkos::Device< Kokkos::HostSpace::execution_space , Kokkos::CudaUVMSpace > , Kokkos::Device< Kokkos::HostSpace::execution_space - , Kokkos::CudaUVMSpace > >::value , "" ); + , Kokkos::CudaUVMSpace > >::value, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::Cuda >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaUVMSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); static_assert( Kokkos::Impl::SpaceAccessibility < Kokkos::Impl::HostMirror< Kokkos::CudaHostPinnedSpace >::Space , Kokkos::HostSpace - >::accessible , "" ); + >::accessible, "" ); } TEST_F( cuda, uvm ) { if ( Kokkos::CudaUVMSpace::available() ) { + int * uvm_ptr = (int*) Kokkos::kokkos_malloc< Kokkos::CudaUVMSpace >( "uvm_ptr", sizeof( int ) ); - int * uvm_ptr = (int*) Kokkos::kokkos_malloc< 
Kokkos::CudaUVMSpace >("uvm_ptr",sizeof(int)); - - *uvm_ptr = 42 ; + *uvm_ptr = 42; Kokkos::Cuda::fence(); - test_cuda_spaces_int_value<<<1,1>>>(uvm_ptr); + test_cuda_spaces_int_value<<< 1, 1 >>>( uvm_ptr ); Kokkos::Cuda::fence(); - EXPECT_EQ( *uvm_ptr, int(2*42) ); - - Kokkos::kokkos_free< Kokkos::CudaUVMSpace >(uvm_ptr ); + EXPECT_EQ( *uvm_ptr, int( 2 * 42 ) ); + Kokkos::kokkos_free< Kokkos::CudaUVMSpace >( uvm_ptr ); } } TEST_F( cuda, uvm_num_allocs ) { - // The max number of uvm allocations allowed is 65536 + // The max number of UVM allocations allowed is 65536. #define MAX_NUM_ALLOCS 65536 if ( Kokkos::CudaUVMSpace::available() ) { - struct TestMaxUVMAllocs { - using view_type = Kokkos::View< double* , Kokkos::CudaUVMSpace >; - using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] + using view_type = Kokkos::View< double*, Kokkos::CudaUVMSpace >; + using view_of_view_type = Kokkos::View< view_type[ MAX_NUM_ALLOCS ] , Kokkos::CudaUVMSpace >; - TestMaxUVMAllocs() - : view_allocs_test("view_allocs_test") + TestMaxUVMAllocs() : view_allocs_test( "view_allocs_test" ) { + for ( auto i = 0; i < MAX_NUM_ALLOCS; ++i ) { - for ( auto i = 0; i < MAX_NUM_ALLOCS ; ++i ) { - - // Kokkos will throw a runtime exception if an attempt is made to - // allocate more than the maximum number of uvm allocations + // Kokkos will throw a runtime exception if an attempt is made to + // allocate more than the maximum number of uvm allocations. // In this test, the max num of allocs occurs when i = MAX_NUM_ALLOCS - 1 // since the 'outer' view counts as one UVM allocation, leaving - // 65535 possible UVM allocations, that is 'i in [0 , 65535)' + // 65535 possible UVM allocations, that is 'i in [0, 65535)'. - // The test will catch the exception thrown in this case and continue + // The test will catch the exception thrown in this case and continue. - if ( i == ( MAX_NUM_ALLOCS - 1) ) { - EXPECT_ANY_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ; + if ( i == ( MAX_NUM_ALLOCS - 1 ) ) { + EXPECT_ANY_THROW( { view_allocs_test( i ) = view_type( "inner_view", 1 ); } ); } else { - if(i<MAX_NUM_ALLOCS - 1000) { - EXPECT_NO_THROW( { view_allocs_test(i) = view_type("inner_view",1); } ) ; - } else { // This might or might not throw depending on compilation options. + if ( i < MAX_NUM_ALLOCS - 1000 ) { + EXPECT_NO_THROW( { view_allocs_test( i ) = view_type( "inner_view", 1 ); } ); + } else { // This might or might not throw depending on compilation options. try { - view_allocs_test(i) = view_type("inner_view",1); + view_allocs_test( i ) = view_type( "inner_view", 1 ); } - catch (...) {} + catch ( ... ) {} } } - } //end allocation for loop + } // End allocation for loop. - for ( auto i = 0; i < MAX_NUM_ALLOCS -1; ++i ) { + for ( auto i = 0; i < MAX_NUM_ALLOCS - 1; ++i ) { - view_allocs_test(i) = view_type(); + view_allocs_test( i ) = view_type(); - } //end deallocation for loop + } // End deallocation for loop. - view_allocs_test = view_of_view_type(); // deallocate the view of views + view_allocs_test = view_of_view_type(); // Deallocate the view of views. } - // Member - view_of_view_type view_allocs_test ; - } ; - - // trigger the test via the TestMaxUVMAllocs constructor - TestMaxUVMAllocs() ; + // Member. + view_of_view_type view_allocs_test; + }; + // Trigger the test via the TestMaxUVMAllocs constructor. 
+ TestMaxUVMAllocs(); } - #undef MAX_NUM_ALLOCS + + #undef MAX_NUM_ALLOCS } -template< class MemSpace , class ExecSpace > +template< class MemSpace, class ExecSpace > struct TestViewCudaAccessible { - enum { N = 1000 }; - using V = Kokkos::View<double*,MemSpace> ; + using V = Kokkos::View< double*, MemSpace >; - V m_base ; + V m_base; struct TagInit {}; struct TagTest {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + void operator()( const TagInit &, const int i ) const { m_base[i] = i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_base[i] != i + 1 ) ++error_count ; } + void operator()( const TagTest &, const int i, long & error_count ) const + { if ( m_base[i] != i + 1 ) ++error_count; } TestViewCudaAccessible() - : m_base("base",N) + : m_base( "base", N ) {} static void run() - { - TestViewCudaAccessible self ; - Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space , TagInit >(0,N) , self ); - MemSpace::execution_space::fence(); - // Next access is a different execution space, must complete prior kernel. - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } + { + TestViewCudaAccessible self; + Kokkos::parallel_for( Kokkos::RangePolicy< typename MemSpace::execution_space, TagInit >( 0, N ), self ); + MemSpace::execution_space::fence(); + + // Next access is a different execution space, must complete prior kernel. + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< ExecSpace, TagTest >( 0, N ), self, error_count ); + EXPECT_EQ( error_count, 0 ); + } }; -TEST_F( cuda , impl_view_accessible ) +TEST_F( cuda, impl_view_accessible ) { - TestViewCudaAccessible< Kokkos::CudaSpace , Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaSpace, Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace, Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaUVMSpace, Kokkos::HostSpace::execution_space >::run(); - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::Cuda >::run(); - TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda >::run(); + TestViewCudaAccessible< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace::execution_space >::run(); } template< class MemSpace > struct TestViewCudaTexture { - enum { N = 1000 }; - using V = Kokkos::View<double*,MemSpace> ; - using T = Kokkos::View<const double*, MemSpace, Kokkos::MemoryRandomAccess > ; + using V = Kokkos::View< double*, MemSpace >; + using T = Kokkos::View< const double*, MemSpace, Kokkos::MemoryRandomAccess >; - V m_base ; - T m_tex ; + V m_base; + T m_tex; struct TagInit {}; struct TagTest {}; KOKKOS_INLINE_FUNCTION - void operator()( const TagInit & , const int i ) const { m_base[i] = i + 1 ; } + void operator()( const TagInit &, const int i ) const { m_base[i] = i + 1; } KOKKOS_INLINE_FUNCTION - void operator()( const TagTest & , const int i , long & error_count ) const - { if ( m_tex[i] != i + 1 ) ++error_count ; } + void operator()( const TagTest &, const int i, long & error_count ) const + { if ( m_tex[i] != i + 1 ) ++error_count; } 
TestViewCudaTexture() - : m_base("base",N) + : m_base( "base", N ) , m_tex( m_base ) {} static void run() - { - EXPECT_TRUE( ( std::is_same< typename V::reference_type - , double & - >::value ) ); - - EXPECT_TRUE( ( std::is_same< typename T::reference_type - , const double - >::value ) ); - - EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view - EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value - - TestViewCudaTexture self ; - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda , TagInit >(0,N) , self ); - long error_count = -1 ; - Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda , TagTest >(0,N) , self , error_count ); - EXPECT_EQ( error_count , 0 ); - } -}; + { + EXPECT_TRUE( ( std::is_same< typename V::reference_type, double & >::value ) ); + EXPECT_TRUE( ( std::is_same< typename T::reference_type, const double >::value ) ); + + EXPECT_TRUE( V::reference_type_is_lvalue_reference ); // An ordinary view. + EXPECT_FALSE( T::reference_type_is_lvalue_reference ); // Texture fetch returns by value. + TestViewCudaTexture self; + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Cuda, TagInit >( 0, N ), self ); -TEST_F( cuda , impl_view_texture ) + long error_count = -1; + Kokkos::parallel_reduce( Kokkos::RangePolicy< Kokkos::Cuda, TagTest >( 0, N ), self, error_count ); + EXPECT_EQ( error_count, 0 ); + } +}; + +TEST_F( cuda, impl_view_texture ) { TestViewCudaTexture< Kokkos::CudaSpace >::run(); TestViewCudaTexture< Kokkos::CudaUVMSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp index fd8a647ef3f03b9d1109a464a51cd06e90de703d..0aea35db517bdba78967eb8b443cb771aaf2215f 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Cuda >(); +TEST_F( cuda, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Cuda >(); +TEST_F( cuda, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Cuda >(); +TEST_F( cuda, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_assign_strided ) { +TEST_F( cuda, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Cuda >(); } -TEST_F( cuda, view_subview_left_0 ) { +TEST_F( cuda, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_1 ) { +TEST_F( cuda, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_2 ) { +TEST_F( cuda, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_left_3 ) { +TEST_F( cuda, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_0 ) { +TEST_F( 
cuda, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_1 ) { +TEST_F( cuda, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::CudaUVMSpace >(); } -TEST_F( cuda, view_subview_right_3 ) { +TEST_F( cuda, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp index 053fcfc2095c26540ff75e545bb4f920e0a96912..f31f4cbe62bc06bd5fee04abc6a71913c6fbddd9 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_layoutleft_to_layoutleft) { +TEST_F( cuda, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( cuda, view_subview_layoutright_to_layoutright) { +TEST_F( cuda, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp index 4c5f2ef72fdd45b2b9033d54c3c83e70c3c089c1..0213a196e8612b4d9d3821de6d657803e9e22b6c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign ) { +TEST_F( cuda, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp index aee6f1730d6fb33e15877a043fe0ef8beaed11d9..181e1bab2ccb531722b08e627a8ee724fcd393d9 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> 
>(); +TEST_F( cuda, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp index 2ef48c686e1d3a202aaf5f017d9ac88cc486085d..708cc1f5ba98fc7eb0f5603524c2b533eb090fee 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( cuda, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp index aec123ac235ef631172b3dc7c26151d2da7e38da..a3db996f8d87d63dd1a21ea74eb83a615a0e7162 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d ) { +TEST_F( cuda, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp index e8ad2319963b2750e01d518309e84c7423a387d6..2f7cffa75da133039d0624d2d812053774013846 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( cuda, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp index e86b4513fd8b8fdeb85c7bce130b3ae274d5e214..949c6f3e0b9d3055e7da32ace79a810310861d99 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( cuda, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git 
a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp index ad9dcc0fd1faccf2c8f8ff5e254b82a33f9d998b..3e68277a9e93b447a90a9b3496e0b4d0ccc407e2 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left ) { +TEST_F( cuda, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp index f97d97e59c205fda791ac1d231b1429e1f8d4ec2..0cd91b7795f52f457f4403559cb353180bcdbe44 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( cuda, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp index 2a07f28f830a125d865eb89a4a456cb5d0aa2b62..cd1c13f7d073f1a445c35ded9eaa9fd121d35fee 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( cuda, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp index 3c51d9420184c91d8ddc1b15e9fb50659c1651d6..22d27535431f7b6414c52305a46547654c40ccbb 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right ) { +TEST_F( cuda, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp index 835caa7b879891ed4cd0d24bac61bdaf6a686efb..5dc5f87b4e2b7faa2a52163f8b8af732b53000a9 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c11.cpp @@ 
-40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( cuda, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp index 53bd5eee20205d56ca4356df4f2bb1118e0ff93d..318d8edbbb82eb6dd097b959e07861cf74a77099 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( cuda, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::CudaUVMSpace, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp index e4348319f695da2819e24143754777746bdc35d6..a2158f06c73db10193e1275c5d49c99738b0c06b 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include<cuda/TestCuda_SubView_c01.cpp> -#include<cuda/TestCuda_SubView_c02.cpp> -#include<cuda/TestCuda_SubView_c03.cpp> -#include<cuda/TestCuda_SubView_c04.cpp> -#include<cuda/TestCuda_SubView_c05.cpp> -#include<cuda/TestCuda_SubView_c06.cpp> -#include<cuda/TestCuda_SubView_c07.cpp> -#include<cuda/TestCuda_SubView_c08.cpp> -#include<cuda/TestCuda_SubView_c09.cpp> -#include<cuda/TestCuda_SubView_c10.cpp> -#include<cuda/TestCuda_SubView_c11.cpp> -#include<cuda/TestCuda_SubView_c12.cpp> +#include <cuda/TestCuda_SubView_c01.cpp> +#include <cuda/TestCuda_SubView_c02.cpp> +#include <cuda/TestCuda_SubView_c03.cpp> +#include <cuda/TestCuda_SubView_c04.cpp> +#include <cuda/TestCuda_SubView_c05.cpp> +#include <cuda/TestCuda_SubView_c06.cpp> +#include <cuda/TestCuda_SubView_c07.cpp> +#include <cuda/TestCuda_SubView_c08.cpp> +#include <cuda/TestCuda_SubView_c09.cpp> +#include <cuda/TestCuda_SubView_c10.cpp> +#include <cuda/TestCuda_SubView_c11.cpp> +#include <cuda/TestCuda_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp index 13834d09ad03854d1ac1ae17c7e8a159efa55ca7..8d9b9328ba9691fe90947554aeb9e9825322d55a 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Team.cpp @@ -40,81 +40,87 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , team_tag ) +TEST_F( cuda, team_tag ) { - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestTeamPolicy< Kokkos::Cuda , 
Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0);
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 );

- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(2);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2);
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 );

- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_for(1000);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000);
- TestTeamPolicy< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000);
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 );
+ TestTeamPolicy< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 );
 }

-TEST_F( cuda , team_shared_request) {
- TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
- TestSharedTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
+TEST_F( cuda, team_shared_request )
+{
+ TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >();
+ TestSharedTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >();
 }

-//THis Tests request to much L0 scratch
-//TEST_F( cuda, team_scratch_request) {
-// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >();
-// TestScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >();
+// This test requests too much L0 scratch.
+//TEST_F( cuda, team_scratch_request ) +//{ +// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); +// TestScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); //} -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( cuda , team_lambda_shared_request) { +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( cuda, team_lambda_shared_request ) +{ TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); TestLambdaSharedTeam< Kokkos::CudaSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); TestLambdaSharedTeam< Kokkos::CudaUVMSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); - TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); + TestLambdaSharedTeam< Kokkos::CudaHostPinnedSpace, Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); } #endif -TEST_F( cuda, shmem_size) { +TEST_F( cuda, shmem_size ) +{ TestShmemSize< Kokkos::Cuda >(); } -TEST_F( cuda, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Cuda , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( cuda, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Cuda, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( cuda , team_vector ) +#if !defined(KOKKOS_CUDA_CLANG_WORKAROUND) && !defined(KOKKOS_ARCH_PASCAL) +TEST_F( cuda, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Cuda >( 10 ) ) ); } +#endif TEST_F( cuda, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda 
>( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Cuda >( 8192, 2048, 16, 16 ); } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp index c01ca1c1463c6573c8d9e51c0ca31ed43c19941e..be0c4c5715eeba492112e9a83dbc3cba09796d98 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_a.cpp @@ -40,20 +40,21 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , impl_view_mapping_a ) { +TEST_F( cuda, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::CudaSpace >(); test_view_mapping_operator< Kokkos::CudaSpace >(); } -TEST_F( cuda , view_of_class ) +TEST_F( cuda, view_of_class ) { TestViewMappingClassValue< Kokkos::CudaSpace >::run(); TestViewMappingClassValue< Kokkos::CudaUVMSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp index 8e821ada000678c762b22db574dd1e0d816bbd54..b4d8e5d953f8e753eac945560fac763589bd2025 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_b.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , impl_view_mapping_d ) { +TEST_F( cuda, impl_view_mapping_d ) +{ test_view_mapping< Kokkos::CudaHostPinnedSpace >(); test_view_mapping_operator< Kokkos::CudaHostPinnedSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp index cf29a68e96586dc5d194bd0b28338259784dceb0..e4e6894c5346b6283371903bc2e1bdea18c5f399 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_c.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , impl_view_mapping_c ) { +TEST_F( cuda, impl_view_mapping_c ) +{ test_view_mapping< Kokkos::CudaUVMSpace >(); test_view_mapping_operator< Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp index db14b5158f6efa01a6397df98041827a830158d4..82a3dd83e88c3b047525771a5dd9deca32d6d891 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_d.cpp @@ -40,73 +40,77 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , view_nested_view ) +TEST_F( cuda, view_nested_view ) { ::Test::view_nested_view< Kokkos::Cuda >(); } - - -TEST_F( cuda , view_remap ) +TEST_F( cuda, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::CudaUVMSpace > output_type ; + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::CudaUVMSpace > output_type; - 
typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::CudaUVMSpace > input_type ; + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace > input_type; - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::CudaUVMSpace > diff_type ; + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::CudaUVMSpace > diff_type; - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); Kokkos::fence(); - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + Kokkos::fence(); - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + Kokkos::fence(); - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } + Kokkos::fence(); } -//---------------------------------------------------------------------------- - -TEST_F( cuda , view_aggregate ) +TEST_F( cuda, view_aggregate ) { TestViewAggregate< Kokkos::Cuda >(); } -TEST_F( cuda , template_meta_functions ) +TEST_F( cuda, template_meta_functions ) { - TestTemplateMetaFunctions<int, Kokkos::Cuda >(); + TestTemplateMetaFunctions< int, Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp index 07d425647330228815a7103e6f7596a8a2f2a460..27450fa6ff827dbbe6970331eca68589a423c406 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_e.cpp @@ -40,17 +40,20 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , impl_shared_alloc ) { - test_shared_alloc< Kokkos::CudaSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaUVMSpace , Kokkos::HostSpace::execution_space >(); - test_shared_alloc< Kokkos::CudaHostPinnedSpace , Kokkos::HostSpace::execution_space >(); +TEST_F( cuda, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::CudaSpace, Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaUVMSpace, Kokkos::HostSpace::execution_space >(); + test_shared_alloc< Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace::execution_space >(); } -TEST_F( cuda , impl_view_mapping_b ) { +TEST_F( cuda, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::CudaSpace >(); test_view_mapping_subview< 
Kokkos::CudaUVMSpace >(); test_view_mapping_subview< Kokkos::CudaHostPinnedSpace >(); @@ -59,5 +62,4 @@ TEST_F( cuda , impl_view_mapping_b ) { TestViewMappingAtomic< Kokkos::CudaHostPinnedSpace >::run(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp index 34721f02dc73f418ba7c348fe65c3a59d534dc7c..56524111aec939d0ff2b80196b5352a44f6919dd 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_f.cpp @@ -40,16 +40,17 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_api_a) { - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess > > view_texture_managed ; - typedef Kokkos::View< const int * , Kokkos::Cuda , Kokkos::MemoryTraits< Kokkos::RandomAccess | Kokkos::Unmanaged > > view_texture_unmanaged ; +TEST_F( cuda, view_api_a ) +{ + typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess> > view_texture_managed; + typedef Kokkos::View< const int *, Kokkos::Cuda, Kokkos::MemoryTraits<Kokkos::RandomAccess | Kokkos::Unmanaged> > view_texture_unmanaged; - TestViewAPI< double , Kokkos::Cuda >(); + TestViewAPI< double, Kokkos::Cuda >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp index abbcf3bf8bfa6d89ff5c5a5891d8cd16018becf0..d5fd24456d782409450fcf949d6c6280504bb785 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_g.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_api_b) { - TestViewAPI< double , Kokkos::CudaUVMSpace >(); +TEST_F( cuda, view_api_b ) +{ + TestViewAPI< double, Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp index 9899642035ada183fe7b7b5c4a60610e3c271739..649023e4afcaf921511edab82cc10035776246ae 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_h.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda, view_api_c) { - TestViewAPI< double , Kokkos::CudaHostPinnedSpace >(); +TEST_F( cuda, view_api_c ) +{ + TestViewAPI< double, Kokkos::CudaHostPinnedSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp index 9bc09ba893affeec45923883b62751534a7e86dc..b46b1e5f8173bd724c0333de776366704c23f152 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_ViewAPI_s.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include <cuda/TestCuda.hpp> namespace Test { -TEST_F( cuda , view_space_assign ) { - view_space_assign< Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >(); - view_space_assign< Kokkos::CudaSpace , Kokkos::CudaUVMSpace >(); 
+TEST_F( cuda, view_space_assign ) +{ + view_space_assign< Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace >(); + view_space_assign< Kokkos::CudaSpace, Kokkos::CudaUVMSpace >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp index 28ae5b41b039a385db047de37c5a0d1865a1ee1b..ed9bb68cd60a004c214ec473ae35653f61c6a814 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_OPENMP_HPP #define KOKKOS_TEST_OPENMP_HPP + #include <gtest/gtest.h> #include <Kokkos_Macros.hpp> + #ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,13 +56,8 @@ #include <Kokkos_Core.hpp> #include <TestTile.hpp> - -//---------------------------------------------------------------------------- - #include <TestSharedAlloc.hpp> #include <TestViewMapping.hpp> - - #include <TestViewAPI.hpp> #include <TestViewOfClass.hpp> #include <TestViewSubview.hpp> @@ -74,15 +72,11 @@ #include <TestCompilerMacros.hpp> #include <TestTaskScheduler.hpp> #include <TestMemoryPool.hpp> - - #include <TestCXX11.hpp> #include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> #include <TestTemplateMetaFunctions.hpp> - #include <TestPolicyConstruction.hpp> - #include <TestMDRange.hpp> namespace Test { @@ -95,23 +89,24 @@ protected: const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - const unsigned threads_count = std::max( 1u , numa_count ) * - std::max( 2u , ( cores_per_numa * threads_per_core ) / 2 ); + const unsigned threads_count = std::max( 1u, numa_count ) * + std::max( 2u, ( cores_per_numa * threads_per_core ) / 2 ); Kokkos::OpenMP::initialize( threads_count ); - Kokkos::OpenMP::print_configuration( std::cout , true ); - srand(10231); + Kokkos::print_configuration( std::cout, true ); + srand( 10231 ); } static void TearDownTestCase() { Kokkos::OpenMP::finalize(); - omp_set_num_threads(1); + omp_set_num_threads( 1 ); - ASSERT_EQ( 1 , omp_get_max_threads() ); + ASSERT_EQ( 1, omp_get_max_threads() ); } }; -} +} // namespace Test + #endif diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp index ed6c9f8d1696c9c653c82f52b14a8a73520b7735..2585c01973b3aeba5fd00f27068c361b15552800 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Atomics.cpp @@ -40,165 +40,162 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp , atomics ) +TEST_F( openmp, atomics ) { - const int loop_count = 1e4 ; + const int loop_count = 1e4; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned 
int,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::OpenMP>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::OpenMP >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::OpenMP>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::OpenMP >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::OpenMP>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::OpenMP >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,2) ) ); - 
ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::OpenMP>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::OpenMP >( 100, 3 ) ) ); } -TEST_F( openmp , atomic_operations ) +TEST_F( openmp, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::OpenMP>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::OpenMP>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::OpenMP>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, 
end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + 
ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::OpenMP >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::OpenMP >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::OpenMP >( start, end - i, 4 ) ) ); } - } - -TEST_F( openmp , atomic_views_integral ) +TEST_F( openmp, atomic_views_integral ) { const long length = 1000000; { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::OpenMP>(length, 8 ) ) ); - + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::OpenMP >( length, 8 ) ) ); } } -TEST_F( openmp , atomic_views_nonintegral ) +TEST_F( openmp, atomic_views_nonintegral ) { const long length = 1000000; { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::OpenMP>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::OpenMP>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::OpenMP>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::OpenMP>(length, 4 ) ) ); - + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::OpenMP >( length, 4 ) ) ); } } -TEST_F( openmp , atomic_view_api ) +TEST_F( openmp, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI<int, Kokkos::OpenMP>(); + TestAtomicViews::TestAtomicViewAPI<int, Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp index 126d730f0ff96272ae1e21eb5f8f81523fda8f02..b4f32dac706222e2c1f79f43469eadb4f5e3e6c6 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -40,65 +40,90 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp , init ) { +TEST_F( openmp, init ) +{ ; } -TEST_F( openmp , md_range ) { - TestMDRange_2D< Kokkos::OpenMP >::test_for2(100,100); +TEST_F( openmp, mdrange_for ) +{ + Kokkos::Timer timer; + TestMDRange_2D< Kokkos::OpenMP >::test_for2( 10000, 1000 ); + std::cout << " 2D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_3D< Kokkos::OpenMP >::test_for3( 100, 100, 1000 ); + std::cout << " 3D: " << timer.seconds() << std::endl; - TestMDRange_3D< Kokkos::OpenMP >::test_for3(100,100,100); + timer.reset(); + TestMDRange_4D< Kokkos::OpenMP >::test_for4( 100, 10, 100, 100 ); + std::cout << " 4D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_5D< Kokkos::OpenMP >::test_for5( 100, 10, 10, 100, 50 ); + std::cout << " 5D: " << timer.seconds() << std::endl; + + timer.reset(); + TestMDRange_6D< Kokkos::OpenMP >::test_for6( 10, 10, 10, 10, 50, 50 ); + std::cout << " 6D: " << timer.seconds() << 
std::endl; } -TEST_F( openmp, policy_construction) { +TEST_F( openmp, mdrange_reduce ) +{ + TestMDRange_2D< Kokkos::OpenMP >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::OpenMP >::test_reduce3( 100, 10, 100 ); +} + +TEST_F( openmp, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::OpenMP >(); TestTeamPolicyConstruction< Kokkos::OpenMP >(); } -TEST_F( openmp , range_tag ) +TEST_F( openmp, range_tag ) { - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(0); - - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); - - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(3); - - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 0 ); + + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); + + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 3 ); + + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestRange< Kokkos::OpenMP, 
Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); + + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); + TestRange< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( openmp , compiler_macros ) +TEST_F( openmp, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::OpenMP >() ) ); } //---------------------------------------------------------------------------- -TEST_F( openmp , memory_pool ) +TEST_F( openmp, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::OpenMP >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -112,24 +137,24 @@ TEST_F( openmp , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( openmp , task_fib ) +TEST_F( openmp, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::OpenMP >::run(i, (i+1)*(i+1)*10000 ); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::OpenMP >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); } } -TEST_F( openmp , task_depend ) +TEST_F( openmp, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::OpenMP >::run( i ); } } -TEST_F( openmp , task_team ) +TEST_F( openmp, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::OpenMP >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::OpenMP >::run( 1000 ); // Put back after testing. 
} #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -137,53 +162,51 @@ TEST_F( openmp , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) -TEST_F( openmp , cxx11 ) +TEST_F( openmp, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::OpenMP >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::OpenMP >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::OpenMP >( 4 ) ) ); } } #endif TEST_F( openmp, tile_layout ) { - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::OpenMP , 1 , 1 >( 9 , 10 ); - - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 2 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::OpenMP , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 4 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::OpenMP , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::OpenMP, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::OpenMP, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::OpenMP, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::OpenMP, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::OpenMP, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::OpenMP, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::OpenMP, 8, 8 >( 9, 11 ); } - -TEST_F( openmp , dispatch ) +TEST_F( openmp, dispatch ) { - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::OpenMP >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp index d41e1493eea6306d68087d1a8562ab963e1ec039..22c29308a6289361bfa0b62d47e579e4bb1e29c2 100644 --- 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Reductions.cpp @@ -40,46 +40,52 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, long_reduce) { - TestReduce< long , Kokkos::OpenMP >( 0 ); - TestReduce< long , Kokkos::OpenMP >( 1000000 ); +TEST_F( openmp, long_reduce ) +{ + TestReduce< long, Kokkos::OpenMP >( 0 ); + TestReduce< long, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp, double_reduce) { - TestReduce< double , Kokkos::OpenMP >( 0 ); - TestReduce< double , Kokkos::OpenMP >( 1000000 ); +TEST_F( openmp, double_reduce ) +{ + TestReduce< double, Kokkos::OpenMP >( 0 ); + TestReduce< double, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp , reducers ) +TEST_F( openmp, reducers ) { - TestReducers<int, Kokkos::OpenMP>::execute_integer(); - TestReducers<size_t, Kokkos::OpenMP>::execute_integer(); - TestReducers<double, Kokkos::OpenMP>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::OpenMP>::execute_basic(); + TestReducers< int, Kokkos::OpenMP >::execute_integer(); + TestReducers< size_t, Kokkos::OpenMP >::execute_integer(); + TestReducers< double, Kokkos::OpenMP >::execute_float(); + TestReducers< Kokkos::complex<double>, Kokkos::OpenMP >::execute_basic(); } -TEST_F( openmp, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::OpenMP >( 0 ); - TestReduceDynamic< long , Kokkos::OpenMP >( 1000000 ); +TEST_F( openmp, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::OpenMP >( 0 ); + TestReduceDynamic< long, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::OpenMP >( 0 ); - TestReduceDynamic< double , Kokkos::OpenMP >( 1000000 ); +TEST_F( openmp, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::OpenMP >( 0 ); + TestReduceDynamic< double, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::OpenMP >( 0 ); - TestReduceDynamicView< long , Kokkos::OpenMP >( 1000000 ); +TEST_F( openmp, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::OpenMP >( 0 ); + TestReduceDynamicView< long, Kokkos::OpenMP >( 1000000 ); } -TEST_F( openmp , scan ) +TEST_F( openmp, scan ) { - TestScan< Kokkos::OpenMP >::test_range( 1 , 1000 ); + TestScan< Kokkos::OpenMP >::test_range( 1, 1000 ); TestScan< Kokkos::OpenMP >( 0 ); TestScan< Kokkos::OpenMP >( 100000 ); TestScan< Kokkos::OpenMP >( 10000000 ); @@ -87,10 +93,11 @@ TEST_F( openmp , scan ) } #if 0 -TEST_F( openmp , scan_small ) +TEST_F( openmp, scan_small ) { - typedef TestScan< Kokkos::OpenMP , Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::OpenMP, Kokkos::Impl::OpenMPExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -101,38 +108,39 @@ TEST_F( openmp , scan_small ) } #endif -TEST_F( openmp , team_scan ) +TEST_F( openmp, team_scan ) { - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::OpenMP , 
Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -TEST_F( openmp , team_long_reduce) { - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( openmp, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( openmp , team_double_reduce) { - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( openmp, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( openmp , reduction_deduction ) +TEST_F( openmp, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp index 9854417e42da5a8bdd6986b85fbdd754bab3e57b..fefae073227a7086bb440152b76abf16dc9c00b2 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::OpenMP >(); +TEST_F( openmp, 
view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::OpenMP >(); +TEST_F( openmp, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::OpenMP >(); +TEST_F( openmp, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_assign_strided ) { +TEST_F( openmp, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_0 ) { +TEST_F( openmp, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_1 ) { +TEST_F( openmp, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_2 ) { +TEST_F( openmp, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_left_3 ) { +TEST_F( openmp, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_0 ) { +TEST_F( openmp, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_1 ) { +TEST_F( openmp, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::OpenMP >(); } -TEST_F( openmp, view_subview_right_3 ) { +TEST_F( openmp, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp index 2aa1fc5c633ffab0319c37c7a00a9abe48438597..7de7ca91bdc082057bccc1b71ec8f482a16bc0f9 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_layoutleft_to_layoutleft) { +TEST_F( openmp, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( openmp, view_subview_layoutright_to_layoutright) { +TEST_F( openmp, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::OpenMP, 
Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp index 1a6871cfca8f3136b13011f66576cd7a9d891978..d727ec0ee592c57d357b8cfebfa83a9bcc06eb12 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign ) { +TEST_F( openmp, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp index b04edbb997d564a2e921bacf7b36959b17e8755f..df43f555d385037dafe3a29b9cec66ef2eb9b781 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( openmp, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp index 765e235830db2f7e48ad8fe9df271429fef2c2ab..38f241ebf7bdea50af2f8a0b06dd69b16175667c 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( openmp, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp index 9d8b62708a3d4d898ddbc923b733c78c869c2826..11a4ea8ac24bf457f9d4fbe97b5180536d1fac69 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_2d_from_3d ) { +TEST_F( openmp, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp index 9c19cf0e57dcf7058f4f0aeb4752465c470e9fa9..a91baa34df3f0fc41db37909fdcdbeefc27a3158 100644 --- 
a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( openmp, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp index c1bdf72351b02958f5e1e857c41f7e5d999ade64..20d4d9bd64462eaa9d90a5d776c7129a7a816312 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( openmp, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp index 08a3b5a54a2c66599ebc61384357324a79815507..528df1c0700d7582f427310d8f7610376f9166bb 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left ) { +TEST_F( openmp, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp index 0864ebbdaa44b1bd00a154fe2f7fcf4b55ae48eb..d9eea8dba91a7c03cdfd8460b2241438ffbbce1d 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( openmp, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp index e38dfecbf6e353bcab69f7341d2754ea6ef85cf9..f909dc33c067ca4ff6c3badeddf92c6bb12a2bd6 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c09.cpp @@ -40,13 +40,14 @@ // 
************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( openmp, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp index b7e4683d23d18bb838c97a1fa198b2d38874de77..59996d5e33b594a23c7e368354208c68707339e9 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right ) { +TEST_F( openmp, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp index fc3e66fd4853c6104503aaf461eda97183cb44e1..3f9c215d9b10dbbeb3aada555515ab27c1e38adb 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( openmp, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp index e21a13ee579e5052241252ffa6b99ba49f9c6b47..d3a73483a0bc11c4d60eb4d6d658c00fde838566 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( openmp, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::OpenMP, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp index 9da159ab5773a0a7b1a49605cf1a88294a29d09d..399c6e92e4c7cf858ecef02a97e1bf4742ec6eda 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include<openmp/TestOpenMP_SubView_c01.cpp> -#include<openmp/TestOpenMP_SubView_c02.cpp> 
-#include<openmp/TestOpenMP_SubView_c03.cpp> -#include<openmp/TestOpenMP_SubView_c04.cpp> -#include<openmp/TestOpenMP_SubView_c05.cpp> -#include<openmp/TestOpenMP_SubView_c06.cpp> -#include<openmp/TestOpenMP_SubView_c07.cpp> -#include<openmp/TestOpenMP_SubView_c08.cpp> -#include<openmp/TestOpenMP_SubView_c09.cpp> -#include<openmp/TestOpenMP_SubView_c10.cpp> -#include<openmp/TestOpenMP_SubView_c11.cpp> -#include<openmp/TestOpenMP_SubView_c12.cpp> +#include <openmp/TestOpenMP_SubView_c01.cpp> +#include <openmp/TestOpenMP_SubView_c02.cpp> +#include <openmp/TestOpenMP_SubView_c03.cpp> +#include <openmp/TestOpenMP_SubView_c04.cpp> +#include <openmp/TestOpenMP_SubView_c05.cpp> +#include <openmp/TestOpenMP_SubView_c06.cpp> +#include <openmp/TestOpenMP_SubView_c07.cpp> +#include <openmp/TestOpenMP_SubView_c08.cpp> +#include <openmp/TestOpenMP_SubView_c09.cpp> +#include <openmp/TestOpenMP_SubView_c10.cpp> +#include <openmp/TestOpenMP_SubView_c11.cpp> +#include <openmp/TestOpenMP_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp index 38cf0a0f409c8dbe5d923cae4b88bec619a5a8b0..216789e8bf6ebcd1d2deab1e567317376c611e0b 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Team.cpp @@ -40,67 +40,73 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp , team_tag ) +TEST_F( openmp, team_tag ) { - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> 
>::test_for( 1000 ); + TestTeamPolicy< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } -TEST_F( openmp , team_shared_request) { - TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( openmp, team_shared_request ) +{ + TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( openmp, team_scratch_request) { - TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( openmp, team_scratch_request ) +{ + TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( openmp , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( openmp, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); } #endif -TEST_F( openmp, shmem_size) { +TEST_F( openmp, shmem_size ) +{ TestShmemSize< Kokkos::OpenMP >(); } -TEST_F( openmp, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::OpenMP , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( openmp, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::OpenMP, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( openmp , team_vector ) +TEST_F( openmp, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::OpenMP >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -112,11 +118,10 @@ TEST_F( 
openmp , team_vector ) #ifndef SKIP_TEST TEST_F( openmp, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::OpenMP >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp index 82cbf3ea18ecf7c3c424c73fe3e41ebf4a4e0c26..aead381a11e5b5a88763d9622deac55c3ceaf631 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp , impl_view_mapping_a ) { +TEST_F( openmp, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::OpenMP >(); test_view_mapping_operator< Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp index b2d4f87fdd417ab2d1036884dcce4b0df5793396..c802fb79caf081b103c6e65bf54d8e20fe3b7193 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // ************************************************************************ //@HEADER */ + #include <openmp/TestOpenMP.hpp> namespace Test { -TEST_F( openmp , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::OpenMP >(); +TEST_F( openmp, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::OpenMP >(); } -TEST_F( openmp , impl_view_mapping_b ) { +TEST_F( openmp, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::OpenMP >(); TestViewMappingAtomic< Kokkos::OpenMP >::run(); } -TEST_F( openmp, view_api) { - TestViewAPI< double , Kokkos::OpenMP >(); +TEST_F( openmp, view_api ) +{ + TestViewAPI< double, Kokkos::OpenMP >(); } -TEST_F( openmp , view_nested_view ) +TEST_F( openmp, view_nested_view ) { ::Test::view_nested_view< Kokkos::OpenMP >(); } - - -TEST_F( openmp , view_remap ) +TEST_F( openmp, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::OpenMP > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::OpenMP > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; 
++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::OpenMP > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::OpenMP > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::OpenMP > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( openmp , view_aggregate ) +TEST_F( openmp, view_aggregate ) { TestViewAggregate< Kokkos::OpenMP >(); } -TEST_F( openmp , template_meta_functions ) +TEST_F( openmp, template_meta_functions ) { - TestTemplateMetaFunctions<int, Kokkos::OpenMP >(); + TestTemplateMetaFunctions< int, Kokkos::OpenMP >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp new file mode 100644 index 0000000000000000000000000000000000000000..907fe23ea5e7c6b11a52c6327787ddee0108f89e --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads.hpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_TEST_QTHREADS_HPP +#define KOKKOS_TEST_QTHREADS_HPP + +#include <gtest/gtest.h> + +#include <Kokkos_Macros.hpp> + +#ifdef KOKKOS_LAMBDA +#undef KOKKOS_LAMBDA +#endif +#define KOKKOS_LAMBDA [=] + +#include <Kokkos_Core.hpp> + +#include <TestTile.hpp> +#include <TestSharedAlloc.hpp> +#include <TestViewMapping.hpp> +#include <TestViewAPI.hpp> +#include <TestViewOfClass.hpp> +#include <TestViewSubview.hpp> +#include <TestAtomic.hpp> +#include <TestAtomicOperations.hpp> +#include <TestAtomicViews.hpp> +#include <TestRange.hpp> +#include <TestTeam.hpp> +#include <TestReduce.hpp> +#include <TestScan.hpp> +#include <TestAggregate.hpp> +#include <TestCompilerMacros.hpp> +#include <TestTaskScheduler.hpp> +#include <TestMemoryPool.hpp> +#include <TestCXX11.hpp> +#include <TestCXX11Deduction.hpp> +#include <TestTeamVector.hpp> +#include <TestTemplateMetaFunctions.hpp> +#include <TestPolicyConstruction.hpp> +#include <TestMDRange.hpp> + +namespace Test { + +class qthreads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); + const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + + const unsigned threads_count = std::max( 1u, numa_count ) * + std::max( 2u, ( cores_per_numa * threads_per_core ) / 2 ); + + Kokkos::Qthreads::initialize( threads_count ); + Kokkos::print_configuration( std::cout, true ); + + srand( 10231 ); + } + + static void TearDownTestCase() + { + Kokkos::Qthreads::finalize(); + } +}; + +} // namespace Test + +#endif diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e64c3305db616b09c24c2b47d64c9153e3aeb0df --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Atomics.cpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, atomics ) +{ +#if 0 + const int loop_count = 1e4; + + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Qthreads >( loop_count, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Qthreads >( 100, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Qthreads >( 100, 3 ) ) ); + + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, 
Kokkos::Qthreads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Qthreads >( 100, 3 ) ) ); +#endif +} + +TEST_F( qthreads, atomic_operations ) +{ +#if 0 + const int start = 1; // Avoid zero for division. + const int end = 11; + + for ( int i = start; i < end; ++i ) + { + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, 
Kokkos::Qthreads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Qthreads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Qthreads >( start, end - i, 4 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_views_integral ) +{ +#if 0 + const long length = 1000000; + + { + // Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Qthreads >( length, 8 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_views_nonintegral ) +{ +#if 0 + const long length = 1000000; + + { + // Non-Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Qthreads >( length, 4 ) ) ); + } +#endif +} + +TEST_F( qthreads, atomic_view_api ) +{ +#if 0 + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0faec84056997dd0d1236ff8c00f2218b2549cf9 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Other.cpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, init ) +{ + ; +} + +TEST_F( qthreads, md_range ) +{ +#if 0 + TestMDRange_2D< Kokkos::Qthreads >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Qthreads >::test_for3( 100, 100, 100 ); +#endif +} + +TEST_F( qthreads, policy_construction ) +{ +#if 0 + TestRangePolicyConstruction< Kokkos::Qthreads >(); + TestTeamPolicyConstruction< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, range_tag ) +{ +#if 0 + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 0 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 3 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); + + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); + TestRange< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); +#endif +} + +//---------------------------------------------------------------------------- + +TEST_F( qthreads, compiler_macros ) +{ +#if 0 + ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Qthreads >() ) ); +#endif +} + +//---------------------------------------------------------------------------- + +TEST_F( qthreads, memory_pool ) +{ +#if 0 + bool val = TestMemoryPool::test_mempool< Kokkos::Qthreads >( 128, 128000000 ); + ASSERT_TRUE( val ); + + TestMemoryPool::test_mempool2< Kokkos::Qthreads >( 64, 4, 1000000, 2000000 ); + + TestMemoryPool::test_memory_exhaustion< Kokkos::Qthreads >(); +#endif +} + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_TASKDAG ) + +TEST_F( qthreads, task_fib ) +{ +#if 0 + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Qthreads >::run( i, ( i + 1 ) * ( i + 1 ) * 10000 ); + } +#endif +} + +TEST_F( qthreads, task_depend ) +{ +#if 0 + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Qthreads >::run( i ); + } +#endif +} + +TEST_F( qthreads, task_team 
) +{ +#if 0 + TestTaskScheduler::TestTaskTeam< Kokkos::Qthreads >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Qthreads >::run( 1000 ); // Put back after testing. +#endif +} + +#endif // #if defined( KOKKOS_ENABLE_TASKDAG ) + +//---------------------------------------------------------------------------- + +#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_QTHREADS ) + +TEST_F( qthreads, cxx11 ) +{ +#if 0 + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Qthreads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Qthreads >( 4 ) ) ); + } +#endif +} + +#endif + +TEST_F( qthreads, tile_layout ) +{ +#if 0 + TestTile::test< Kokkos::Qthreads, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Qthreads, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::Qthreads, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Qthreads, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Qthreads, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::Qthreads, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Qthreads, 8, 8 >( 9, 11 ); +#endif +} + +TEST_F( qthreads, dispatch ) +{ +#if 0 + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Qthreads >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a2470ac15c45431e852981a94f792bb2710535d7 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Reductions.cpp @@ -0,0 +1,168 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, long_reduce ) +{ +#if 0 + TestReduce< long, Kokkos::Qthreads >( 0 ); + TestReduce< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, double_reduce ) +{ +#if 0 + TestReduce< double, Kokkos::Qthreads >( 0 ); + TestReduce< double, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, reducers ) +{ +#if 0 + TestReducers< int, Kokkos::Qthreads >::execute_integer(); + TestReducers< size_t, Kokkos::Qthreads >::execute_integer(); + TestReducers< double, Kokkos::Qthreads >::execute_float(); + TestReducers< Kokkos::complex<double >, Kokkos::Qthreads>::execute_basic(); +#endif +} + +TEST_F( qthreads, long_reduce_dynamic ) +{ +#if 0 + TestReduceDynamic< long, Kokkos::Qthreads >( 0 ); + TestReduceDynamic< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, double_reduce_dynamic ) +{ +#if 0 + TestReduceDynamic< double, Kokkos::Qthreads >( 0 ); + TestReduceDynamic< double, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, long_reduce_dynamic_view ) +{ +#if 0 + TestReduceDynamicView< long, Kokkos::Qthreads >( 0 ); + TestReduceDynamicView< long, Kokkos::Qthreads >( 1000000 ); +#endif +} + +TEST_F( qthreads, scan ) +{ +#if 0 + TestScan< Kokkos::Qthreads >::test_range( 1, 1000 ); + TestScan< Kokkos::Qthreads >( 0 ); + TestScan< Kokkos::Qthreads >( 100000 ); + TestScan< Kokkos::Qthreads >( 10000000 ); + Kokkos::Qthreads::fence(); +#endif +} + +TEST_F( qthreads, scan_small ) +{ +#if 0 + typedef TestScan< Kokkos::Qthreads, Kokkos::Impl::QthreadsExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { + TestScanFunctor( 10 ); + TestScanFunctor( 10000 ); + } + TestScanFunctor( 1000000 ); + TestScanFunctor( 10000000 ); + + Kokkos::Qthreads::fence(); +#endif +} + +TEST_F( qthreads, team_scan ) +{ +#if 0 + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); +#endif +} + +TEST_F( qthreads, team_long_reduce ) +{ +#if 0 + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, Kokkos::Qthreads, 
Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +#endif +} + +TEST_F( qthreads, team_double_reduce ) +{ +#if 0 + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +#endif +} + +TEST_F( qthreads, reduction_deduction ) +{ +#if 0 + TestCXX11::test_reduction_deduction< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ab873359a748e6086533454f7a0842a5e8dee9e6 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_a.cpp @@ -0,0 +1,125 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_auto_1d_left ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_auto_1d_right ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_auto_1d_stride ) +{ +#if 0 + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_assign_strided ) +{ +#if 0 + TestViewSubview::test_1d_strided_assignment< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_0 ) +{ +#if 0 + TestViewSubview::test_left_0< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_1 ) +{ +#if 0 + TestViewSubview::test_left_1< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_2 ) +{ +#if 0 + TestViewSubview::test_left_2< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_left_3 ) +{ +#if 0 + TestViewSubview::test_left_3< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_0 ) +{ +#if 0 + TestViewSubview::test_right_0< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_1 ) +{ +#if 0 + TestViewSubview::test_right_1< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_subview_right_3 ) +{ +#if 0 + TestViewSubview::test_right_3< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..199c5c795557bb4da254c24d320a99240768e014 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_b.cpp @@ -0,0 +1,66 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_layoutleft_to_layoutleft ) +{ +#if 0 + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +TEST_F( qthreads, view_subview_layoutright_to_layoutright ) +{ +#if 0 + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f44909f3daffd71b13a12eba33b4e8e142e946ad --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c01.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7bb936f8dd511034924d779362f34e10833b2668 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c02.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign_atomic ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp new file mode 100644 index 0000000000000000000000000000000000000000..27073dfa814683a77a0edc602e23f3c3aadcd0e2 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c03.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_1d_assign_randomaccess ) +{ +#if 0 + TestViewSubview::test_1d_assign< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp new file mode 100644 index 0000000000000000000000000000000000000000..1b3cf488521b6ed84aa7eda62084ba737d485abf --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c04.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp new file mode 100644 index 0000000000000000000000000000000000000000..34dda63e64da0cb39b1a7d977ff08477aa8bbfec --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c05.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d_atomic ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5a4ee50fb2f6b41ddfc504192a3815d4a1775f5e --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c06.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_2d_from_3d_randomaccess ) +{ +#if 0 + TestViewSubview::test_2d_subview_3d< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fe386e34a8083a8bc2084b6957f57124a78d41c3 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c07.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a3e0ab25291334f291adf3ba743c822eea552380 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c08.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left_atomic ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp new file mode 100644 index 0000000000000000000000000000000000000000..df1f570e9dce927b75c11695a11124564e39d567 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c09.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_left_randomaccess ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_left< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cc3c80d10d7b3fd544ed7b49fa56b9f2f4e8b5a7 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c10.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp new file mode 100644 index 0000000000000000000000000000000000000000..14b331a4585efeb912c0ec7001cf0195657c60de --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c11.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right_atomic ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp new file mode 100644 index 0000000000000000000000000000000000000000..571382e66f52d5a6c8294af1d117ebaeb6fe25f5 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c12.cpp @@ -0,0 +1,55 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, view_subview_3d_from_5d_right_randomaccess ) +{ +#if 0 + TestViewSubview::test_3d_subview_5d_right< Kokkos::Qthreads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ab984c5f30e05958c0c601256ada3c13a70ee68d --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_SubView_c_all.cpp @@ -0,0 +1,12 @@ +#include <qthreads/TestQthreads_SubView_c01.cpp> +#include <qthreads/TestQthreads_SubView_c02.cpp> +#include <qthreads/TestQthreads_SubView_c03.cpp> +#include <qthreads/TestQthreads_SubView_c04.cpp> +#include <qthreads/TestQthreads_SubView_c05.cpp> +#include <qthreads/TestQthreads_SubView_c06.cpp> +#include <qthreads/TestQthreads_SubView_c07.cpp> +#include <qthreads/TestQthreads_SubView_c08.cpp> +#include <qthreads/TestQthreads_SubView_c09.cpp> +#include <qthreads/TestQthreads_SubView_c10.cpp> +#include <qthreads/TestQthreads_SubView_c11.cpp> +#include <qthreads/TestQthreads_SubView_c12.cpp> diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e7b81283fbf27e97427defbf1b0894793cc44ed2 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_Team.cpp @@ -0,0 +1,143 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, team_tag ) +{ +#if 0 + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); + + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); +#endif +} + +TEST_F( qthreads, team_shared_request ) +{ +#if 0 + TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >(); +#endif +} + +TEST_F( qthreads, team_scratch_request ) +{ +#if 0 + TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >(); +#endif +} + +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( qthreads, team_lambda_shared_request ) +{ +#if 0 + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> 
>(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >(); +#endif +} +#endif + +TEST_F( qthreads, shmem_size ) +{ +#if 0 + TestShmemSize< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, multi_level_scratch ) +{ +#if 0 + TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Qthreads, Kokkos::Schedule<Kokkos::Dynamic> >(); +#endif +} + +TEST_F( qthreads, team_vector ) +{ +#if 0 + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Qthreads >( 10 ) ) ); +#endif +} + +#ifdef KOKKOS_COMPILER_GNU +#if ( KOKKOS_COMPILER_GNU == 472 ) +#define SKIP_TEST +#endif +#endif + +#ifndef SKIP_TEST +TEST_F( qthreads, triple_nested_parallelism ) +{ +#if 0 + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Qthreads >( 8192, 2048, 16, 16 ); +#endif +} +#endif + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cd876a36bfa457f3c5f895d604f38be27fa4e986 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_a.cpp @@ -0,0 +1,56 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, impl_view_mapping_a ) +{ +#if 0 + test_view_mapping< Kokkos::Qthreads >(); + test_view_mapping_operator< Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp new file mode 100644 index 0000000000000000000000000000000000000000..adf048b61360b1aa9d49d9ce0f93453d580eb1a4 --- /dev/null +++ b/lib/kokkos/core/unit_test/qthreads/TestQthreads_ViewAPI_b.cpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <qthreads/TestQthreads.hpp> + +namespace Test { + +TEST_F( qthreads, impl_shared_alloc ) +{ +#if 0 + test_shared_alloc< Kokkos::HostSpace, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, impl_view_mapping_b ) +{ +#if 0 + test_view_mapping_subview< Kokkos::Qthreads >(); + TestViewMappingAtomic< Kokkos::Qthreads >::run(); +#endif +} + +TEST_F( qthreads, view_api ) +{ +#if 0 + TestViewAPI< double, Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_nested_view ) +{ +#if 0 + ::Test::view_nested_view< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, view_remap ) +{ +#if 0 + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Qthreads > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Qthreads > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Qthreads > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. + Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } +#endif +} + +TEST_F( qthreads, view_aggregate ) +{ +#if 0 + TestViewAggregate< Kokkos::Qthreads >(); +#endif +} + +TEST_F( qthreads, template_meta_functions ) +{ +#if 0 + TestTemplateMetaFunctions< int, Kokkos::Qthreads >(); +#endif +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial.hpp b/lib/kokkos/core/unit_test/serial/TestSerial.hpp index c0ffa6afb1843f7fe61693a778d9389e4c20fccb..03da07e065e371e636f1d2c59ba99a2832dd574c 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial.hpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_SERIAL_HPP #define KOKKOS_TEST_SERIAL_HPP + #include <gtest/gtest.h> #include <Kokkos_Macros.hpp> + #ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,21 +56,14 @@ #include <Kokkos_Core.hpp> #include <TestTile.hpp> - -//---------------------------------------------------------------------------- - #include <TestSharedAlloc.hpp> #include <TestViewMapping.hpp> - - #include <TestViewAPI.hpp> #include <TestViewOfClass.hpp> #include <TestViewSubview.hpp> #include <TestAtomic.hpp> #include <TestAtomicOperations.hpp> - #include <TestAtomicViews.hpp> - #include <TestRange.hpp> #include <TestTeam.hpp> #include <TestReduce.hpp> @@ -76,15 +72,11 @@ #include <TestCompilerMacros.hpp> #include <TestTaskScheduler.hpp> #include <TestMemoryPool.hpp> - - #include <TestCXX11.hpp> #include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> #include <TestTemplateMetaFunctions.hpp> - #include <TestPolicyConstruction.hpp> - #include <TestMDRange.hpp> namespace Test { @@ -92,14 +84,16 @@ namespace Test { class serial : public ::testing::Test { protected: static void 
SetUpTestCase() - { - Kokkos::HostSpace::execution_space::initialize(); - } + { + Kokkos::HostSpace::execution_space::initialize(); + } + static void TearDownTestCase() - { - Kokkos::HostSpace::execution_space::finalize(); - } + { + Kokkos::HostSpace::execution_space::finalize(); + } }; -} +} // namespace Test + #endif diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp index 729a76556dc4f3ff8110ba62b02dfc57ec878590..81ba532a3d45322ca561498585763d413256be3c 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Atomics.cpp @@ -40,165 +40,165 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial , atomics ) +TEST_F( serial, atomics ) { - const int loop_count = 1e6 ; + const int loop_count = 1e6; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,1) ) ); - ASSERT_TRUE( ( 
TestAtomic::Loop<double,Kokkos::Serial>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Serial>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Serial >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Serial>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Serial >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Serial>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Serial >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Serial>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Serial >( 100, 3 ) ) ); } -TEST_F( serial , atomic_operations ) +TEST_F( serial, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. 
const int end = 11; - for (int i = start; i < end; ++i) + + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Serial>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Serial>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long 
int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Serial>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Serial>(start, end-i, 12) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Serial>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, 
end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Serial>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Serial>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Serial >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Serial >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Serial >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Serial >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Serial >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Serial >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Serial >( start, end - i, 4 ) ) ); } - } -TEST_F( serial , atomic_views_integral ) +TEST_F( serial, atomic_views_integral ) { const long length = 1000000; - { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Serial>(length, 8 ) ) ); + { + // Integral Types. 
+ ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Serial >( length, 8 ) ) ); } } -TEST_F( serial , atomic_views_nonintegral ) +TEST_F( serial, atomic_views_nonintegral ) { const long length = 1000000; - { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Serial>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Serial>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Serial>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Serial>(length, 4 ) ) ); + { + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Serial >( length, 4 ) ) ); } } -TEST_F( serial , atomic_view_api ) +TEST_F( serial, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI<int, Kokkos::Serial>(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp index 43fc4c358745f3f01032723d029796a78bcf76a1..b40ed3f4afc5b4176f02c2ad7d16a5ce19f2614b 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -40,50 +40,61 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial , md_range ) { - TestMDRange_2D< Kokkos::Serial >::test_for2(100,100); +TEST_F( serial , mdrange_for ) +{ + TestMDRange_2D< Kokkos::Serial >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Serial >::test_for3( 100, 10, 100 ); + TestMDRange_4D< Kokkos::Serial >::test_for4( 100, 10, 10, 10 ); + TestMDRange_5D< Kokkos::Serial >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Serial >::test_for6( 10, 10, 10, 10, 5, 5 ); +} - TestMDRange_3D< Kokkos::Serial >::test_for3(100,100,100); +TEST_F( serial , mdrange_reduce ) +{ + TestMDRange_2D< Kokkos::Serial >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::Serial >::test_reduce3( 100, 10, 100 ); } -TEST_F( serial, policy_construction) { +TEST_F( serial, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::Serial >(); TestTeamPolicyConstruction< Kokkos::Serial >(); } -TEST_F( serial , range_tag ) +TEST_F( serial, range_tag ) { - TestRange< 
Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); - - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); + + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); + + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); + TestRange< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( serial , compiler_macros ) +TEST_F( serial, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Serial >() ) ); } //---------------------------------------------------------------------------- -TEST_F( serial , memory_pool ) +TEST_F( serial, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Serial >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -97,24 +108,24 @@ TEST_F( serial , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) -TEST_F( serial , task_fib ) +TEST_F( serial, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Serial >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Serial >::run( i ); } } -TEST_F( serial , task_depend ) +TEST_F( serial, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Serial >::run( i ); } } -TEST_F( serial , task_team ) +TEST_F( serial, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Serial >::run( 1000 ); + 
//TestTaskScheduler::TestTaskTeamValue< Kokkos::Serial >::run( 1000 ); // Put back after testing. } #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -122,44 +133,40 @@ TEST_F( serial , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL ) -TEST_F( serial , cxx11 ) +TEST_F( serial, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Serial >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Serial >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Serial >( 4 ) ) ); } } #endif TEST_F( serial, tile_layout ) { - TestTile::test< Kokkos::Serial , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 1 , 1 >( 9 , 10 ); - - TestTile::test< Kokkos::Serial , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 2 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Serial , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Serial , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::Serial , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Serial , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Serial, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::Serial, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Serial, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Serial, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::Serial, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Serial, 8, 8 >( 9, 11 ); } - - - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp index 25b5ac6d16a8d101dd1e7d940007a107d1c814fc..8a3d518cfbea93b97d9a885ac061a79494676362 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Reductions.cpp @@ -40,83 +40,90 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, long_reduce) { - TestReduce< long , Kokkos::Serial >( 0 ); - TestReduce< long , Kokkos::Serial >( 1000000 ); +TEST_F( 
serial, long_reduce ) +{ + TestReduce< long, Kokkos::Serial >( 0 ); + TestReduce< long, Kokkos::Serial >( 1000000 ); } -TEST_F( serial, double_reduce) { - TestReduce< double , Kokkos::Serial >( 0 ); - TestReduce< double , Kokkos::Serial >( 1000000 ); +TEST_F( serial, double_reduce ) +{ + TestReduce< double, Kokkos::Serial >( 0 ); + TestReduce< double, Kokkos::Serial >( 1000000 ); } -TEST_F( serial , reducers ) +TEST_F( serial, reducers ) { - TestReducers<int, Kokkos::Serial>::execute_integer(); - TestReducers<size_t, Kokkos::Serial>::execute_integer(); - TestReducers<double, Kokkos::Serial>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Serial>::execute_basic(); + TestReducers< int, Kokkos::Serial >::execute_integer(); + TestReducers< size_t, Kokkos::Serial >::execute_integer(); + TestReducers< double, Kokkos::Serial >::execute_float(); + TestReducers< Kokkos::complex<double >, Kokkos::Serial>::execute_basic(); } -TEST_F( serial, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Serial >( 0 ); - TestReduceDynamic< long , Kokkos::Serial >( 1000000 ); +TEST_F( serial, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Serial >( 0 ); + TestReduceDynamic< long, Kokkos::Serial >( 1000000 ); } -TEST_F( serial, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Serial >( 0 ); - TestReduceDynamic< double , Kokkos::Serial >( 1000000 ); +TEST_F( serial, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Serial >( 0 ); + TestReduceDynamic< double, Kokkos::Serial >( 1000000 ); } -TEST_F( serial, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Serial >( 0 ); - TestReduceDynamicView< long , Kokkos::Serial >( 1000000 ); +TEST_F( serial, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Serial >( 0 ); + TestReduceDynamicView< long, Kokkos::Serial >( 1000000 ); } -TEST_F( serial , scan ) +TEST_F( serial, scan ) { - TestScan< Kokkos::Serial >::test_range( 1 , 1000 ); + TestScan< Kokkos::Serial >::test_range( 1, 1000 ); TestScan< Kokkos::Serial >( 0 ); TestScan< Kokkos::Serial >( 10 ); TestScan< Kokkos::Serial >( 10000 ); } -TEST_F( serial , team_scan ) +TEST_F( serial, team_scan ) { - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -TEST_F( serial , team_long_reduce) { - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Serial , 
Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( serial, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( serial , team_double_reduce) { - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( serial, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( serial , reduction_deduction ) +TEST_F( serial, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp index bc838ccde4b36cf964d0da97500fdbd921a85aa0..3dc3e2019d9fd3927f422c689bfbd65fc45a997b 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Serial >(); } -TEST_F( serial, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Serial >(); } -TEST_F( serial, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Serial >(); +TEST_F( serial, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Serial >(); } -TEST_F( serial, view_subview_assign_strided ) { +TEST_F( serial, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_0 ) { +TEST_F( serial, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::Serial >(); } -TEST_F( serial, 
view_subview_left_1 ) { +TEST_F( serial, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_2 ) { +TEST_F( serial, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::Serial >(); } -TEST_F( serial, view_subview_left_3 ) { +TEST_F( serial, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_0 ) { +TEST_F( serial, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_1 ) { +TEST_F( serial, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::Serial >(); } -TEST_F( serial, view_subview_right_3 ) { +TEST_F( serial, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp index e6a5b56d3ed48ac2301e56b944e4924dcb79451e..536c3bf1979a5b3b9bc33cd8768a86ca3367a8c7 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_layoutleft_to_layoutleft) { +TEST_F( serial, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( serial, view_subview_layoutright_to_layoutright) { +TEST_F( serial, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp index 0b7a0d3bfa6fa514195a4fd6241fc262f0ad884d..579a12bf782a34c4739c9e4a30685878dc55900e 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign ) { +TEST_F( serial, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp index 
8ca7285c1f8331cb6992411d6b35d7bc054945a3..ff009fef27715a8b366e848267eaa4c6c10bc2d7 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( serial, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp index 1d156c741524315d2fb66fdc5e852329d846d3ae..a20478433cd2b87f0e07a0e793143c4f6f2ddf40 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( serial, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp index ebf0e5c99155afe17dea3807981d712e1d67c601..a34b26d9f79317b90dd0bfaf06385ad638d4757f 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_2d_from_3d ) { +TEST_F( serial, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp index 74acb92f1b9e632a980b7d0141a54200aebbfd15..6d1882cf04e3d384773d384215cd0244ebd8cfcd 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( serial, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp index 8075d46e0fe15c4c15a47e80f6172d4990fd6ce5..12fb883b63e12812c947facc4b070c0577d09783 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c06.cpp 
@@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( serial, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp index 9ce8222643a5d3a183fad578013945a67efd6847..8aae20c0239d5a6272879887c7626f0e1a0e2f2a 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left ) { +TEST_F( serial, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp index c8a5c8f33fdc70a2408aade42f21b3c451753b4c..e75db8d52dc1250b582d62c7e51b6bda8ce00b9b 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( serial, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp index b66f15f17da1b7f0bcb24459678965dacee04f9b..b9cea2ce89c6f2bb311299ee6463ac34185245d8 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( serial, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp index 5e5e3cf3d1af0f0755ab8fa3f8be9f846ff554e9..e5dbcead376ebdcb37a4bb79dfdfe1916b3e2d0d 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include 
<serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right ) { +TEST_F( serial, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp index 55a353bcafef5e852ec33c80d9084f7c2236efcc..3005030f934551a0f8ea5d6be7772cfefa605a98 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( serial, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp index a168e1e232ff5f71cce593be776496cbd7dd6c25..fee8cb7af2a20cdebafa9270932cda2457363602 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( serial, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Serial, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp index a489b0fcb585aa0e12310f09a0701188b8814045..24dc6b5061412c04998f734cab9f1367a9b7d4fe 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_SubView_c_all.cpp @@ -1,12 +1,12 @@ -#include<serial/TestSerial_SubView_c01.cpp> -#include<serial/TestSerial_SubView_c02.cpp> -#include<serial/TestSerial_SubView_c03.cpp> -#include<serial/TestSerial_SubView_c04.cpp> -#include<serial/TestSerial_SubView_c05.cpp> -#include<serial/TestSerial_SubView_c06.cpp> -#include<serial/TestSerial_SubView_c07.cpp> -#include<serial/TestSerial_SubView_c08.cpp> -#include<serial/TestSerial_SubView_c09.cpp> -#include<serial/TestSerial_SubView_c10.cpp> -#include<serial/TestSerial_SubView_c11.cpp> -#include<serial/TestSerial_SubView_c12.cpp> +#include <serial/TestSerial_SubView_c01.cpp> +#include <serial/TestSerial_SubView_c02.cpp> +#include <serial/TestSerial_SubView_c03.cpp> +#include <serial/TestSerial_SubView_c04.cpp> +#include <serial/TestSerial_SubView_c05.cpp> +#include <serial/TestSerial_SubView_c06.cpp> +#include <serial/TestSerial_SubView_c07.cpp> +#include <serial/TestSerial_SubView_c08.cpp> +#include <serial/TestSerial_SubView_c09.cpp> +#include <serial/TestSerial_SubView_c10.cpp> +#include <serial/TestSerial_SubView_c11.cpp> +#include <serial/TestSerial_SubView_c12.cpp> diff 
--git a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp index df400b4cb51587b76992c26ff28419b334b5d2d6..f13b2ce1b4bd20e92509fc9dc1801352ff3bb289 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Team.cpp @@ -40,62 +40,68 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial , team_tag ) +TEST_F( serial, team_tag ) { - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } -TEST_F( serial , team_shared_request) { - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( serial, team_shared_request ) +{ + TestSharedTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( serial, team_scratch_request) { - TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( serial, team_scratch_request ) +{ + TestScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( serial , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( serial, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); } #endif -TEST_F( serial, shmem_size) { +TEST_F( serial, shmem_size ) +{ TestShmemSize< Kokkos::Serial >(); } -TEST_F( serial, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Static> >(); - 
TestMultiLevelScratchTeam< Kokkos::Serial , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( serial, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Serial, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( serial , team_vector ) +TEST_F( serial, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Serial >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -107,11 +113,10 @@ TEST_F( serial , team_vector ) #ifndef SKIP_TEST TEST_F( serial, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Serial >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp index 4c655fe770f26fd8d6b239251c5d6301140faa09..2192159b8439a2b4fdd0fcc38b3be4d382973821 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // ************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial , impl_view_mapping_a ) { +TEST_F( serial, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::Serial >(); test_view_mapping_operator< Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp index 4947f2eaaef607b04d680a7c9c64ae6f2d8e6087..8c48ad2ceda81ca46913e3d3206fac96e492950a 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // 
************************************************************************ //@HEADER */ + #include <serial/TestSerial.hpp> namespace Test { -TEST_F( serial , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Serial >(); +TEST_F( serial, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::Serial >(); } -TEST_F( serial , impl_view_mapping_b ) { +TEST_F( serial, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::Serial >(); TestViewMappingAtomic< Kokkos::Serial >::run(); } -TEST_F( serial, view_api) { - TestViewAPI< double , Kokkos::Serial >(); +TEST_F( serial, view_api ) +{ + TestViewAPI< double, Kokkos::Serial >(); } -TEST_F( serial , view_nested_view ) +TEST_F( serial, view_nested_view ) { ::Test::view_nested_view< Kokkos::Serial >(); } - - -TEST_F( serial , view_remap ) +TEST_F( serial, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Serial > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Serial > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Serial > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Serial > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Serial > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
+ Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( serial , view_aggregate ) +TEST_F( serial, view_aggregate ) { TestViewAggregate< Kokkos::Serial >(); } -TEST_F( serial , template_meta_functions ) +TEST_F( serial, template_meta_functions ) { - TestTemplateMetaFunctions<int, Kokkos::Serial >(); + TestTemplateMetaFunctions< int, Kokkos::Serial >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads.hpp b/lib/kokkos/core/unit_test/threads/TestThreads.hpp index 4f611cf99c7c0e4f3c4b26f0fada9c7c8469ddbe..0afd6772fefff3e2efd7d490d35f985346163fd6 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads.hpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads.hpp @@ -40,11 +40,14 @@ // ************************************************************************ //@HEADER */ + #ifndef KOKKOS_TEST_THREADS_HPP #define KOKKOS_TEST_THREADS_HPP + #include <gtest/gtest.h> #include <Kokkos_Macros.hpp> + #ifdef KOKKOS_LAMBDA #undef KOKKOS_LAMBDA #endif @@ -53,13 +56,8 @@ #include <Kokkos_Core.hpp> #include <TestTile.hpp> - -//---------------------------------------------------------------------------- - #include <TestSharedAlloc.hpp> #include <TestViewMapping.hpp> - - #include <TestViewAPI.hpp> #include <TestViewOfClass.hpp> #include <TestViewSubview.hpp> @@ -74,15 +72,11 @@ #include <TestCompilerMacros.hpp> #include <TestTaskScheduler.hpp> #include <TestMemoryPool.hpp> - - #include <TestCXX11.hpp> #include <TestCXX11Deduction.hpp> #include <TestTeamVector.hpp> #include <TestTemplateMetaFunctions.hpp> - #include <TestPolicyConstruction.hpp> - #include <TestMDRange.hpp> namespace Test { @@ -95,13 +89,13 @@ protected: const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - unsigned threads_count = 0 ; + unsigned threads_count = 0; - threads_count = std::max( 1u , numa_count ) - * std::max( 2u , cores_per_numa * threads_per_core ); + threads_count = std::max( 1u, numa_count ) + * std::max( 2u, cores_per_numa * threads_per_core ); Kokkos::Threads::initialize( threads_count ); - Kokkos::Threads::print_configuration( std::cout , true /* detailed */ ); + Kokkos::print_configuration( std::cout, true /* detailed */ ); } static void TearDownTestCase() @@ -110,6 +104,6 @@ protected: } }; +} // namespace Test -} #endif diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp index 6e24c4973ed7c37ff559a5ad023a69fabb607b29..d2a5ea5d6352acc79606082fd75c465b0b5b515e 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Atomics.cpp @@ -40,165 +40,161 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads , atomics ) +TEST_F( threads, atomics ) { - const int loop_count = 1e4 ; + const int loop_count = 1e4; - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( 
TestAtomic::Loop<int,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned int,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long int,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<unsigned long int,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< unsigned long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<long long int,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< long long int, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<double,Kokkos::Threads>(loop_count,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< double, Kokkos::Threads >( loop_count, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<float,Kokkos::Threads>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< float, Kokkos::Threads >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<Kokkos::complex<double> ,Kokkos::Threads>(100,3) ) ); + 
ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< Kokkos::complex<double>, Kokkos::Threads >( 100, 3 ) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,1) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,2) ) ); - ASSERT_TRUE( ( TestAtomic::Loop<TestAtomic::SuperScalar<4> ,Kokkos::Threads>(100,3) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 1 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 2 ) ) ); + ASSERT_TRUE( ( TestAtomic::Loop< TestAtomic::SuperScalar<4>, Kokkos::Threads >( 100, 3 ) ) ); } -TEST_F( threads , atomic_operations ) +TEST_F( threads, atomic_operations ) { - const int start = 1; //Avoid zero for division + const int start = 1; // Avoid zero for division. const int end = 11; - for (int i = start; i < end; ++i) + for ( int i = start; i < end; ++i ) { - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<int,Kokkos::Threads>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned int,Kokkos::Threads>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long int,Kokkos::Threads>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<unsigned long int,Kokkos::Threads>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 4 ) ) ); - ASSERT_TRUE( ( 
TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 5 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 8 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 9 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 11 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType<long long int,Kokkos::Threads>(start, end-i, 12 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<double,Kokkos::Threads>(start, end-i, 4 ) ) ); - - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType<float,Kokkos::Threads>(start, end-i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< int, Kokkos::Threads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads 
>( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned int, Kokkos::Threads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long int, Kokkos::Threads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end 
- i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< unsigned long int, Kokkos::Threads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 8 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 9 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 11 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestIntegralType< long long int, Kokkos::Threads >( start, end - i, 12 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< double, Kokkos::Threads >( start, end - i, 4 ) ) ); + + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicOperations::AtomicOperationsTestNonIntegralType< float, Kokkos::Threads >( start, end - i, 4 ) ) ); } - } - -TEST_F( threads , atomic_views_integral ) +TEST_F( threads, atomic_views_integral ) { const long length = 1000000; { - //Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 4 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 5 ) 
) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 6 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 7 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType<long, Kokkos::Threads>(length, 8 ) ) ); - + // Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 4 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 5 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 6 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 7 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestIntegralType< long, Kokkos::Threads >( length, 8 ) ) ); } } -TEST_F( threads , atomic_views_nonintegral ) +TEST_F( threads, atomic_views_nonintegral ) { const long length = 1000000; { - //Non-Integral Types - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Threads>(length, 1 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Threads>(length, 2 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Threads>(length, 3 ) ) ); - ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType<double,Kokkos::Threads>(length, 4 ) ) ); - + // Non-Integral Types. + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 1 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 2 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 3 ) ) ); + ASSERT_TRUE( ( TestAtomicViews::AtomicViewsTestNonIntegralType< double, Kokkos::Threads >( length, 4 ) ) ); } } -TEST_F( threads , atomic_view_api ) +TEST_F( threads, atomic_view_api ) { - TestAtomicViews::TestAtomicViewAPI<int, Kokkos::Threads>(); + TestAtomicViews::TestAtomicViewAPI< int, Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp index ac0356eeb4c9c15d5409c0e9d10a772941de57d0..7d268c14547e4680c1ad57d8e66e2b1a4bfaf501 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -40,65 +40,74 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads , init ) { +TEST_F( threads, init ) +{ ; } -TEST_F( threads , md_range ) { - TestMDRange_2D< Kokkos::Threads >::test_for2(100,100); +TEST_F( threads , mdrange_for ) { + TestMDRange_2D< Kokkos::Threads >::test_for2( 100, 100 ); + TestMDRange_3D< Kokkos::Threads >::test_for3( 100, 10, 100 ); + TestMDRange_4D< Kokkos::Threads >::test_for4( 100, 10, 10, 10 ); + TestMDRange_5D< Kokkos::Threads >::test_for5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< Kokkos::Threads >::test_for6( 10, 10, 10, 10, 5, 5 ); +} - TestMDRange_3D< Kokkos::Threads 
>::test_for3(100,100,100); +TEST_F( threads , mdrange_reduce ) { + TestMDRange_2D< Kokkos::Threads >::test_reduce2( 100, 100 ); + TestMDRange_3D< Kokkos::Threads >::test_reduce3( 100, 10, 100 ); } -TEST_F( threads, policy_construction) { +TEST_F( threads, policy_construction ) +{ TestRangePolicyConstruction< Kokkos::Threads >(); TestTeamPolicyConstruction< Kokkos::Threads >(); } -TEST_F( threads , range_tag ) +TEST_F( threads, range_tag ) { - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(0); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(0); - - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(2); - - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(3); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(3); - - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_scan(1000); - - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_scan(1001); - TestRange< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy(1000); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 0 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 0 ); + + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 2 ); + + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 3 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 3 ); + + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestRange< Kokkos::Threads, 
Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_scan( 1000 ); + + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_scan( 1001 ); + TestRange< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_dynamic_policy( 1000 ); } - //---------------------------------------------------------------------------- -TEST_F( threads , compiler_macros ) +TEST_F( threads, compiler_macros ) { ASSERT_TRUE( ( TestCompilerMacros::Test< Kokkos::Threads >() ) ); } //---------------------------------------------------------------------------- -TEST_F( threads , memory_pool ) +TEST_F( threads, memory_pool ) { bool val = TestMemoryPool::test_mempool< Kokkos::Threads >( 128, 128000000 ); ASSERT_TRUE( val ); @@ -112,24 +121,24 @@ TEST_F( threads , memory_pool ) #if defined( KOKKOS_ENABLE_TASKDAG ) /* -TEST_F( threads , task_fib ) +TEST_F( threads, task_fib ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestFib< Kokkos::Threads >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestFib< Kokkos::Threads >::run( i ); } } -TEST_F( threads , task_depend ) +TEST_F( threads, task_depend ) { - for ( int i = 0 ; i < 25 ; ++i ) { - TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run(i); + for ( int i = 0; i < 25; ++i ) { + TestTaskScheduler::TestTaskDependence< Kokkos::Threads >::run( i ); } } -TEST_F( threads , task_team ) +TEST_F( threads, task_team ) { - TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run(1000); - //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run(1000); //put back after testing + TestTaskScheduler::TestTaskTeam< Kokkos::Threads >::run( 1000 ); + //TestTaskScheduler::TestTaskTeamValue< Kokkos::Threads >::run( 1000 ); // Put back after testing. 
} */ #endif /* #if defined( KOKKOS_ENABLE_TASKDAG ) */ @@ -137,53 +146,51 @@ TEST_F( threads , task_team ) //---------------------------------------------------------------------------- #if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) -TEST_F( threads , cxx11 ) +TEST_F( threads, cxx11 ) { - if ( std::is_same< Kokkos::DefaultExecutionSpace , Kokkos::Threads >::value ) { - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >(4) ) ); + if ( std::is_same< Kokkos::DefaultExecutionSpace, Kokkos::Threads >::value ) { + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 1 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 2 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 3 ) ) ); + ASSERT_TRUE( ( TestCXX11::Test< Kokkos::Threads >( 4 ) ) ); } } #endif TEST_F( threads, tile_layout ) { - TestTile::test< Kokkos::Threads , 1 , 1 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 1 , 1 >( 2 , 3 ); - TestTile::test< Kokkos::Threads , 1 , 1 >( 9 , 10 ); - - TestTile::test< Kokkos::Threads , 2 , 2 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 2 , 3 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 2 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Threads , 2 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 4 , 2 >( 9 , 9 ); - - TestTile::test< Kokkos::Threads , 4 , 4 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 4 , 4 >( 9 , 11 ); - - TestTile::test< Kokkos::Threads , 8 , 8 >( 1 , 1 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 4 , 4 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 9 ); - TestTile::test< Kokkos::Threads , 8 , 8 >( 9 , 11 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 2, 3 ); + TestTile::test< Kokkos::Threads, 1, 1 >( 9, 10 ); + + TestTile::test< Kokkos::Threads, 2, 2 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 2, 3 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 2, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Threads, 2, 4 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 4, 2 >( 9, 9 ); + + TestTile::test< Kokkos::Threads, 4, 4 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 4, 4 >( 9, 11 ); + + TestTile::test< Kokkos::Threads, 8, 8 >( 1, 1 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 4, 4 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 9, 9 ); + TestTile::test< Kokkos::Threads, 8, 8 >( 9, 11 ); } - -TEST_F( threads , dispatch ) +TEST_F( threads, dispatch ) { - const int repeat = 100 ; - for ( int i = 0 ; i < repeat ; ++i ) { - for ( int j = 0 ; j < repeat ; ++j ) { - Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >(0,j) - , KOKKOS_LAMBDA( int ) {} ); - }} + const int repeat = 100; + for ( int i = 0; i < repeat; ++i ) { + for ( int j = 0; j < repeat; ++j ) { + Kokkos::parallel_for( Kokkos::RangePolicy< Kokkos::Threads >( 0, j ) + , KOKKOS_LAMBDA( int ) {} ); + } + } } - -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp index 
a637d1e3ab654b402e49b7d3aec582e425d2592a..d2b75ca892b5abcf3f405aec37459f53c2a3aafc 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Reductions.cpp @@ -40,46 +40,52 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, long_reduce) { - TestReduce< long , Kokkos::Threads >( 0 ); - TestReduce< long , Kokkos::Threads >( 1000000 ); +TEST_F( threads, long_reduce ) +{ + TestReduce< long, Kokkos::Threads >( 0 ); + TestReduce< long, Kokkos::Threads >( 1000000 ); } -TEST_F( threads, double_reduce) { - TestReduce< double , Kokkos::Threads >( 0 ); - TestReduce< double , Kokkos::Threads >( 1000000 ); +TEST_F( threads, double_reduce ) +{ + TestReduce< double, Kokkos::Threads >( 0 ); + TestReduce< double, Kokkos::Threads >( 1000000 ); } -TEST_F( threads , reducers ) +TEST_F( threads, reducers ) { - TestReducers<int, Kokkos::Threads>::execute_integer(); - TestReducers<size_t, Kokkos::Threads>::execute_integer(); - TestReducers<double, Kokkos::Threads>::execute_float(); - TestReducers<Kokkos::complex<double>, Kokkos::Threads>::execute_basic(); + TestReducers< int, Kokkos::Threads >::execute_integer(); + TestReducers< size_t, Kokkos::Threads >::execute_integer(); + TestReducers< double, Kokkos::Threads >::execute_float(); + TestReducers< Kokkos::complex<double>, Kokkos::Threads >::execute_basic(); } -TEST_F( threads, long_reduce_dynamic ) { - TestReduceDynamic< long , Kokkos::Threads >( 0 ); - TestReduceDynamic< long , Kokkos::Threads >( 1000000 ); +TEST_F( threads, long_reduce_dynamic ) +{ + TestReduceDynamic< long, Kokkos::Threads >( 0 ); + TestReduceDynamic< long, Kokkos::Threads >( 1000000 ); } -TEST_F( threads, double_reduce_dynamic ) { - TestReduceDynamic< double , Kokkos::Threads >( 0 ); - TestReduceDynamic< double , Kokkos::Threads >( 1000000 ); +TEST_F( threads, double_reduce_dynamic ) +{ + TestReduceDynamic< double, Kokkos::Threads >( 0 ); + TestReduceDynamic< double, Kokkos::Threads >( 1000000 ); } -TEST_F( threads, long_reduce_dynamic_view ) { - TestReduceDynamicView< long , Kokkos::Threads >( 0 ); - TestReduceDynamicView< long , Kokkos::Threads >( 1000000 ); +TEST_F( threads, long_reduce_dynamic_view ) +{ + TestReduceDynamicView< long, Kokkos::Threads >( 0 ); + TestReduceDynamicView< long, Kokkos::Threads >( 1000000 ); } -TEST_F( threads , scan ) +TEST_F( threads, scan ) { - TestScan< Kokkos::Threads >::test_range( 1 , 1000 ); + TestScan< Kokkos::Threads >::test_range( 1, 1000 ); TestScan< Kokkos::Threads >( 0 ); TestScan< Kokkos::Threads >( 100000 ); TestScan< Kokkos::Threads >( 10000000 ); @@ -87,10 +93,11 @@ TEST_F( threads , scan ) } #if 0 -TEST_F( threads , scan_small ) +TEST_F( threads, scan_small ) { - typedef TestScan< Kokkos::Threads , Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor ; - for ( int i = 0 ; i < 1000 ; ++i ) { + typedef TestScan< Kokkos::Threads, Kokkos::Impl::ThreadsExecUseScanSmall > TestScanFunctor; + + for ( int i = 0; i < 1000; ++i ) { TestScanFunctor( 10 ); TestScanFunctor( 10000 ); } @@ -101,38 +108,39 @@ TEST_F( threads , scan_small ) } #endif -TEST_F( threads , team_scan ) +TEST_F( threads, team_scan ) { - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10 ); - TestScanTeam< Kokkos::Threads , 
Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 10000 ); - TestScanTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 10 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 10 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 10000 ); + TestScanTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 10000 ); } -TEST_F( threads , team_long_reduce) { - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< long , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( threads, team_long_reduce ) +{ + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< long, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( threads , team_double_reduce) { - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 0 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >( 100000 ); - TestReduceTeam< double , Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); +TEST_F( threads, team_double_reduce ) +{ + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 0 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 0 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 3 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 3 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >( 100000 ); + TestReduceTeam< double, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >( 100000 ); } -TEST_F( threads , reduction_deduction ) +TEST_F( threads, reduction_deduction ) { TestCXX11::test_reduction_deduction< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp index 2df9e19deb0130359d81b8c3cc001bb85ee7cb2f..68a9da6aedef550e94c037df93ff6dc741ff3589 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_a.cpp @@ -40,53 +40,64 @@ // ************************************************************************ //@HEADER */ 
+ #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_auto_1d_left ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutLeft,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_left ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutLeft, Kokkos::Threads >(); } -TEST_F( threads, view_subview_auto_1d_right ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutRight,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_right ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutRight, Kokkos::Threads >(); } -TEST_F( threads, view_subview_auto_1d_stride ) { - TestViewSubview::test_auto_1d< Kokkos::LayoutStride,Kokkos::Threads >(); +TEST_F( threads, view_subview_auto_1d_stride ) +{ + TestViewSubview::test_auto_1d< Kokkos::LayoutStride, Kokkos::Threads >(); } -TEST_F( threads, view_subview_assign_strided ) { +TEST_F( threads, view_subview_assign_strided ) +{ TestViewSubview::test_1d_strided_assignment< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_0 ) { +TEST_F( threads, view_subview_left_0 ) +{ TestViewSubview::test_left_0< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_1 ) { +TEST_F( threads, view_subview_left_1 ) +{ TestViewSubview::test_left_1< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_2 ) { +TEST_F( threads, view_subview_left_2 ) +{ TestViewSubview::test_left_2< Kokkos::Threads >(); } -TEST_F( threads, view_subview_left_3 ) { +TEST_F( threads, view_subview_left_3 ) +{ TestViewSubview::test_left_3< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_0 ) { +TEST_F( threads, view_subview_right_0 ) +{ TestViewSubview::test_right_0< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_1 ) { +TEST_F( threads, view_subview_right_1 ) +{ TestViewSubview::test_right_1< Kokkos::Threads >(); } -TEST_F( threads, view_subview_right_3 ) { +TEST_F( threads, view_subview_right_3 ) +{ TestViewSubview::test_right_3< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp index d57dbe97c0d38aaa6a2e48816eb9872a8585afb7..c5cf061e8289d9d8ac5ffea92d38c9cd91349922 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_b.cpp @@ -40,21 +40,23 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_layoutleft_to_layoutleft) { +TEST_F( threads, view_subview_layoutleft_to_layoutleft ) +{ TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutleft_to_layoutleft< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -TEST_F( threads, view_subview_layoutright_to_layoutright) { +TEST_F( threads, view_subview_layoutright_to_layoutright ) +{ TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); - TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads , 
Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); + TestViewSubview::test_layoutright_to_layoutright< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp index 67d998c0e86488df0023cc0138ffe022cdc52d94..9018c1f4f799c1f76ee082c57dedc644627c7a75 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c01.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign ) { +TEST_F( threads, view_subview_1d_assign ) +{ TestViewSubview::test_1d_assign< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp index e340240c48d6d28c9bc4c79b777a3e1a4a8c4ddc..9483abd9cc3f78430f2234c71708fe0315a949a9 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c02.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign_atomic ) { - TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( threads, view_subview_1d_assign_atomic ) +{ + TestViewSubview::test_1d_assign< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp index ad27fa0fa6cee9db3eb63c581a175eee0cdd6e4e..e252a26565bf6dad6387b87340c5c93cd2b3415f 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c03.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_1d_assign_randomaccess ) { - TestViewSubview::test_1d_assign< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( threads, view_subview_1d_assign_randomaccess ) +{ + TestViewSubview::test_1d_assign< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp index 6fca47cc4ce41b56155fac8ce1d4b158d5e99c82..3e211b1a58542b6307a731c3765190e91132d4dd 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c04.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_2d_from_3d ) { +TEST_F( threads, view_subview_2d_from_3d ) +{ TestViewSubview::test_2d_subview_3d< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff 
--git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp index c7dfca941582dee3d667f60152854ea30b393548..865d50b1a1b918b99fb36d2a3e5c889a7c93e5a7 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c05.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_2d_from_3d_atomic ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( threads, view_subview_2d_from_3d_atomic ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp index 38e8394918614fdb528e9111d7fc1f54c7ff4d83..c5840073b6486226281942bfd0c0ad8e2052ff85 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c06.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_2d_from_3d_randomaccess ) { - TestViewSubview::test_2d_subview_3d< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( threads, view_subview_2d_from_3d_randomaccess ) +{ + TestViewSubview::test_2d_subview_3d< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp index 1f01fe6b5e6104416bb1f2f680cafeab48cac1ad..7b8825ef628dbaa4449f7830abd4e227d842dccc 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c07.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left ) { +TEST_F( threads, view_subview_3d_from_5d_left ) +{ TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp index e9a1ccbe30edcf7f512a5c20462df83cf52c3ac4..7bc16a5827a602193db55f7ffa044b38babef77d 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c08.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_atomic ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( threads, view_subview_3d_from_5d_left_atomic ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp 
b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp index c8b6c8743dd25a97db5f00e5bc7157c9f040c5d9..57b87b6098bdd818c8e215ffb1d5938043746494 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c09.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) { - TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( threads, view_subview_3d_from_5d_left_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_left< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp index 7cef6fa07be88859c063470857d775964c74f2fa..1875a883d485e1620430cadc59c09554dfc00ac1 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c10.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right ) { +TEST_F( threads, view_subview_3d_from_5d_right ) +{ TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp index d67bf3157e337fef0af36dbba934f8bc22d74d0c..cf6428b18e333d66f4637fc92a45dc7f51052cc6 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c11.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_atomic ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::Atomic> >(); +TEST_F( threads, view_subview_3d_from_5d_right_atomic ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::Atomic> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp index e8a2c825cf3a9474d149d81a225cbadb16338cd7..7060fdb273c928d7346686c54d0a374188c47257 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_SubView_c12.cpp @@ -40,13 +40,14 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) { - TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads , Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); +TEST_F( threads, view_subview_3d_from_5d_right_randomaccess ) +{ + TestViewSubview::test_3d_subview_5d_right< Kokkos::Threads, Kokkos::MemoryTraits<Kokkos::RandomAccess> >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp 
b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp index 4690be4d3a75d8e5a7b66676ecf6b0482952d116..d802d658309b4ecfbd28a5ec4ce6d17edc4a5f4a 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Team.cpp @@ -40,67 +40,73 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads , team_tag ) +TEST_F( threads, team_tag ) { - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(0); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(0); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 0 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 0 ); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(2); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(2); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 2 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 2 ); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >::test_reduce(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_for(1000); - TestTeamPolicy< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce(1000); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >::test_reduce( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_for( 1000 ); + TestTeamPolicy< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >::test_reduce( 1000 ); } -TEST_F( threads , team_shared_request) { - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); - TestSharedTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( threads, team_shared_request ) +{ + TestSharedTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); + TestSharedTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( threads, team_scratch_request) { - TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); - TestScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( threads, team_scratch_request ) +{ + TestScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); + TestScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); } -#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) -TEST_F( threads , team_lambda_shared_request) { - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads 
, Kokkos::Schedule<Kokkos::Static> >(); - TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA ) +TEST_F( threads, team_lambda_shared_request ) +{ + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); + TestLambdaSharedTeam< Kokkos::HostSpace, Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); } #endif -TEST_F( threads, shmem_size) { +TEST_F( threads, shmem_size ) +{ TestShmemSize< Kokkos::Threads >(); } -TEST_F( threads, multi_level_scratch) { - TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Static> >(); - TestMultiLevelScratchTeam< Kokkos::Threads , Kokkos::Schedule<Kokkos::Dynamic> >(); +TEST_F( threads, multi_level_scratch ) +{ + TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Static> >(); + TestMultiLevelScratchTeam< Kokkos::Threads, Kokkos::Schedule<Kokkos::Dynamic> >(); } -TEST_F( threads , team_vector ) +TEST_F( threads, team_vector ) { - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(0) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(1) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(2) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(3) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(4) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(5) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(6) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(7) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(8) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(9) ) ); - ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >(10) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 0 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 1 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 2 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 3 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 4 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 5 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 6 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 7 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 8 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 9 ) ) ); + ASSERT_TRUE( ( TestTeamVector::Test< Kokkos::Threads >( 10 ) ) ); } #ifdef KOKKOS_COMPILER_GNU @@ -112,11 +118,10 @@ TEST_F( threads , team_vector ) #ifndef SKIP_TEST TEST_F( threads, triple_nested_parallelism ) { - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 32 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 32 , 16 ); - TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048 , 16 , 16 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 32 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 32, 16 ); + TestTripleNestedReduce< double, Kokkos::Threads >( 8192, 2048, 16, 16 ); } #endif -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp index 46a576b027fb2149302239ba31d6e53bd001e3ce..36eae287936ad9854dd030fc304506c3d3745c03 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp @@ -40,14 +40,15 @@ // 
************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads , impl_view_mapping_a ) { +TEST_F( threads, impl_view_mapping_a ) +{ test_view_mapping< Kokkos::Threads >(); test_view_mapping_operator< Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp index b5d6ac843d8177149d53fe1cb52528c6ef760f3d..8c78d094435b3f524668cb1bffa44b5144749063 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_b.cpp @@ -40,82 +40,85 @@ // ************************************************************************ //@HEADER */ + #include <threads/TestThreads.hpp> namespace Test { -TEST_F( threads , impl_shared_alloc ) { - test_shared_alloc< Kokkos::HostSpace , Kokkos::Threads >(); +TEST_F( threads, impl_shared_alloc ) +{ + test_shared_alloc< Kokkos::HostSpace, Kokkos::Threads >(); } -TEST_F( threads , impl_view_mapping_b ) { +TEST_F( threads, impl_view_mapping_b ) +{ test_view_mapping_subview< Kokkos::Threads >(); TestViewMappingAtomic< Kokkos::Threads >::run(); } -TEST_F( threads, view_api) { - TestViewAPI< double , Kokkos::Threads >(); +TEST_F( threads, view_api ) +{ + TestViewAPI< double, Kokkos::Threads >(); } -TEST_F( threads , view_nested_view ) +TEST_F( threads, view_nested_view ) { ::Test::view_nested_view< Kokkos::Threads >(); } - - -TEST_F( threads , view_remap ) +TEST_F( threads, view_remap ) { - enum { N0 = 3 , N1 = 2 , N2 = 8 , N3 = 9 }; - - typedef Kokkos::View< double*[N1][N2][N3] , - Kokkos::LayoutRight , - Kokkos::Threads > output_type ; - - typedef Kokkos::View< int**[N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > input_type ; - - typedef Kokkos::View< int*[N0][N2][N3] , - Kokkos::LayoutLeft , - Kokkos::Threads > diff_type ; - - output_type output( "output" , N0 ); - input_type input ( "input" , N0 , N1 ); - diff_type diff ( "diff" , N0 ); - - int value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - input(i0,i1,i2,i3) = ++value ; - }}}} - - // Kokkos::deep_copy( diff , input ); // throw with incompatible shape - Kokkos::deep_copy( output , input ); - - value = 0 ; - for ( size_t i3 = 0 ; i3 < N3 ; ++i3 ) { - for ( size_t i2 = 0 ; i2 < N2 ; ++i2 ) { - for ( size_t i1 = 0 ; i1 < N1 ; ++i1 ) { - for ( size_t i0 = 0 ; i0 < N0 ; ++i0 ) { - ++value ; - ASSERT_EQ( value , ((int) output(i0,i1,i2,i3) ) ); - }}}} + enum { N0 = 3, N1 = 2, N2 = 8, N3 = 9 }; + + typedef Kokkos::View< double*[N1][N2][N3], + Kokkos::LayoutRight, + Kokkos::Threads > output_type; + + typedef Kokkos::View< int**[N2][N3], + Kokkos::LayoutLeft, + Kokkos::Threads > input_type; + + typedef Kokkos::View< int*[N0][N2][N3], + Kokkos::LayoutLeft, + Kokkos::Threads > diff_type; + + output_type output( "output", N0 ); + input_type input ( "input", N0, N1 ); + diff_type diff ( "diff", N0 ); + + int value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + input( i0, i1, i2, i3 ) = ++value; + } + + // Kokkos::deep_copy( diff, input ); // Throw with incompatible shape. 
+ Kokkos::deep_copy( output, input ); + + value = 0; + + for ( size_t i3 = 0; i3 < N3; ++i3 ) + for ( size_t i2 = 0; i2 < N2; ++i2 ) + for ( size_t i1 = 0; i1 < N1; ++i1 ) + for ( size_t i0 = 0; i0 < N0; ++i0 ) + { + ++value; + ASSERT_EQ( value, ( (int) output( i0, i1, i2, i3 ) ) ); + } } -//---------------------------------------------------------------------------- - -TEST_F( threads , view_aggregate ) +TEST_F( threads, view_aggregate ) { TestViewAggregate< Kokkos::Threads >(); } -TEST_F( threads , template_meta_functions ) +TEST_F( threads, template_meta_functions ) { - TestTemplateMetaFunctions<int, Kokkos::Threads >(); + TestTemplateMetaFunctions< int, Kokkos::Threads >(); } -} // namespace test - +} // namespace Test diff --git a/lib/kokkos/doc/design_notes_space_instances.md b/lib/kokkos/doc/design_notes_space_instances.md index 487fa25bcb32875ed3ba90821aba006a13cd506e..0124dfbc873285255fa92ff171dc5873056495ab 100644 --- a/lib/kokkos/doc/design_notes_space_instances.md +++ b/lib/kokkos/doc/design_notes_space_instances.md @@ -1,35 +1,41 @@ # Design Notes for Execution and Memory Space Instances +## Objective -## Execution Spaces + * Enable Kokkos interoperability with coarse-grain tasking models + +## Requirements - * Work is *dispatched* to an execution space instance + * Backwards compatible with existing Kokkos API + * Support existing Host execution spaces (Serial, Threads, OpenMP, maybe Qthreads) + * Support DARMA threading model (may require a new Host execution space) + * Support Uintah threading model, i.e. independent worker thread pools working off of shared task queues + + +## Execution Space + * Parallel work is *dispatched* on an execution space instance + + * Execution space instances are conceptually disjoint/independent from each other + - -## Host Associated Execution Space Instances - -Vocabulary and examples assuming C++11 Threads Support Library +## Host Execution Space Instances * A host-side *control* thread dispatches work to an instance - * `this_thread` is the control thread - * `main` is the initial control thread - * An execution space instance is a pool of threads + * A host execution space instance is an organized thread pool - * All instances are disjoint thread pools + * All instances are disjoint, i.e.
hardware resources are not shared between instances * Exactly one control thread is associated with an instance and only that control thread may dispatch work to to that instance - * A control thread may be a member of an instance, - if so then it is also the control thread associated - with that instance + * The control thread is a member of the instance - * The pool of threads associated with an instances is not mutatable + * The pool of threads associated with an instance is not mutable during that instance's existence * The pool of threads associated with an instance may be masked @@ -37,130 +43,89 @@ Vocabulary and examples assuming C++11 Threads Support Library - Example: only one hyperthread per core of the instance - - When a mask is applied to an instance that mask - remains until cleared or another mask is applied - - - Masking is portable by defining it as using a fraction - of the available resources (threads) - - * Instances are shared (referenced counted) objects, - just like `Kokkos::View` - -``` -struct StdThread { - void mask( float fraction ); - void unmask() { mask( 1.0 ); } -}; -``` - - - -### Requesting an Execution Space Instance - - * `Space::request(` *who* `,` *what* `,` *control-opt* `)` - - * *who* is an identifier for subsquent queries regarding - who requested each instance - - * *what* is the number of threads and how they should be placed - - - Placement within locality-topology hierarchy; e.g., HWLOC - - - Compact within a level of hierarchy, or striped across that level; - e.g., socket or NUMA region - - - Granularity of request is core - - * *control-opt* optionally specifies whether the instance - has a new control thread - - - *control-opt* includes a control function / closure - - - The new control thread is a member of the instance - - - The control function is called by the new control thread - and is passed a `const` instance - - - The instance is **not** returned to the creating control thread - - * `std::thread` that is not a member of an instance is - *hard blocked* on a `std::mutex` - - - One global mutex or one mutex per thread?
- - * `std::thread` that is a member of an instance is - *spinning* waiting for work, or are working - -``` -struct StdThread { - - struct Resource ; - - static StdThread request(); // default + - A mask can be applied during the policy creation of a parallel algorithm + + - Masking is portable by defining it as the ceiling of a fraction in [0.0, 1.0] + of the available resources - static StdThread request( const std::string & , const Resource & ); - - // If the instance can be reserved then - // allocate a copy of ControlClosure and invoke - // ControlClosure::operator()( const StdThread intance ) const - template< class ControlClosure > - static bool request( const std::string & , const Resource & - , const ControlClosure & ); -}; ``` - -### Relinquishing an Execution Space Instance - - * De-referencing the last reference-counted instance - relinquishes the pool of threads - - * If a control thread was created for the instance then - it is relinquished when that control thread returns - from the control function - - - Requires the reference count to be zero, an error if not - - * No *forced* relinquish - - -## CUDA Associated Execution Space Instances - - * Only a signle CUDA architecture - - * An instance is a device + stream - - * A stream is exclusive to an instance - - * Only a host-side control thread can dispatch work to an instance - - * Finite number of streams per device - - * ISSUE: How to use CUDA `const` memory with multiple streams? - - * Masking can be mapped to restricting the number of CUDA blocks - to the fraction of available resources; e.g., maximum resident blocks - - -### Requesting an Execution Space Instance - - * `Space::request(` *who* `,` *what* `)` - - * *who* is an identifier for subsquent queries regarding - who requested each instance - - * *what* is which device, the stream is a requested/relinquished resource - +class ExecutionSpace { +public: + using execution_space = ExecutionSpace; + using memory_space = ...; + using device_type = Kokkos::Device<execution_space, memory_space>; + using array_layout = ...; + using size_type = ...; + using scratch_memory_space = ...; + + + class Instance + { + int thread_pool_size( int depth = 0 ); + ... + }; + + class InstanceRequest + { + public: + using Control = std::function< void( Instance * )>; + + InstanceRequest( Control control + , unsigned thread_count + , unsigned use_numa_count = 0 + , unsigned use_cores_per_numa = 0 + ); + + }; + + static bool in_parallel(); + + static bool sleep(); + static bool wake(); + + static void fence(); + + static void print_configuration( std::ostream &, const bool detailed = false ); + + static void initialize( unsigned thread_count = 0 + , unsigned use_numa_count = 0 + , unsigned use_cores_per_numa = 0 + ); + + // Partition the current instance into the requested instances + // and run the given functions on the corresponding instances + // will block until all the partitioned instances complete and + // the original instance will be restored + // + // Requires that the space has already been initialized + // Requires that the request can be satisfied by the current instance + // i.e. the sum of the number of requested threads must be less than the + // max_hardware_threads + // + // Each control functor will accept a handle to its new default instance + // Each instance must be independent of all other instances + // i.e.
no assumption on scheduling between instances + // The user is responsible for checking the return code for errors + static int run_instances( std::vector< InstanceRequest> const& requests ); + + static void finalize(); + + static int is_initialized(); + + static int concurrency(); + + static int thread_pool_size( int depth = 0 ); + + static int thread_pool_rank(); + + static int max_hardware_threads(); + + static int hardware_thread_id(); + + }; ``` -struct Cuda { + - struct Resource ; - - static Cuda request(); - - static Cuda request( const std::string & , const Resource & ); -}; -``` diff --git a/lib/kokkos/example/md_skeleton/types.h b/lib/kokkos/example/md_skeleton/types.h index 7f92b7cd0f8089d93c1e18e5dff3ad1508316867..c9689188a1c289c67e08dbe07707a51a0f8bff28 100644 --- a/lib/kokkos/example/md_skeleton/types.h +++ b/lib/kokkos/example/md_skeleton/types.h @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -50,7 +50,7 @@ typedef Kokkos::DefaultExecutionSpace execution_space ; -#if ! defined( KOKKOS_HAVE_CUDA ) +#if ! defined( KOKKOS_ENABLE_CUDA ) struct double2 { double x, y; KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp index 326d064105ecf2da945cf346cbaa9abbe27eab20..249d44ab559682ce2622842048b47af4613ec16f 100644 --- a/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp +++ b/lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -100,7 +100,7 @@ int main (int argc, char* argv[]) { // order. Parallel for loops may execute in any order. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). -#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (15, KOKKOS_LAMBDA (const int i) { // printf works in a CUDA parallel kernel; std::ostream does not.
printf ("Hello from i = %i\n", i); diff --git a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp index 70eea4324022b8bcfd7e1266f5c47ef08380d8c9..f7f467ad2d1dbd866ad185776cea5d45a9abce3c 100644 --- a/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp +++ b/lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -69,7 +69,7 @@ int main (int argc, char* argv[]) { // It also handles any other syntax needed for CUDA. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_reduce (n, KOKKOS_LAMBDA (const int i, int& lsum) { lsum += i*i; }, sum); @@ -85,7 +85,7 @@ int main (int argc, char* argv[]) { printf ("Sum of squares of integers from 0 to %i, " "computed sequentially, is %i\n", n - 1, seqSum); Kokkos::finalize (); -#if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) +#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) return (sum == seqSum) ? 0 : -1; #else return 0; diff --git a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp index dd0641be54087a76d45505d0e6777a4ebe1fd9d1..3450ad1bb468095a9d821a1c8e0560b256607166 100644 --- a/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp +++ b/lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -99,7 +99,7 @@ int main (int argc, char* argv[]) { // ask for one. // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) Kokkos::parallel_for (10, KOKKOS_LAMBDA (const int i) { // Acesss the View just like a Fortran array. 
The layout depends // on the View's memory space, so don't rely on the View's diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp index 216db7f125d16ed7150f2f2049506a723e9dcc79..9ea5e8b70711942cb61ef29f38144b52f81137e0 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -79,7 +79,7 @@ int main (int narg, char* args[]) { int sum = 0; // We also need to protect the usage of a lambda against compiling // with a backend which doesn't support it (i.e. Cuda 6.5/7.0). - #if (KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) + #if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) parallel_reduce (policy, KOKKOS_LAMBDA (const team_member& thread, int& lsum) { lsum += 1; // TeamPolicy<>::member_type provides functions to query the diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index e7bd9da36b4c1eaf60125e6c38f5e3bf7d33bf5d..e671293ff11ad8120766ed014128b25fb39089bc 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -5,153 +5,166 @@ MAKE_J_OPTION="32" while [[ $# > 0 ]] do -key="$1" + key="$1" -case $key in + case $key in --kokkos-path*) - KOKKOS_PATH="${key#*=}" - ;; + KOKKOS_PATH="${key#*=}" + ;; + --qthreads-path*) + QTHREADS_PATH="${key#*=}" + ;; --prefix*) - PREFIX="${key#*=}" - ;; + PREFIX="${key#*=}" + ;; --with-cuda) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" - CUDA_PATH_NVCC=`which nvcc` - CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH_NVCC=`which nvcc` + CUDA_PATH=${CUDA_PATH_NVCC%/bin/nvcc} + ;; # Catch this before '--with-cuda*' --with-cuda-options*) - KOKKOS_CUDA_OPT="${key#*=}" - ;; + KOKKOS_CUDA_OPT="${key#*=}" + ;; --with-cuda*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" - CUDA_PATH="${key#*=}" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Cuda" + CUDA_PATH="${key#*=}" + ;; --with-openmp) - KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},OpenMP" + ;; --with-pthread) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Pthread" + ;; --with-serial) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" - ;; - --with-qthread*) - KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthread" - QTHREAD_PATH="${key#*=}" - ;; + KOKKOS_DEVICES="${KOKKOS_DEVICES},Serial" + ;; + --with-qthreads*) + KOKKOS_DEVICES="${KOKKOS_DEVICES},Qthreads" + if [ -z "$QTHREADS_PATH" ]; then + QTHREADS_PATH="${key#*=}" + fi + ;; --with-devices*) - DEVICES="${key#*=}" - KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" - ;; + DEVICES="${key#*=}" + 
KOKKOS_DEVICES="${KOKKOS_DEVICES},${DEVICES}" + ;; --with-gtest*) - GTEST_PATH="${key#*=}" - ;; + GTEST_PATH="${key#*=}" + ;; --with-hwloc*) - HWLOC_PATH="${key#*=}" - ;; + HWLOC_PATH="${key#*=}" + ;; --arch*) - KOKKOS_ARCH="${key#*=}" - ;; + KOKKOS_ARCH="${key#*=}" + ;; --cxxflags*) - CXXFLAGS="${key#*=}" - ;; + CXXFLAGS="${key#*=}" + ;; --ldflags*) - LDFLAGS="${key#*=}" - ;; + LDFLAGS="${key#*=}" + ;; --debug|-dbg) - KOKKOS_DEBUG=yes - ;; + KOKKOS_DEBUG=yes + ;; --make-j*) - MAKE_J_OPTION="${key#*=}" - ;; + MAKE_J_OPTION="${key#*=}" + ;; --compiler*) - COMPILER="${key#*=}" - CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` - if [ ${CNUM} -gt 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - if [[ ! -n ${COMPILER} ]]; then - echo "Empty compiler specified by --compiler command." - exit - fi - CNUM=`which ${COMPILER} | grep ${COMPILER} | wc -l` - if [ ${CNUM} -eq 0 ]; then - echo "Invalid compiler by --compiler command: '${COMPILER}'" - exit - fi - ;; - --with-options*) - KOKKOS_OPT="${key#*=}" - ;; + COMPILER="${key#*=}" + CNUM=`which ${COMPILER} 2>&1 >/dev/null | grep "no ${COMPILER}" | wc -l` + if [ ${CNUM} -gt 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + if [[ ! -n ${COMPILER} ]]; then + echo "Empty compiler specified by --compiler command." + exit + fi + CNUM=`which ${COMPILER} | grep ${COMPILER} | wc -l` + if [ ${CNUM} -eq 0 ]; then + echo "Invalid compiler by --compiler command: '${COMPILER}'" + exit + fi + ;; + --with-options*) + KOKKOS_OPT="${key#*=}" + ;; --help) - echo "Kokkos configure options:" - echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" - echo "--prefix=/Install/Path: Path to where the Kokkos library should be installed" - echo "" - echo "--with-cuda[=/Path/To/Cuda]: enable Cuda and set path to Cuda Toolkit" - echo "--with-openmp: enable OpenMP backend" - echo "--with-pthread: enable Pthreads backend" - echo "--with-serial: enable Serial backend" - echo "--with-qthread=/Path/To/Qthread: enable Qthread backend" - echo "--with-devices: explicitly add a set of backends" - echo "" - echo "--arch=[OPTIONS]: set target architectures. 
Options are:" - echo " ARMv80 = ARMv8.0 Compatible CPU" - echo " ARMv81 = ARMv8.1 Compatible CPU" - echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" - echo " SNB = Intel Sandy/Ivy Bridge CPUs" - echo " HSW = Intel Haswell CPUs" - echo " BDW = Intel Broadwell Xeon E-class CPUs" - echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" - echo " KNC = Intel Knights Corner Xeon Phi" - echo " KNL = Intel Knights Landing Xeon Phi" - echo " Kepler30 = NVIDIA Kepler generation CC 3.0" - echo " Kepler35 = NVIDIA Kepler generation CC 3.5" - echo " Kepler37 = NVIDIA Kepler generation CC 3.7" - echo " Pascal60 = NVIDIA Pascal generation CC 6.0" - echo " Pascal61 = NVIDIA Pascal generation CC 6.1" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Power8 = IBM POWER8 CPUs" - echo " Power9 = IBM POWER9 CPUs" - echo "" - echo "--compiler=/Path/To/Compiler set the compiler" - echo "--debug,-dbg: enable Debugging" - echo "--cxxflags=[FLAGS] overwrite CXXFLAGS for library build and test build" - echo " This will still set certain required flags via" - echo " KOKKOS_CXXFLAGS (such as -fopenmp, --std=c++11, etc.)" - echo "--ldflags=[FLAGS] overwrite LDFLAGS for library build and test build" - echo " This will still set certain required flags via" - echo " KOKKOS_LDFLAGS (such as -fopenmp, -lpthread, etc.)" - echo "--with-gtest=/Path/To/Gtest: set path to gtest (used in unit and performance tests" - echo "--with-hwloc=/Path/To/Hwloc: set path to hwloc" - echo "--with-options=[OPTIONS]: additional options to Kokkos:" - echo " aggressive_vectorization = add ivdep on loops" - echo "--with-cuda-options=[OPT]: additional options to CUDA:" - echo " force_uvm, use_ldg, enable_lambda, rdc" - echo "--make-j=[NUM]: set -j flag used during build." - exit 0 - ;; + echo "Kokkos configure options:" + echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory." + echo "--qthreads-path=/Path/To/Qthreads: Path to Qthreads install directory." + echo " Overrides path given by --with-qthreads." + echo "--prefix=/Install/Path: Path to install the Kokkos library." + echo "" + echo "--with-cuda[=/Path/To/Cuda]: Enable Cuda and set path to Cuda Toolkit." + echo "--with-openmp: Enable OpenMP backend." + echo "--with-pthread: Enable Pthreads backend." + echo "--with-serial: Enable Serial backend." + echo "--with-qthreads[=/Path/To/Qthreads]: Enable Qthreads backend." + echo "--with-devices: Explicitly add a set of backends." + echo "" + echo "--arch=[OPT]: Set target architectures. Options are:" + echo " ARMv80 = ARMv8.0 Compatible CPU" + echo " ARMv81 = ARMv8.1 Compatible CPU" + echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " SNB = Intel Sandy/Ivy Bridge CPUs" + echo " HSW = Intel Haswell CPUs" + echo " BDW = Intel Broadwell Xeon E-class CPUs" + echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " KNC = Intel Knights Corner Xeon Phi" + echo " KNL = Intel Knights Landing Xeon Phi" + echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler35 = NVIDIA Kepler generation CC 3.5" + echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Pascal60 = NVIDIA Pascal generation CC 6.0" + echo " Pascal61 = NVIDIA Pascal generation CC 6.1" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo "" + echo "--compiler=/Path/To/Compiler Set the compiler." + echo "--debug,-dbg: Enable Debugging." + echo "--cxxflags=[FLAGS] Overwrite CXXFLAGS for library build and test" + echo " build. 
This will still set certain required" + echo " flags via KOKKOS_CXXFLAGS (such as -fopenmp," + echo " --std=c++11, etc.)." + echo "--ldflags=[FLAGS] Overwrite LDFLAGS for library build and test" + echo " build. This will still set certain required" + echo " flags via KOKKOS_LDFLAGS (such as -fopenmp," + echo " -lpthread, etc.)." + echo "--with-gtest=/Path/To/Gtest: Set path to gtest. (Used in unit and performance" + echo " tests.)" + echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc." + echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " aggressive_vectorization = add ivdep on loops" + echo "--with-cuda-options=[OPT]: Additional options to CUDA:" + echo " force_uvm, use_ldg, enable_lambda, rdc" + echo "--make-j=[NUM]: Set -j flag used during build." + exit 0 + ;; *) - echo "warning: ignoring unknown option $key" - ;; -esac -shift + echo "warning: ignoring unknown option $key" + ;; + esac + + shift done -# If KOKKOS_PATH undefined, assume parent dir of this -# script is the KOKKOS_PATH +# Remove leading ',' from KOKKOS_DEVICES. +KOKKOS_DEVICES=$(echo $KOKKOS_DEVICES | sed 's/^,//') + +# If KOKKOS_PATH undefined, assume parent dir of this script is the KOKKOS_PATH. if [ -z "$KOKKOS_PATH" ]; then - KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) + KOKKOS_PATH=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) else - # Ensure KOKKOS_PATH is abs path - KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd ) fi if [ "${KOKKOS_PATH}" = "${PWD}" ] || [ "${KOKKOS_PATH}" = "${PWD}/" ]; then -echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" -exit + echo "Running generate_makefile.sh in the Kokkos root directory is not allowed" + exit fi KOKKOS_SRC_PATH=${KOKKOS_PATH} @@ -160,52 +173,63 @@ KOKKOS_SETTINGS="KOKKOS_SRC_PATH=${KOKKOS_SRC_PATH}" #KOKKOS_SETTINGS="KOKKOS_PATH=${KOKKOS_PATH}" if [ ${#COMPILER} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXX=${COMPILER}" fi + if [ ${#KOKKOS_DEVICES} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEVICES=${KOKKOS_DEVICES}" fi + if [ ${#KOKKOS_ARCH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_ARCH=${KOKKOS_ARCH}" fi + if [ ${#KOKKOS_DEBUG} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_DEBUG=${KOKKOS_DEBUG}" fi + if [ ${#CUDA_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CUDA_PATH=${CUDA_PATH}" fi + if [ ${#CXXFLAGS} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} CXXFLAGS=\"${CXXFLAGS}\"" fi + if [ ${#LDFLAGS} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} LDFLAGS=\"${LDFLAGS}\"" fi + if [ ${#GTEST_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" else -GTEST_PATH=${KOKKOS_PATH}/tpls/gtest -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" + GTEST_PATH=${KOKKOS_PATH}/tpls/gtest + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} GTEST_PATH=${GTEST_PATH}" fi + if [ ${#HWLOC_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} 
HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} HWLOC_PATH=${HWLOC_PATH} KOKKOS_USE_TPLS=hwloc" fi -if [ ${#QTHREAD_PATH} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREAD_PATH=${QTHREAD_PATH}" + +if [ ${#QTHREADS_PATH} -gt 0 ]; then + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} QTHREADS_PATH=${QTHREADS_PATH}" fi + if [ ${#KOKKOS_OPT} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_OPTIONS=${KOKKOS_OPT}" fi + if [ ${#KOKKOS_CUDA_OPT} -gt 0 ]; then -KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" + KOKKOS_SETTINGS="${KOKKOS_SETTINGS} KOKKOS_CUDA_OPTIONS=${KOKKOS_CUDA_OPT}" fi KOKKOS_SETTINGS_NO_KOKKOS_PATH="${KOKKOS_SETTINGS}" KOKKOS_TEST_INSTALL_PATH="${PWD}/install" if [ ${#PREFIX} -gt 0 ]; then -KOKKOS_INSTALL_PATH="${PREFIX}" + KOKKOS_INSTALL_PATH="${PREFIX}" else -KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} + KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} fi @@ -229,7 +253,7 @@ mkdir example/fenl mkdir example/tutorial if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then -mkdir example/ichol + mkdir example/ichol fi KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}"
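The masking rule added in the design-note hunk earlier in this patch (a portable mask is the ceiling of a fraction in [0.0, 1.0] of the available resources, e.g. mapped to a limit on CUDA resident blocks) is easy to get wrong by one, so a small worked sketch may help. Everything below is illustrative only: `masked_resource_count` is a hypothetical helper, not part of the proposal, and clamping out-of-range fractions is an assumption.

```
#include <algorithm>
#include <cmath>

// Hypothetical helper (not part of the proposal): how many of the available
// resources a mask fraction selects, computed as ceil( fraction * available ).
// Clamping the fraction to [0.0, 1.0] is an assumption made here.
int masked_resource_count( double mask_fraction, int available_resources )
{
  mask_fraction = std::min( 1.0, std::max( 0.0, mask_fraction ) );
  return static_cast<int>( std::ceil( mask_fraction * available_resources ) );
}

// Example: a 0.25 mask on a device whose maximum resident block count is 13
// would restrict a dispatch to ceil(0.25 * 13) = 4 blocks.
```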
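The partitioning API proposed in the `ExecutionSpace` hunk above is easiest to read next to a usage example. The sketch below is written only against that proposed interface: `Instance`, `InstanceRequest`, `run_instances`, and `max_hardware_threads` are the names from the proposal (which itself leaves several aliases as `...`, so this is pseudocode rather than code against an existing Kokkos type), while `partition_example`, the functor body, and the thread counts are illustrative assumptions.

```
#include <functional>
#include <vector>

void partition_example()
{
  using Request = ExecutionSpace::InstanceRequest;

  // Each control functor receives a handle to its own default instance and
  // must make no assumption about scheduling relative to other instances.
  Request::Control work = []( ExecutionSpace::Instance * instance )
  {
    // ... dispatch parallel algorithms that use only 'instance' ...
    (void) instance;
  };

  // Two independent instances of 8 threads each; per the proposal, the sum of
  // requested threads must be less than ExecutionSpace::max_hardware_threads().
  std::vector< Request > requests;
  requests.push_back( Request( work, 8 /* thread_count */ ) );
  requests.push_back( Request( work, 8 /* thread_count */ ) );

  // Blocks until both partitioned instances complete and the original
  // instance is restored; the caller checks the return code for errors.
  const int error = ExecutionSpace::run_instances( requests );
  if ( error ) { /* handle the error */ }
}
```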